{
    Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe

    This unit contains the peephole optimizer.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

 ****************************************************************************
}
unit aoptx86;

{$i fpcdefs.inc}

{ $define DEBUG_AOPTCPU}

interface

  uses
    globtype,
    cpubase,
    aasmtai,aasmcpu,
    cgbase,cgutils,
    aopt,aoptobj;

  type
    TX86AsmOptimizer = class(TAsmOptimizer)
      function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
     protected
      { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
      function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
      { checks whether reading the value in reg1 depends on the value of reg2. This
        is very similar to SuperRegistersEqual, except it takes into account that
        R_SUBH and R_SUBL are independent (e.g. reading from AL does not
        depend on the value in AH). }
      function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;

      procedure PostPeepholeOptMov(const p : tai);

      function OptPass1AND(var p : tai) : boolean;
      function OptPass1VMOVAP(var p : tai) : boolean;
      function OptPass1VOP(const p : tai) : boolean;
      function OptPass1MOV(var p : tai) : boolean;
      function OptPass2MOV(var p : tai) : boolean;
      function OptPass2Imul(var p : tai) : boolean;

      procedure DebugMsg(const s : string; p : tai);inline;

      procedure AllocRegBetween(reg : tregister; p1,p2 : tai;var initialusedregs : TAllUsedRegs);
      class function IsExitCode(p : tai) : boolean;
      class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean;
      procedure RemoveLastDeallocForFuncRes(p : tai);
    end;

  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;

  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;

  function RefsEqual(const r1, r2: treference): boolean;

  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  { returns true, if ref is a reference using only the registers passed as base and index
    and having an offset }
  function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;

  function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;

implementation

  uses
    cutils,
    verbose,
    procinfo,
    symconst,symsym,
    itcpugas;
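
  { The MatchInstruction overloads below are small matching helpers used all
    over this unit: they check that a tai is an instruction, that its opcode
    is one of the given opcodes, and that its size is in opsize (an empty
    opsize set matches any size), e.g. MatchInstruction(hp1,A_MOV,[S_L])
    matches only a 32 bit mov. }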
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
    begin
      result :=
        (instr.typ = ait_instruction) and
        (taicpu(instr).opcode = op) and
        ((opsize = []) or (taicpu(instr).opsize in opsize));
    end;


  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
    begin
      result :=
        (instr.typ = ait_instruction) and
        ((taicpu(instr).opcode = op1) or
         (taicpu(instr).opcode = op2)
        ) and
        ((opsize = []) or (taicpu(instr).opsize in opsize));
    end;


  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
    begin
      result :=
        (instr.typ = ait_instruction) and
        ((taicpu(instr).opcode = op1) or
         (taicpu(instr).opcode = op2) or
         (taicpu(instr).opcode = op3)
        ) and
        ((opsize = []) or (taicpu(instr).opsize in opsize));
    end;


  function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
    const opsize : topsizes) : boolean;
    var
      op : TAsmOp;
    begin
      result:=false;
      for op in ops do
        begin
          if (instr.typ = ait_instruction) and
             (taicpu(instr).opcode = op) and
             ((opsize = []) or (taicpu(instr).opsize in opsize)) then
            begin
              result:=true;
              exit;
            end;
        end;
    end;
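
  { The MatchOperand overloads compare an instruction operand against a
    register, a constant or another operand; two reference operands are
    considered equal when RefsEqual holds for them. }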
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      result := (oper.typ = top_reg) and (oper.reg = reg);
    end;


  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
    begin
      result := (oper.typ = top_const) and (oper.val = a);
    end;


  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
    begin
      result := oper1.typ = oper2.typ;
      if result then
        case oper1.typ of
          top_const:
            Result:=oper1.val = oper2.val;
          top_reg:
            Result:=oper1.reg = oper2.reg;
          top_ref:
            Result:=RefsEqual(oper1.ref^, oper2.ref^);
          else
            internalerror(2013102801);
        end
    end;


  function RefsEqual(const r1, r2: treference): boolean;
    begin
      RefsEqual :=
        (r1.offset = r2.offset) and
        (r1.segment = r2.segment) and (r1.base = r2.base) and
        (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
        (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
        (r1.relsymbol = r2.relsymbol);
    end;
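
  { MatchReference checks that ref is a plain [base+index] reference without
    offset, segment or symbol; passing NR_INVALID for base or index acts as
    a wildcard for that part of the reference. MatchReferenceWithOffset is
    the same check with the offset allowed to be non-zero. }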
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
    begin
      Result:=(ref.offset=0) and
        (ref.scalefactor in [0,1]) and
        (ref.segment=NR_NO) and
        (ref.symbol=nil) and
        (ref.relsymbol=nil) and
        ((base=NR_INVALID) or
         (ref.base=base)) and
        ((index=NR_INVALID) or
         (ref.index=index));
    end;


  function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
    begin
      Result:=(ref.scalefactor in [0,1]) and
        (ref.segment=NR_NO) and
        (ref.symbol=nil) and
        (ref.relsymbol=nil) and
        ((base=NR_INVALID) or
         (ref.base=base)) and
        ((index=NR_INVALID) or
         (ref.index=index));
    end;


  function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
    begin
      Result:=(taicpu(instr).ops=2) and
        (taicpu(instr).oper[0]^.typ=ot0) and
        (taicpu(instr).oper[1]^.typ=ot1);
    end;


{$ifdef DEBUG_AOPTCPU}
  procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
    begin
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
    end;
{$else DEBUG_AOPTCPU}
  procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
    end;
{$endif DEBUG_AOPTCPU}
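
  { Reg1WriteOverwritesReg2Entirely: a write to reg1 replaces the whole value
    of reg2, e.g. writing EAX overwrites AX/AL/AH entirely, while writing AL
    leaves AH and the upper bits of EAX unchanged. }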
  function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
    begin
      if not SuperRegistersEqual(reg1,reg2) then
        exit(false);
      if getregtype(reg1)<>R_INTREGISTER then
        exit(true); { because SuperRegistersEqual is true }
      case getsubreg(reg1) of
        { A write to R_SUBL doesn't change R_SUBH and if reg2 is R_SUBW or
          higher, it preserves the high bits, so the new value depends on
          reg2's previous value. In other words, it is equivalent to doing:
            reg2 := (reg2 and $ffffff00) or byte(reg1); }
        R_SUBL:
          exit(getsubreg(reg2)=R_SUBL);
        { A write to R_SUBH doesn't change R_SUBL and if reg2 is R_SUBW or
          higher, it actually does a:
            reg2 := (reg2 and $ffff00ff) or (reg1 and $ff00); }
        R_SUBH:
          exit(getsubreg(reg2)=R_SUBH);
        { If reg2 is R_SUBD or larger, a write to R_SUBW preserves the high 16
          bits of reg2:
            reg2 := (reg2 and $ffff0000) or word(reg1); }
        R_SUBW:
          exit(getsubreg(reg2) in [R_SUBL,R_SUBH,R_SUBW]);
        { a write to R_SUBD always overwrites every other subregister,
          because it clears the high 32 bits of R_SUBQ on x86_64 }
        R_SUBD,
        R_SUBQ:
          exit(true);
        else
          internalerror(2017042801);
      end;
    end;
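
  { Reg1ReadDependsOnReg2: reading reg1 depends on the current value of reg2,
    e.g. reading AX depends on both AL and AH, but reading AL does not depend
    on AH. }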
  function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
    begin
      if not SuperRegistersEqual(reg1,reg2) then
        exit(false);
      if getregtype(reg1)<>R_INTREGISTER then
        exit(true); { because SuperRegistersEqual is true }
      case getsubreg(reg1) of
        R_SUBL:
          exit(getsubreg(reg2)<>R_SUBH);
        R_SUBH:
          exit(getsubreg(reg2)<>R_SUBL);
        R_SUBW,
        R_SUBD,
        R_SUBQ:
          exit(true);
        else
          internalerror(2017042802);
      end;
    end;

  { allocates register reg between (and including) instructions p1 and p2
    the type of p1 and p2 must not be in SkipInstr
    note that this routine is both called from the peephole optimizer
    (where optinfo is not yet initialised) and from the cse (where it is) }
  procedure TX86AsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
    var
      hp, start: tai;
      removedsomething,
      firstRemovedWasAlloc,
      lastRemovedWasDealloc: boolean;
    begin
{$ifdef EXTDEBUG}
{      if assigned(p1.optinfo) and
         (ptaiprop(p1.optinfo)^.usedregs <> initialusedregs) then
        internalerror(2004101010); }
{$endif EXTDEBUG}
      start := p1;
      if (reg = NR_ESP) or
         (reg = current_procinfo.framepointer) or
         not(assigned(p1)) then
        { this happens with registers which are loaded implicitly, outside the }
        { current block (e.g. esi with self) }
        exit;
      { make sure we allocate it for this instruction }
      getnextinstruction(p2,p2);
      lastRemovedWasDealloc := false;
      removedSomething := false;
      firstRemovedWasAlloc := false;
{$ifdef allocregdebug}
      hp := tai_comment.Create(strpnew('allocating '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
        ' from here...'));
      insertllitem(asml,p1.previous,p1,hp);
      hp := tai_comment.Create(strpnew('allocated '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
        ' till here...'));
      insertllitem(asml,p2,p2.next,hp);
{$endif allocregdebug}
      { do it the safe way: always allocate the full super register,
        as we do no register re-allocation in the peephole optimizer,
        this does not hurt
      }
      case getregtype(reg) of
        R_MMREGISTER:
          reg:=newreg(R_MMREGISTER,getsupreg(reg),R_SUBMMWHOLE);
        R_INTREGISTER:
          reg:=newreg(R_INTREGISTER,getsupreg(reg),R_SUBWHOLE);
      end;
      if not(RegInUsedRegs(reg,initialusedregs)) then
        begin
          hp := tai_regalloc.alloc(reg,nil);
          insertllItem(p1.previous,p1,hp);
          IncludeRegInUsedRegs(reg,initialusedregs);
        end;
      while assigned(p1) and
            (p1 <> p2) do
        begin
          if assigned(p1.optinfo) then
            internalerror(2014022301); // IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs);
          p1 := tai(p1.next);
          repeat
            while assigned(p1) and
                  (p1.typ in (SkipInstr-[ait_regalloc])) Do
              p1 := tai(p1.next);
            { remove all allocation/deallocation info about the register in between }
            if assigned(p1) and
               (p1.typ = ait_regalloc) then
              begin
                { same super register, different sub register? }
                if SuperRegistersEqual(reg,tai_regalloc(p1).reg) and (tai_regalloc(p1).reg<>reg) then
                  begin
                    if (getsubreg(tai_regalloc(p1).reg)>getsubreg(reg)) or (getsubreg(reg)=R_SUBH) then
                      internalerror(2016101501);
                    tai_regalloc(p1).reg:=reg;
                  end;
                if tai_regalloc(p1).reg=reg then
                  begin
                    if not removedSomething then
                      begin
                        firstRemovedWasAlloc := tai_regalloc(p1).ratype=ra_alloc;
                        removedSomething := true;
                      end;
                    lastRemovedWasDealloc := (tai_regalloc(p1).ratype=ra_dealloc);
                    hp := tai(p1.Next);
                    asml.Remove(p1);
                    p1.free;
                    p1 := hp;
                  end
                else
                  p1 := tai(p1.next);
              end;
          until not(assigned(p1)) or
                not(p1.typ in SkipInstr);
        end;
      if assigned(p1) then
        begin
          if firstRemovedWasAlloc then
            begin
              hp := tai_regalloc.Alloc(reg,nil);
              insertLLItem(start.previous,start,hp);
            end;
          if lastRemovedWasDealloc then
            begin
              hp := tai_regalloc.DeAlloc(reg,nil);
              insertLLItem(p1.previous,p1,hp);
            end;
        end;
    end;
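
  { RegLoadedWithNewValue returns true if hp loads a value into reg that
    entirely replaces its old contents, i.e. the new value of reg does not
    depend on reg's previous value (e.g. "mov $1,%eax", "pop %eax" or
    "xor %eax,%eax", but not "add $1,%eax"). For the flags register it
    checks per-flag write information from insprop. }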
  function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      if not assigned(hp) or
         (hp.typ <> ait_instruction) then
        begin
          Result := false;
          exit;
        end;
      p := taicpu(hp);
      if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
        with insprop[p.opcode] do
          begin
            case getsubreg(reg) of
              R_SUBW,R_SUBD,R_SUBQ:
                Result:=
                  RegLoadedWithNewValue(NR_CARRYFLAG,hp) and
                  RegLoadedWithNewValue(NR_PARITYFLAG,hp) and
                  RegLoadedWithNewValue(NR_AUXILIARYFLAG,hp) and
                  RegLoadedWithNewValue(NR_ZEROFLAG,hp) and
                  RegLoadedWithNewValue(NR_SIGNFLAG,hp) and
                  RegLoadedWithNewValue(NR_OVERFLOWFLAG,hp);
              R_SUBFLAGCARRY:
                Result:=[Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGPARITY:
                Result:=[Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGAUXILIARY:
                Result:=[Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGZERO:
                Result:=[Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGSIGN:
                Result:=[Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGOVERFLOW:
                Result:=[Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGINTERRUPT:
                Result:=[Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGDIRECTION:
                Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*Ch<>[];
              else
                internalerror(2017050501);
            end;
            exit;
          end;
      Result :=
        (((p.opcode = A_MOV) or
          (p.opcode = A_MOVZX) or
          (p.opcode = A_MOVSX) or
          (p.opcode = A_LEA) or
          (p.opcode = A_VMOVSS) or
          (p.opcode = A_VMOVSD) or
          (p.opcode = A_VMOVAPD) or
          (p.opcode = A_VMOVAPS) or
          (p.opcode = A_VMOVQ) or
          (p.opcode = A_MOVSS) or
          (p.opcode = A_MOVSD) or
          (p.opcode = A_MOVQ) or
          (p.opcode = A_MOVAPD) or
          (p.opcode = A_MOVAPS) or
{$ifndef x86_64}
          (p.opcode = A_LDS) or
          (p.opcode = A_LES) or
{$endif not x86_64}
          (p.opcode = A_LFS) or
          (p.opcode = A_LGS) or
          (p.opcode = A_LSS)) and
         (p.ops=2) and { A_MOVSD can have zero operands, so this check is needed }
         (p.oper[1]^.typ = top_reg) and
         (Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)) and
         ((p.oper[0]^.typ = top_const) or
          ((p.oper[0]^.typ = top_reg) and
           not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
          ((p.oper[0]^.typ = top_ref) and
           not RegInRef(reg,p.oper[0]^.ref^)))) or
        ((p.opcode = A_POP) and
         (Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg))) or
        ((p.opcode = A_IMUL) and
         (p.ops=3) and
         (Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg,reg)) and
         (((p.oper[1]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[1]^.reg,reg))) or
          ((p.oper[1]^.typ=top_ref) and not(RegInRef(reg,p.oper[1]^.ref^))))) or
        ((((p.opcode = A_IMUL) or
           (p.opcode = A_MUL)) and
          (p.ops=1)) and
         (((p.oper[0]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
          ((p.oper[0]^.typ=top_ref) and not(RegInRef(reg,p.oper[0]^.ref^)))) and
         (((p.opsize=S_B) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
          ((p.opsize=S_W) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
          ((p.opsize=S_L) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg))
{$ifdef x86_64}
          or ((p.opsize=S_Q) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg))
{$endif x86_64}
         )) or
        ((p.opcode = A_CWD) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
        ((p.opcode = A_CDQ) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg)) or
{$ifdef x86_64}
        ((p.opcode = A_CQO) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg)) or
{$endif x86_64}
        ((p.opcode = A_CBW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
{$ifndef x86_64}
        ((p.opcode = A_LDS) and (reg=NR_DS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
        ((p.opcode = A_LES) and (reg=NR_ES) and not(RegInRef(reg,p.oper[0]^.ref^))) or
{$endif not x86_64}
        ((p.opcode = A_LFS) and (reg=NR_FS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
        ((p.opcode = A_LGS) and (reg=NR_GS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
        ((p.opcode = A_LSS) and (reg=NR_SS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
{$ifndef x86_64}
        ((p.opcode = A_AAM) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
{$endif not x86_64}
        ((p.opcode = A_LAHF) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
        ((p.opcode = A_LODSB) and Reg1WriteOverwritesReg2Entirely(NR_AL,reg)) or
        ((p.opcode = A_LODSW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg)) or
        ((p.opcode = A_LODSD) and Reg1WriteOverwritesReg2Entirely(NR_EAX,reg)) or
{$ifdef x86_64}
        ((p.opcode = A_LODSQ) and Reg1WriteOverwritesReg2Entirely(NR_RAX,reg)) or
{$endif x86_64}
        ((p.opcode = A_SETcc) and (p.oper[0]^.typ=top_reg) and Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
        (((p.opcode = A_FSTSW) or
          (p.opcode = A_FNSTSW)) and
         (p.oper[0]^.typ=top_reg) and
         Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
        (((p.opcode = A_XOR) or (p.opcode = A_SUB) or (p.opcode = A_SBB)) and
         (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
         (p.oper[0]^.reg=p.oper[1]^.reg) and
         Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg));
    end;
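
  { IsExitCode recognises the instruction sequences that end a routine:
    a plain "ret", "leave; ret", or the explicit epilogue
    "mov %ebp,%esp; pop %ebp; ret". }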
  class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
    var
      hp2,hp3 : tai;
    begin
      result:=(p.typ=ait_instruction) and
        ((taicpu(p).opcode = A_RET) or
         ((taicpu(p).opcode=A_LEAVE) and
          GetNextInstruction(p,hp2) and
          (hp2.typ=ait_instruction) and
          (taicpu(hp2).opcode=A_RET)
         ) or
         ((taicpu(p).opcode=A_MOV) and
          (taicpu(p).oper[0]^.typ=top_reg) and
          (taicpu(p).oper[0]^.reg=NR_EBP) and
          (taicpu(p).oper[1]^.typ=top_reg) and
          (taicpu(p).oper[1]^.reg=NR_ESP) and
          GetNextInstruction(p,hp2) and
          (hp2.typ=ait_instruction) and
          (taicpu(hp2).opcode=A_POP) and
          (taicpu(hp2).oper[0]^.typ=top_reg) and
          (taicpu(hp2).oper[0]^.reg=NR_EBP) and
          GetNextInstruction(hp2,hp3) and
          (hp3.typ=ait_instruction) and
          (taicpu(hp3).opcode=A_RET)
         )
        );
    end;
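
  { isFoldableArithOp returns true if hp1 is an arithmetic instruction whose
    destination is reg (add/sub/or/xor/and/shl/shr/sar with reg as second
    operand, or inc/dec/neg/not of reg); the MOV optimisations below use it
    to fold mov/op/mov sequences. }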
  class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
    begin
      isFoldableArithOp := False;
      case hp1.opcode of
        A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
          isFoldableArithOp :=
            ((taicpu(hp1).oper[0]^.typ = top_const) or
             ((taicpu(hp1).oper[0]^.typ = top_reg) and
              (taicpu(hp1).oper[0]^.reg <> reg))) and
            (taicpu(hp1).oper[1]^.typ = top_reg) and
            (taicpu(hp1).oper[1]^.reg = reg);
        A_INC,A_DEC,A_NEG,A_NOT:
          isFoldableArithOp :=
            (taicpu(hp1).oper[0]^.typ = top_reg) and
            (taicpu(hp1).oper[0]^.reg = reg);
      end;
    end;
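
  { RemoveLastDeallocForFuncRes removes the last deallocation of the function
    result register(s) (EAX, and EDX for 8 byte ordinal results) before p, so
    that removing a store to the function result location does not leave a
    dangling dealloc behind. }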
  procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);

    procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
      var
        hp2: tai;
      begin
        hp2 := p;
        repeat
          hp2 := tai(hp2.previous);
          if assigned(hp2) and
             (hp2.typ = ait_regalloc) and
             (tai_regalloc(hp2).ratype=ra_dealloc) and
             (getregtype(tai_regalloc(hp2).reg) = R_INTREGISTER) and
             (getsupreg(tai_regalloc(hp2).reg) = supreg) then
            begin
              asml.remove(hp2);
              hp2.free;
              break;
            end;
        until not(assigned(hp2)) or regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),hp2);
      end;

    begin
      case current_procinfo.procdef.returndef.typ of
        arraydef,recorddef,pointerdef,
        stringdef,enumdef,procdef,objectdef,errordef,
        filedef,setdef,procvardef,
        classrefdef,forwarddef:
          DoRemoveLastDeallocForFuncRes(RS_EAX);
        orddef:
          if current_procinfo.procdef.returndef.size <> 0 then
            begin
              DoRemoveLastDeallocForFuncRes(RS_EAX);
              { for int64/qword }
              if current_procinfo.procdef.returndef.size = 8 then
                DoRemoveLastDeallocForFuncRes(RS_EDX);
            end;
      end;
    end;
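
  { OptPass1VMOVAP handles vmova* (vmovaps/vmovapd) with two register
    operands: it drops "vmova* reg1,reg1", merges "vmova* reg1,reg2;
    vmova* reg2,reg3" into a single move when reg2 dies, and (roughly)
    folds the move through a following VFMADD* when the intermediate
    register is not used afterwards. }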
  function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1,hp2 : tai;
    begin
      result:=false;
      if MatchOpType(taicpu(p),top_reg,top_reg) then
        begin
          { vmova* reg1,reg1
            =>
            <nop> }
          if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
            begin
              GetNextInstruction(p,hp1);
              asml.Remove(p);
              p.Free;
              p:=hp1;
              result:=true;
            end
          else if GetNextInstruction(p,hp1) then
            begin
              if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
                 MatchOpType(taicpu(hp1),top_reg,top_reg) and
                 MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
                begin
                  { vmova* reg1,reg2
                    vmova* reg2,reg3
                    dealloc reg2
                    =>
                    vmova* reg1,reg3 }
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                    begin
                      taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end
                  { special case:
                    vmova* reg1,reg2
                    vmova* reg2,reg1
                    =>
                    vmova* reg1,reg2 }
                  else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
                    begin
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end
                end
              else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
                 { we mix single and double operations here because we assume that the compiler
                   generates vmovapd only after double operations and vmovaps only after single operations }
                 MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
                 GetNextInstruction(hp1,hp2) and
                 MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
                 MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
                begin
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
                    begin
                      taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
                      asml.Remove(p);
                      p.Free;
                      asml.Remove(hp2);
                      hp2.Free;
                      p:=hp1;
                    end;
                end;
            end;
        end;
    end;
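
  { OptPass1VOP folds the destination of a three operand vector instruction
    into a following vmovaps/vmovapd when the temporary result register is
    not used afterwards:
      vxxx   ...,...,reg1
      vmova* reg1,reg2      =>    vxxx ...,...,reg2 }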
  function TX86AsmOptimizer.OptPass1VOP(const p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1 : tai;
    begin
      result:=false;
      if GetNextInstruction(p,hp1) and
         { we mix single and double operations here because we assume that the compiler
           generates vmovapd only after double operations and vmovaps only after single operations }
         MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
         MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
         (taicpu(hp1).oper[1]^.typ=top_reg) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)) then
            begin
              taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
              asml.Remove(hp1);
              hp1.Free;
              result:=true;
            end;
        end;
    end;
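
  { OptPass1MOV collects the pass-1 MOV optimisations: removing a redundant
    "and $ffffffff,reg", shortening mov/mov chains through a temporary
    register, replacing a read of the copy by a read of the original to avoid
    a write/read penalty, dropping dead stores before the exit code, and
    rewriting mov/op/mov, mov/bts/or and mov/lea combinations. }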
  function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
    var
      hp1, hp2: tai;
      TmpUsedRegs : TAllUsedRegs;
      GetNextIntruction_p : Boolean;
    begin
      Result:=false;
      GetNextIntruction_p:=GetNextInstruction(p, hp1);
      if GetNextIntruction_p and
         MatchInstruction(hp1,A_AND,[]) and
         (taicpu(p).oper[1]^.typ = top_reg) and
         MatchOpType(taicpu(hp1),top_const,top_reg) and
         MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
        case taicpu(p).opsize Of
          S_L:
            if (taicpu(hp1).oper[0]^.val = $ffffffff) then
              begin
                DebugMsg('PeepHole Optimization,MovAnd2Mov',p);
                asml.remove(hp1);
                hp1.free;
                Result:=true;
                exit;
              end;
        end
      else if GetNextIntruction_p and
         MatchInstruction(hp1,A_MOV,[]) and
         (taicpu(p).oper[1]^.typ = top_reg) and
         (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
         MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
          { we have
              mov x, %treg
              mov %treg, y }
          if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
             not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
            { we've got
                mov x, %treg
                mov %treg, y
              with %treg is not used after }
            case taicpu(p).oper[0]^.typ Of
              top_reg:
                begin
                  { change
                      mov %reg, %treg
                      mov %treg, y
                    to
                      mov %reg, y }
                  taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
                  DebugMsg('PeepHole Optimization,MovMov2Mov 2',p);
                  asml.remove(hp1);
                  hp1.free;
                  ReleaseUsedRegs(TmpUsedRegs);
                  Exit;
                end;
              top_ref:
                if (taicpu(hp1).oper[1]^.typ = top_reg) then
                  begin
                    { change
                        mov mem, %treg
                        mov %treg, %reg
                      to
                        mov mem, %reg }
                    taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                    DebugMsg('PeepHole Optimization,MovMov2Mov 3',p);
                    asml.remove(hp1);
                    hp1.free;
                    ReleaseUsedRegs(TmpUsedRegs);
                    Exit;
                  end;
            end;
          ReleaseUsedRegs(TmpUsedRegs);
        end
      else
        { Change
            mov %reg1, %reg2
            xxx %reg2, ???
          to
            mov %reg1, %reg2
            xxx %reg1, ???
          to avoid a write/read penalty }
        if MatchOpType(taicpu(p),top_reg,top_reg) and
           GetNextInstruction(p,hp1) and
           (tai(hp1).typ = ait_instruction) and
           (taicpu(hp1).ops >= 1) and
           MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
          { we have
              mov %reg1, %reg2
              XXX %reg2, ??? }
          begin
            if ((taicpu(hp1).opcode = A_OR) or
                (taicpu(hp1).opcode = A_TEST)) and
               (taicpu(hp1).oper[1]^.typ = top_reg) and
               (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
              { we have
                  mov %reg1, %reg2
                  test/or %reg2, %reg2 }
              begin
                CopyUsedRegs(TmpUsedRegs);
                { reg1 will be used after the first instruction,
                  so update the allocation info }
                AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                if GetNextInstruction(hp1, hp2) and
                   (hp2.typ = ait_instruction) and
                   taicpu(hp2).is_jmp and
                   not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
                  { change
                      mov %reg1, %reg2
                      test/or %reg2, %reg2
                      jxx
                    to
                      test %reg1, %reg1
                      jxx }
                  begin
                    taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
                    taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
                    asml.remove(p);
                    p.free;
                    p := hp1;
                    ReleaseUsedRegs(TmpUsedRegs);
                    Exit;
                  end
                else
                  { change
                      mov %reg1, %reg2
                      test/or %reg2, %reg2
                    to
                      mov %reg1, %reg2
                      test/or %reg1, %reg1 }
                  begin
                    taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
                    taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
                  end;
                ReleaseUsedRegs(TmpUsedRegs);
              end
          end
      else
        { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
          x >= RetOffset) as it doesn't do anything (it writes either to a
          parameter or to the temporary storage room for the function
          result) }
        if GetNextIntruction_p and
           (tai(hp1).typ = ait_instruction) then
          begin
            if IsExitCode(hp1) and
               MatchOpType(p,top_reg,top_ref) and
               (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
               not(assigned(current_procinfo.procdef.funcretsym) and
                   (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
               (taicpu(p).oper[1]^.ref^.index = NR_NO) then
              begin
                asml.remove(p);
                p.free;
                p:=hp1;
                DebugMsg('Peephole removed deadstore before leave/ret',p);
                RemoveLastDeallocForFuncRes(p);
                exit;
              end
            { change
                mov reg1, mem1
                test/cmp x, mem1
              to
                mov reg1, mem1
                test/cmp x, reg1 }
            else if MatchOpType(p,top_reg,top_ref) and
               MatchInstruction(hp1,A_CMP,A_TEST,[taicpu(p).opsize]) and
               (taicpu(hp1).oper[1]^.typ = top_ref) and
               RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
              begin
                taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
                DebugMsg('Peephole MovTestCmp2MovTestCmp 1',hp1);
                AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
              end;
          end;
      { Next instruction is also a MOV ? }
      if GetNextIntruction_p and
         MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
        begin
          if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
             (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
            { mov reg1, mem1     or     mov mem1, reg1
              mov mem2, reg2            mov reg2, mem2 }
            begin
              if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
                { mov reg1, mem1     or     mov mem1, reg1
                  mov mem2, reg1            mov reg2, mem1 }
                begin
                  if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
                    { Removes the second statement from
                        mov reg1, mem1/reg2
                        mov mem1/reg2, reg1 }
                    begin
                      if taicpu(p).oper[0]^.typ=top_reg then
                        AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                      DebugMsg('PeepHole Optimization,MovMov2Mov 1',p);
                      asml.remove(hp1);
                      hp1.free;
                      Result:=true;
                      exit;
                    end
                  else
                    begin
                      CopyUsedRegs(TmpUsedRegs);
                      UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                      if (taicpu(p).oper[1]^.typ = top_ref) and
                         { mov reg1, mem1
                           mov mem2, reg1 }
                         (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
                         GetNextInstruction(hp1, hp2) and
                         MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
                         OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
                         OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
                         not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
                        { change to
                            mov reg1, mem1        mov reg1, mem1
                            mov mem2, reg1        cmp reg1, mem2
                            cmp mem1, reg1 }
                        begin
                          asml.remove(hp2);
                          hp2.free;
                          taicpu(hp1).opcode := A_CMP;
                          taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
                          taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
                          AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
                          DebugMsg('Peephole MovMovCmp2MovCmp done',hp1);
                        end;
                      ReleaseUsedRegs(TmpUsedRegs);
                    end;
                end
              else if (taicpu(p).oper[1]^.typ=top_ref) and
                 OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
                begin
                  AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
                  taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
                  DebugMsg('PeepHole Optimization,MovMov2MovMov1',p);
                end
              else
                begin
                  CopyUsedRegs(TmpUsedRegs);
                  if GetNextInstruction(hp1, hp2) and
                     MatchOpType(taicpu(p),top_ref,top_reg) and
                     MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
                     (taicpu(hp1).oper[1]^.typ = top_ref) and
                     MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
                     MatchOpType(taicpu(hp2),top_ref,top_reg) and
                     RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
                    if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
                       not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
                      { mov mem1, %reg1
                        mov %reg1, mem2
                        mov mem2, reg2
                        to:
                        mov mem1, reg2
                        mov reg2, mem2 }
                      begin
                        AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
                        DebugMsg('PeepHole Optimization,MovMovMov2MovMov 1',p);
                        taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
                        taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
                        asml.remove(hp2);
                        hp2.free;
                      end
{$ifdef i386}
                    { this is enabled for i386 only, as the rules to create the reg sets below
                      are too complicated for x86-64, so this makes this code too error prone
                      on x86-64 }
                    else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
                       not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
                       not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
                      { mov mem1, reg1         mov mem1, reg1
                        mov reg1, mem2         mov reg1, mem2
                        mov mem2, reg2         mov mem2, reg1
                        to:                    to:
                        mov mem1, reg1         mov mem1, reg1
                        mov mem1, reg2         mov reg1, mem2
                        mov reg1, mem2
                        or (if mem1 depends on reg1
                        and/or if mem2 depends on reg2)
                        to:
                        mov mem1, reg1
                        mov reg1, mem2
                        mov reg1, reg2 }
                      begin
                        taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
                        taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
                        taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
                        taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
                        AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
                        if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
                           (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
                          AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
                        if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
                           (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
                          AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
                      end
                    else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
                      begin
                        taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
                        AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
                      end
                    else
                      begin
                        asml.remove(hp2);
                        hp2.free;
                      end
{$endif i386}
                    ;
                  ReleaseUsedRegs(TmpUsedRegs);
                end;
            end
          (*{ movl [mem1],reg1
              movl [mem1],reg2
              to
              movl [mem1],reg1
              movl reg1,reg2 }
            else if (taicpu(p).oper[0]^.typ = top_ref) and
               (taicpu(p).oper[1]^.typ = top_reg) and
               (taicpu(hp1).oper[0]^.typ = top_ref) and
               (taicpu(hp1).oper[1]^.typ = top_reg) and
               (taicpu(p).opsize = taicpu(hp1).opsize) and
               RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
               (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
               (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
              taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
            else*)
          { movl const1,[mem1]
            movl [mem1],reg1
            to
            movl const1,reg1
            movl reg1,[mem1] }
          else if MatchOpType(Taicpu(p),top_const,top_ref) and
             MatchOpType(Taicpu(hp1),top_ref,top_reg) and
             (taicpu(p).opsize = taicpu(hp1).opsize) and
             RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
             not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
            begin
              AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
              taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
              taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
              taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
              taicpu(hp1).fileinfo := taicpu(p).fileinfo;
              DebugMsg('PeepHole Optimization,MovMov2MovMov 1',p);
            end
        end
      else if (taicpu(p).oper[1]^.typ = top_reg) and
         GetNextIntruction_p and
         (hp1.typ = ait_instruction) and
         GetNextInstruction(hp1, hp2) and
         MatchInstruction(hp2,A_MOV,[]) and
         OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
         (taicpu(hp2).oper[0]^.typ=top_reg) and
         (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
         (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
          ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
           IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
         ) then
        { change   movsX/movzX reg/ref, reg2
                   add/sub/or/... reg3/$const, reg2
                   mov reg2 reg/ref
          to       add/sub/or/... reg3/$const, reg/ref }
        begin
          CopyUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
          If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
            begin
              { by example:
                  movswl  %si,%eax        movswl  %si,%eax      p
                  decl    %eax            addl    %edx,%eax     hp1
                  movw    %ax,%si         movw    %ax,%si       hp2
                ->
                  movswl  %si,%eax        movswl  %si,%eax      p
                  decw    %eax            addw    %edx,%eax     hp1
                  movw    %ax,%si         movw    %ax,%si       hp2
              }
              DebugMsg('Peephole Optimization MovOpMov2Op ('+
                std_op2str[taicpu(p).opcode]+gas_opsize2str[taicpu(p).opsize]+' '+
                std_op2str[taicpu(hp1).opcode]+gas_opsize2str[taicpu(hp1).opsize]+' '+
                std_op2str[taicpu(hp2).opcode]+gas_opsize2str[taicpu(hp2).opsize],p);
              taicpu(hp1).changeopsize(taicpu(hp2).opsize);
              {
                ->
                  movswl  %si,%eax        movswl  %si,%eax      p
                  decw    %si             addw    %dx,%si       hp1
                  movw    %ax,%si         movw    %ax,%si       hp2
              }
              case taicpu(hp1).ops of
                1:
                  begin
                    taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
                    if taicpu(hp1).oper[0]^.typ=top_reg then
                      setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
                  end;
                2:
                  begin
                    taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
                    if (taicpu(hp1).oper[0]^.typ=top_reg) and
                       (taicpu(hp1).opcode<>A_SHL) and
                       (taicpu(hp1).opcode<>A_SHR) and
                       (taicpu(hp1).opcode<>A_SAR) then
                      setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
                  end;
                else
                  internalerror(2008042701);
              end;
              {
                ->
                  decw    %si             addw    %dx,%si       p
              }
              asml.remove(p);
              asml.remove(hp2);
              p.Free;
              hp2.Free;
              p := hp1;
            end;
          ReleaseUsedRegs(TmpUsedRegs);
        end
      else if GetNextIntruction_p and
         MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
         GetNextInstruction(hp1, hp2) and
         MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
         MatchOperand(Taicpu(p).oper[0]^,0) and
         (Taicpu(p).oper[1]^.typ = top_reg) and
         MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
         MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
        { mov reg1,0
          bts reg1,operand1        -->     mov reg1,operand2
          or  reg1,operand2                bts reg1,operand1 }
        begin
          Taicpu(hp2).opcode:=A_MOV;
          asml.remove(hp1);
          insertllitem(hp2,hp2.next,hp1);
          asml.remove(p);
          p.free;
          p:=hp1;
        end
      else if GetNextIntruction_p and
         MatchInstruction(hp1,A_LEA,[S_L]) and
         MatchOpType(Taicpu(p),top_ref,top_reg) and
         ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
           (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
          ) or
          (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
           (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
          )
         ) then
        { mov reg1,ref
          lea reg2,[reg1,reg2]
          to
          add reg2,ref }
        begin
          CopyUsedRegs(TmpUsedRegs);
          { reg1 may not be used afterwards }
          if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
            begin
              Taicpu(hp1).opcode:=A_ADD;
              Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
              DebugMsg('Peephole MovLea2Add done',hp1);
              asml.remove(p);
              p.free;
              p:=hp1;
            end;
          ReleaseUsedRegs(TmpUsedRegs);
        end;
    end;
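
  { OptPass2MOV: pass-2 MOV optimisations. It folds "mov reg1,reg2" into the
    addressing of a following mov/movzx/movsx that overwrites reg2, and turns
    "mov (ref),reg; op ...,reg; mov reg,(ref)" back into a single memory
    operation (including a lea that can be rewritten as add) when reg is not
    used afterwards. }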
  function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1,hp2: tai;
    begin
      Result:=false;
      if MatchOpType(taicpu(p),top_reg,top_reg) and
         GetNextInstruction(p, hp1) and
         MatchInstruction(hp1,A_MOV,A_MOVZX,A_MOVSX,[]) and
         MatchOpType(taicpu(hp1),top_ref,top_reg) and
         ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg)
          or
          (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)
         ) and
         (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
        { mov reg1, reg2
          mov/zx/sx (reg2, ..), reg2      to      mov/zx/sx (reg1, ..), reg2 }
        begin
          if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
            taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
          if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
            taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
          asml.remove(p);
          p.free;
          p := hp1;
          Result:=true;
          exit;
        end
      else if (taicpu(p).oper[0]^.typ = top_ref) and
         GetNextInstruction(p,hp1) and
         (hp1.typ = ait_instruction) and
         { while the GetNextInstruction(hp1,hp2) call could be factored out,
           doing it separately in both branches allows to do the cheap checks
           with low probability earlier }
         ((IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
           GetNextInstruction(hp1,hp2) and
           MatchInstruction(hp2,A_MOV,[])
          ) or
          ((taicpu(hp1).opcode=A_LEA) and
           GetNextInstruction(hp1,hp2) and
           MatchInstruction(hp2,A_MOV,[]) and
           ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
             (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
            ) or
            (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,
              taicpu(p).oper[1]^.reg) and
             (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg)) or
            (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_NO)) or
            (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,NR_NO,taicpu(p).oper[1]^.reg))
           ) and
           ((MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^)) or not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)))
          )
         ) and
         MatchOperand(taicpu(hp1).oper[taicpu(hp1).ops-1]^,taicpu(hp2).oper[0]^) and
         (taicpu(hp2).oper[1]^.typ = top_ref) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
          if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
              not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2, TmpUsedRegs))) then
            { change   mov            (ref), reg
                       add/sub/or/... reg2/$const, reg
                       mov            reg, (ref)
                       # release reg
              to       add/sub/or/... reg2/$const, (ref) }
            begin
              case taicpu(hp1).opcode of
                A_INC,A_DEC,A_NOT,A_NEG :
                  taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
                A_LEA :
                  begin
                    taicpu(hp1).opcode:=A_ADD;
                    if (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.index<>NR_NO) then
                      taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
                    else if (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.base<>NR_NO) then
                      taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base)
                    else
                      taicpu(hp1).loadconst(0,taicpu(hp1).oper[0]^.ref^.offset);
                    taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                    DebugMsg('Peephole FoldLea done',hp1);
                  end
                else
                  taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
              end;
              asml.remove(p);
              asml.remove(hp2);
              p.free;
              hp2.free;
              p := hp1
            end;
          ReleaseUsedRegs(TmpUsedRegs);
        end;
    end;
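
  { OptPass2Imul folds a preceding register copy into a two operand imul by
    switching to the three operand form, as in the inline comment below:
      mov  reg1,reg2
      imul y,reg2       =>     imul y,reg1,reg2 }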
  function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1 : tai;
    begin
      Result:=false;
      if (taicpu(p).ops >= 2) and
         ((taicpu(p).oper[0]^.typ = top_const) or
          ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
         (taicpu(p).oper[1]^.typ = top_reg) and
         ((taicpu(p).ops = 2) or
          ((taicpu(p).oper[2]^.typ = top_reg) and
           (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
         GetLastInstruction(p,hp1) and
         MatchInstruction(hp1,A_MOV,[]) and
         MatchOpType(hp1,top_reg,top_reg) and
         ((taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) or
          ((taicpu(hp1).opsize=S_L) and (taicpu(p).opsize=S_Q) and SuperRegistersEqual(taicpu(hp1).oper[1]^.reg,taicpu(p).oper[1]^.reg))) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,p,TmpUsedRegs)) then
            { change
                mov  reg1,reg2
                imul y,reg2      to      imul y,reg1,reg2 }
            begin
              taicpu(p).ops := 3;
              taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
              taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
              DebugMsg('Peephole MovImul2Imul done',p);
              asml.remove(hp1);
              hp1.free;
              result:=true;
            end;
          ReleaseUsedRegs(TmpUsedRegs);
        end;
    end;
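
  { OptPass1AND: pass-1 AND optimisations. It merges two consecutive and
    instructions on the same register ("and const1,reg; and const2,reg" ->
    "and (const1 and const2),reg"), drops a movzx/movsx that cannot change
    the already masked value, and turns "and x,reg" into "test x,reg" in
    front of a conditional jump when reg is no longer live. }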
  function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
    var
      hp1 : tai;
    begin
      Result:=false;
      if not(GetNextInstruction(p, hp1)) then
        exit;
      if MatchOpType(p,top_const,top_reg) and
         MatchInstruction(hp1,A_AND,[]) and
         MatchOpType(hp1,top_const,top_reg) and
         (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
         { the second register must contain the first one, so compare their subreg types }
         (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
         (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
        { change
            and const1, reg
            and const2, reg
          to
            and (const1 and const2), reg }
        begin
          taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
          DebugMsg('Peephole AndAnd2And done',hp1);
          asml.remove(p);
          p.Free;
          p:=hp1;
          Result:=true;
          exit;
        end
      else if MatchOpType(p,top_const,top_reg) and
         MatchInstruction(hp1,A_MOVZX,[]) and
         (taicpu(hp1).oper[0]^.typ = top_reg) and
         MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
         (getsubreg(taicpu(hp1).oper[0]^.reg)=getsubreg(taicpu(hp1).oper[1]^.reg)) and
         (((taicpu(p).opsize=S_W) and
           (taicpu(hp1).opsize=S_BW)) or
          ((taicpu(p).opsize=S_L) and
           (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
          or
          ((taicpu(p).opsize=S_Q) and
           (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
         ) then
        begin
          if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
             ) or
             (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
             or
             (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $ffffffff)=taicpu(p).oper[0]^.val)
             )
{$endif x86_64}
             then
            begin
              DebugMsg('Peephole AndMovzToAnd done',p);
              asml.remove(hp1);
              hp1.free;
            end;
        end
      else if MatchOpType(p,top_const,top_reg) and
         MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
         (taicpu(hp1).oper[0]^.typ = top_reg) and
         MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
         (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
         (((taicpu(p).opsize=S_W) and
           (taicpu(hp1).opsize=S_BW)) or
          ((taicpu(p).opsize=S_L) and
           (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
          or
          ((taicpu(p).opsize=S_Q) and
           (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
         ) then
        begin
          if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
             ) or
             (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
             or
             (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
             )
{$endif x86_64}
             then
            begin
              DebugMsg('PeepHole Optimization,AndMovsxToAnd',p);
              asml.remove(hp1);
              hp1.free;
            end;
        end
      else if (taicpu(p).oper[1]^.typ = top_reg) and
         (hp1.typ = ait_instruction) and
         (taicpu(hp1).is_jmp) and
         (taicpu(hp1).opcode<>A_JMP) and
         not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
        { change
            and x, reg
            jxx
          to
            test x, reg
            jxx
          if reg is deallocated before the
          jump, but only if it's a conditional jump (PFV) }
        taicpu(p).opcode := A_TEST;
    end;
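
  { PostPeepholeOptMov replaces "mov $0,%reg" by the shorter "xor %reg,%reg"
    once the flags register is known to be unused, since xor clobbers the
    flags while mov does not. }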
  procedure TX86AsmOptimizer.PostPeepholeOptMov(const p : tai);
    begin
      if MatchOperand(taicpu(p).oper[0]^,0) and
         (taicpu(p).oper[1]^.typ = Top_Reg) and
         not(RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
        { change "mov $0, %reg" into "xor %reg, %reg" }
        begin
          taicpu(p).opcode := A_XOR;
          taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
        end;
    end;

end.
  1384. end.