aoptx86.pas 62 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
  27. type
  28. TX86AsmOptimizer = class(TAsmOptimizer)
  29. function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
  30. protected
  31. { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
  32. function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
  33. { checks whether reading the value in reg1 depends on the value of reg2. This
  34. is very similar to SuperRegisterEquals, except it takes into account that
  35. R_SUBH and R_SUBL are independendent (e.g. reading from AL does not
  36. depend on the value in AH). }
  37. function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
  38. procedure PostPeepholeOptMov(const p : tai);
  39. function OptPass1AND(var p : tai) : boolean;
  40. function OptPass1VMOVAP(var p : tai) : boolean;
  41. function OptPass1VOP(const p : tai) : boolean;
  42. function OptPass1MOV(var p : tai) : boolean;
  43. function OptPass2MOV(var p : tai) : boolean;
  44. function OptPass2Imul(var p : tai) : boolean;
  45. function OptPass2Jmp(var p : tai) : boolean;
  46. procedure DebugMsg(const s : string; p : tai);inline;
  47. procedure AllocRegBetween(reg : tregister; p1,p2 : tai;var initialusedregs : TAllUsedRegs);
  48. class function IsExitCode(p : tai) : boolean;
  49. class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean;
  50. procedure RemoveLastDeallocForFuncRes(p : tai);
  51. end;
  52. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  53. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  54. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  55. function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;
  56. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  57. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  58. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  59. function RefsEqual(const r1, r2: treference): boolean;
  60. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  61. { returns true, if ref is a reference using only the registers passed as base and index
  62. and having an offset }
  63. function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  64. function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
  65. implementation
  66. uses
  67. cutils,
  68. verbose,
  69. procinfo,
  70. aasmbase,
  71. aoptutils,
  72. symconst,symsym,
  73. itcpugas;
  74. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  75. begin
  76. result :=
  77. (instr.typ = ait_instruction) and
  78. (taicpu(instr).opcode = op) and
  79. ((opsize = []) or (taicpu(instr).opsize in opsize));
  80. end;
  81. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  82. begin
  83. result :=
  84. (instr.typ = ait_instruction) and
  85. ((taicpu(instr).opcode = op1) or
  86. (taicpu(instr).opcode = op2)
  87. ) and
  88. ((opsize = []) or (taicpu(instr).opsize in opsize));
  89. end;
  90. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  91. begin
  92. result :=
  93. (instr.typ = ait_instruction) and
  94. ((taicpu(instr).opcode = op1) or
  95. (taicpu(instr).opcode = op2) or
  96. (taicpu(instr).opcode = op3)
  97. ) and
  98. ((opsize = []) or (taicpu(instr).opsize in opsize));
  99. end;
  100. function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
  101. const opsize : topsizes) : boolean;
  102. var
  103. op : TAsmOp;
  104. begin
  105. result:=false;
  106. for op in ops do
  107. begin
  108. if (instr.typ = ait_instruction) and
  109. (taicpu(instr).opcode = op) and
  110. ((opsize = []) or (taicpu(instr).opsize in opsize)) then
  111. begin
  112. result:=true;
  113. exit;
  114. end;
  115. end;
  116. end;
  117. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  118. begin
  119. result := (oper.typ = top_reg) and (oper.reg = reg);
  120. end;
  121. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  122. begin
  123. result := (oper.typ = top_const) and (oper.val = a);
  124. end;
  125. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  126. begin
  127. result := oper1.typ = oper2.typ;
  128. if result then
  129. case oper1.typ of
  130. top_const:
  131. Result:=oper1.val = oper2.val;
  132. top_reg:
  133. Result:=oper1.reg = oper2.reg;
  134. top_ref:
  135. Result:=RefsEqual(oper1.ref^, oper2.ref^);
  136. else
  137. internalerror(2013102801);
  138. end
  139. end;
  140. function RefsEqual(const r1, r2: treference): boolean;
  141. begin
  142. RefsEqual :=
  143. (r1.offset = r2.offset) and
  144. (r1.segment = r2.segment) and (r1.base = r2.base) and
  145. (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
  146. (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
  147. (r1.relsymbol = r2.relsymbol);
  148. end;
  149. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  150. begin
  151. Result:=(ref.offset=0) and
  152. (ref.scalefactor in [0,1]) and
  153. (ref.segment=NR_NO) and
  154. (ref.symbol=nil) and
  155. (ref.relsymbol=nil) and
  156. ((base=NR_INVALID) or
  157. (ref.base=base)) and
  158. ((index=NR_INVALID) or
  159. (ref.index=index));
  160. end;
  161. function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  162. begin
  163. Result:=(ref.scalefactor in [0,1]) and
  164. (ref.segment=NR_NO) and
  165. (ref.symbol=nil) and
  166. (ref.relsymbol=nil) and
  167. ((base=NR_INVALID) or
  168. (ref.base=base)) and
  169. ((index=NR_INVALID) or
  170. (ref.index=index));
  171. end;
  172. function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
  173. begin
  174. Result:=(taicpu(instr).ops=2) and
  175. (taicpu(instr).oper[0]^.typ=ot0) and
  176. (taicpu(instr).oper[1]^.typ=ot1);
  177. end;
  178. {$ifdef DEBUG_AOPTCPU}
  179. procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
  180. begin
  181. asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  182. end;
  183. {$else DEBUG_AOPTCPU}
  184. procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  185. begin
  186. end;
  187. {$endif DEBUG_AOPTCPU}
  188. function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
  189. begin
  190. if not SuperRegistersEqual(reg1,reg2) then
  191. exit(false);
  192. if getregtype(reg1)<>R_INTREGISTER then
  193. exit(true); {because SuperRegisterEqual is true}
  194. case getsubreg(reg1) of
  195. { A write to R_SUBL doesn't change R_SUBH and if reg2 is R_SUBW or
  196. higher, it preserves the high bits, so the new value depends on
  197. reg2's previous value. In other words, it is equivalent to doing:
  198. reg2 := (reg2 and $ffffff00) or byte(reg1); }
  199. R_SUBL:
  200. exit(getsubreg(reg2)=R_SUBL);
  201. { A write to R_SUBH doesn't change R_SUBL and if reg2 is R_SUBW or
  202. higher, it actually does a:
  203. reg2 := (reg2 and $ffff00ff) or (reg1 and $ff00); }
  204. R_SUBH:
  205. exit(getsubreg(reg2)=R_SUBH);
  206. { If reg2 is R_SUBD or larger, a write to R_SUBW preserves the high 16
  207. bits of reg2:
  208. reg2 := (reg2 and $ffff0000) or word(reg1); }
  209. R_SUBW:
  210. exit(getsubreg(reg2) in [R_SUBL,R_SUBH,R_SUBW]);
  211. { a write to R_SUBD always overwrites every other subregister,
  212. because it clears the high 32 bits of R_SUBQ on x86_64 }
  213. R_SUBD,
  214. R_SUBQ:
  215. exit(true);
  216. else
  217. internalerror(2017042801);
  218. end;
  219. end;
  220. function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
  221. begin
  222. if not SuperRegistersEqual(reg1,reg2) then
  223. exit(false);
  224. if getregtype(reg1)<>R_INTREGISTER then
  225. exit(true); {because SuperRegisterEqual is true}
  226. case getsubreg(reg1) of
  227. R_SUBL:
  228. exit(getsubreg(reg2)<>R_SUBH);
  229. R_SUBH:
  230. exit(getsubreg(reg2)<>R_SUBL);
  231. R_SUBW,
  232. R_SUBD,
  233. R_SUBQ:
  234. exit(true);
  235. else
  236. internalerror(2017042802);
  237. end;
  238. end;
  239. { allocates register reg between (and including) instructions p1 and p2
  240. the type of p1 and p2 must not be in SkipInstr
  241. note that this routine is both called from the peephole optimizer
  242. where optinfo is not yet initialised) and from the cse (where it is) }
  243. procedure TX86AsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
  244. var
  245. hp, start: tai;
  246. removedsomething,
  247. firstRemovedWasAlloc,
  248. lastRemovedWasDealloc: boolean;
  249. begin
  250. {$ifdef EXTDEBUG}
  251. { if assigned(p1.optinfo) and
  252. (ptaiprop(p1.optinfo)^.usedregs <> initialusedregs) then
  253. internalerror(2004101010); }
  254. {$endif EXTDEBUG}
  255. start := p1;
  256. if (reg = NR_ESP) or
  257. (reg = current_procinfo.framepointer) or
  258. not(assigned(p1)) then
  259. { this happens with registers which are loaded implicitely, outside the }
  260. { current block (e.g. esi with self) }
  261. exit;
  262. { make sure we allocate it for this instruction }
  263. getnextinstruction(p2,p2);
  264. lastRemovedWasDealloc := false;
  265. removedSomething := false;
  266. firstRemovedWasAlloc := false;
  267. {$ifdef allocregdebug}
  268. hp := tai_comment.Create(strpnew('allocating '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
  269. ' from here...'));
  270. insertllitem(asml,p1.previous,p1,hp);
  271. hp := tai_comment.Create(strpnew('allocated '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
  272. ' till here...'));
  273. insertllitem(asml,p2,p2.next,hp);
  274. {$endif allocregdebug}
  275. { do it the safe way: always allocate the full super register,
  276. as we do no register re-allocation in the peephole optimizer,
  277. this does not hurt
  278. }
  279. case getregtype(reg) of
  280. R_MMREGISTER:
  281. reg:=newreg(R_MMREGISTER,getsupreg(reg),R_SUBMMWHOLE);
  282. R_INTREGISTER:
  283. reg:=newreg(R_INTREGISTER,getsupreg(reg),R_SUBWHOLE);
  284. end;
  285. if not(RegInUsedRegs(reg,initialusedregs)) then
  286. begin
  287. hp := tai_regalloc.alloc(reg,nil);
  288. insertllItem(p1.previous,p1,hp);
  289. IncludeRegInUsedRegs(reg,initialusedregs);
  290. end;
  291. while assigned(p1) and
  292. (p1 <> p2) do
  293. begin
  294. if assigned(p1.optinfo) then
  295. internalerror(2014022301); // IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs);
  296. p1 := tai(p1.next);
  297. repeat
  298. while assigned(p1) and
  299. (p1.typ in (SkipInstr-[ait_regalloc])) Do
  300. p1 := tai(p1.next);
  301. { remove all allocation/deallocation info about the register in between }
  302. if assigned(p1) and
  303. (p1.typ = ait_regalloc) then
  304. begin
  305. { same super register, different sub register? }
  306. if SuperRegistersEqual(reg,tai_regalloc(p1).reg) and (tai_regalloc(p1).reg<>reg) then
  307. begin
  308. if (getsubreg(tai_regalloc(p1).reg)>getsubreg(reg)) or (getsubreg(reg)=R_SUBH) then
  309. internalerror(2016101501);
  310. tai_regalloc(p1).reg:=reg;
  311. end;
  312. if tai_regalloc(p1).reg=reg then
  313. begin
  314. if not removedSomething then
  315. begin
  316. firstRemovedWasAlloc := tai_regalloc(p1).ratype=ra_alloc;
  317. removedSomething := true;
  318. end;
  319. lastRemovedWasDealloc := (tai_regalloc(p1).ratype=ra_dealloc);
  320. hp := tai(p1.Next);
  321. asml.Remove(p1);
  322. p1.free;
  323. p1 := hp;
  324. end
  325. else
  326. p1 := tai(p1.next);
  327. end;
  328. until not(assigned(p1)) or
  329. not(p1.typ in SkipInstr);
  330. end;
  331. if assigned(p1) then
  332. begin
  333. if firstRemovedWasAlloc then
  334. begin
  335. hp := tai_regalloc.Alloc(reg,nil);
  336. insertLLItem(start.previous,start,hp);
  337. end;
  338. if lastRemovedWasDealloc then
  339. begin
  340. hp := tai_regalloc.DeAlloc(reg,nil);
  341. insertLLItem(p1.previous,p1,hp);
  342. end;
  343. end;
  344. end;
  345. function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  346. var
  347. p: taicpu;
  348. begin
  349. if not assigned(hp) or
  350. (hp.typ <> ait_instruction) then
  351. begin
  352. Result := false;
  353. exit;
  354. end;
  355. p := taicpu(hp);
  356. if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
  357. with insprop[p.opcode] do
  358. begin
  359. case getsubreg(reg) of
  360. R_SUBW,R_SUBD,R_SUBQ:
  361. Result:=
  362. RegLoadedWithNewValue(NR_CARRYFLAG,hp) and
  363. RegLoadedWithNewValue(NR_PARITYFLAG,hp) and
  364. RegLoadedWithNewValue(NR_AUXILIARYFLAG,hp) and
  365. RegLoadedWithNewValue(NR_ZEROFLAG,hp) and
  366. RegLoadedWithNewValue(NR_SIGNFLAG,hp) and
  367. RegLoadedWithNewValue(NR_OVERFLOWFLAG,hp);
  368. R_SUBFLAGCARRY:
  369. Result:=[Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag,Ch_WFlags]*Ch<>[];
  370. R_SUBFLAGPARITY:
  371. Result:=[Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag,Ch_WFlags]*Ch<>[];
  372. R_SUBFLAGAUXILIARY:
  373. Result:=[Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag,Ch_WFlags]*Ch<>[];
  374. R_SUBFLAGZERO:
  375. Result:=[Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag,Ch_WFlags]*Ch<>[];
  376. R_SUBFLAGSIGN:
  377. Result:=[Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag,Ch_WFlags]*Ch<>[];
  378. R_SUBFLAGOVERFLOW:
  379. Result:=[Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag,Ch_WFlags]*Ch<>[];
  380. R_SUBFLAGINTERRUPT:
  381. Result:=[Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*Ch<>[];
  382. R_SUBFLAGDIRECTION:
  383. Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*Ch<>[];
  384. else
  385. internalerror(2017050501);
  386. end;
  387. exit;
  388. end;
  389. Result :=
  390. (((p.opcode = A_MOV) or
  391. (p.opcode = A_MOVZX) or
  392. (p.opcode = A_MOVSX) or
  393. (p.opcode = A_LEA) or
  394. (p.opcode = A_VMOVSS) or
  395. (p.opcode = A_VMOVSD) or
  396. (p.opcode = A_VMOVAPD) or
  397. (p.opcode = A_VMOVAPS) or
  398. (p.opcode = A_VMOVQ) or
  399. (p.opcode = A_MOVSS) or
  400. (p.opcode = A_MOVSD) or
  401. (p.opcode = A_MOVQ) or
  402. (p.opcode = A_MOVAPD) or
  403. (p.opcode = A_MOVAPS) or
  404. {$ifndef x86_64}
  405. (p.opcode = A_LDS) or
  406. (p.opcode = A_LES) or
  407. {$endif not x86_64}
  408. (p.opcode = A_LFS) or
  409. (p.opcode = A_LGS) or
  410. (p.opcode = A_LSS)) and
  411. (p.ops=2) and { A_MOVSD can have zero operands, so this check is needed }
  412. (p.oper[1]^.typ = top_reg) and
  413. (Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)) and
  414. ((p.oper[0]^.typ = top_const) or
  415. ((p.oper[0]^.typ = top_reg) and
  416. not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
  417. ((p.oper[0]^.typ = top_ref) and
  418. not RegInRef(reg,p.oper[0]^.ref^)))) or
  419. ((p.opcode = A_POP) and
  420. (Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg))) or
  421. ((p.opcode = A_IMUL) and
  422. (p.ops=3) and
  423. (Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg,reg)) and
  424. (((p.oper[1]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[1]^.reg,reg))) or
  425. ((p.oper[1]^.typ=top_ref) and not(RegInRef(reg,p.oper[1]^.ref^))))) or
  426. ((((p.opcode = A_IMUL) or
  427. (p.opcode = A_MUL)) and
  428. (p.ops=1)) and
  429. (((p.oper[0]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
  430. ((p.oper[0]^.typ=top_ref) and not(RegInRef(reg,p.oper[0]^.ref^)))) and
  431. (((p.opsize=S_B) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
  432. ((p.opsize=S_W) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
  433. ((p.opsize=S_L) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg))
  434. {$ifdef x86_64}
  435. or ((p.opsize=S_Q) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg))
  436. {$endif x86_64}
  437. )) or
  438. ((p.opcode = A_CWD) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
  439. ((p.opcode = A_CDQ) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg)) or
  440. {$ifdef x86_64}
  441. ((p.opcode = A_CQO) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg)) or
  442. {$endif x86_64}
  443. ((p.opcode = A_CBW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
  444. {$ifndef x86_64}
  445. ((p.opcode = A_LDS) and (reg=NR_DS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  446. ((p.opcode = A_LES) and (reg=NR_ES) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  447. {$endif not x86_64}
  448. ((p.opcode = A_LFS) and (reg=NR_FS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  449. ((p.opcode = A_LGS) and (reg=NR_GS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  450. ((p.opcode = A_LSS) and (reg=NR_SS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  451. {$ifndef x86_64}
  452. ((p.opcode = A_AAM) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
  453. {$endif not x86_64}
  454. ((p.opcode = A_LAHF) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
  455. ((p.opcode = A_LODSB) and Reg1WriteOverwritesReg2Entirely(NR_AL,reg)) or
  456. ((p.opcode = A_LODSW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg)) or
  457. ((p.opcode = A_LODSD) and Reg1WriteOverwritesReg2Entirely(NR_EAX,reg)) or
  458. {$ifdef x86_64}
  459. ((p.opcode = A_LODSQ) and Reg1WriteOverwritesReg2Entirely(NR_RAX,reg)) or
  460. {$endif x86_64}
  461. ((p.opcode = A_SETcc) and (p.oper[0]^.typ=top_reg) and Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
  462. (((p.opcode = A_FSTSW) or
  463. (p.opcode = A_FNSTSW)) and
  464. (p.oper[0]^.typ=top_reg) and
  465. Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
  466. (((p.opcode = A_XOR) or (p.opcode = A_SUB) or (p.opcode = A_SBB)) and
  467. (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
  468. (p.oper[0]^.reg=p.oper[1]^.reg) and
  469. Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg));
  470. end;
  471. class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
  472. var
  473. hp2,hp3 : tai;
  474. begin
  475. result:=(p.typ=ait_instruction) and
  476. ((taicpu(p).opcode = A_RET) or
  477. ((taicpu(p).opcode=A_LEAVE) and
  478. GetNextInstruction(p,hp2) and
  479. (hp2.typ=ait_instruction) and
  480. (taicpu(hp2).opcode=A_RET)
  481. ) or
  482. ((taicpu(p).opcode=A_MOV) and
  483. (taicpu(p).oper[0]^.typ=top_reg) and
  484. (taicpu(p).oper[0]^.reg=NR_EBP) and
  485. (taicpu(p).oper[1]^.typ=top_reg) and
  486. (taicpu(p).oper[1]^.reg=NR_ESP) and
  487. GetNextInstruction(p,hp2) and
  488. (hp2.typ=ait_instruction) and
  489. (taicpu(hp2).opcode=A_POP) and
  490. (taicpu(hp2).oper[0]^.typ=top_reg) and
  491. (taicpu(hp2).oper[0]^.reg=NR_EBP) and
  492. GetNextInstruction(hp2,hp3) and
  493. (hp3.typ=ait_instruction) and
  494. (taicpu(hp3).opcode=A_RET)
  495. )
  496. );
  497. end;
  498. class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
  499. begin
  500. isFoldableArithOp := False;
  501. case hp1.opcode of
  502. A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
  503. isFoldableArithOp :=
  504. ((taicpu(hp1).oper[0]^.typ = top_const) or
  505. ((taicpu(hp1).oper[0]^.typ = top_reg) and
  506. (taicpu(hp1).oper[0]^.reg <> reg))) and
  507. (taicpu(hp1).oper[1]^.typ = top_reg) and
  508. (taicpu(hp1).oper[1]^.reg = reg);
  509. A_INC,A_DEC,A_NEG,A_NOT:
  510. isFoldableArithOp :=
  511. (taicpu(hp1).oper[0]^.typ = top_reg) and
  512. (taicpu(hp1).oper[0]^.reg = reg);
  513. end;
  514. end;
  515. procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);
  516. procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
  517. var
  518. hp2: tai;
  519. begin
  520. hp2 := p;
  521. repeat
  522. hp2 := tai(hp2.previous);
  523. if assigned(hp2) and
  524. (hp2.typ = ait_regalloc) and
  525. (tai_regalloc(hp2).ratype=ra_dealloc) and
  526. (getregtype(tai_regalloc(hp2).reg) = R_INTREGISTER) and
  527. (getsupreg(tai_regalloc(hp2).reg) = supreg) then
  528. begin
  529. asml.remove(hp2);
  530. hp2.free;
  531. break;
  532. end;
  533. until not(assigned(hp2)) or regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),hp2);
  534. end;
  535. begin
  536. case current_procinfo.procdef.returndef.typ of
  537. arraydef,recorddef,pointerdef,
  538. stringdef,enumdef,procdef,objectdef,errordef,
  539. filedef,setdef,procvardef,
  540. classrefdef,forwarddef:
  541. DoRemoveLastDeallocForFuncRes(RS_EAX);
  542. orddef:
  543. if current_procinfo.procdef.returndef.size <> 0 then
  544. begin
  545. DoRemoveLastDeallocForFuncRes(RS_EAX);
  546. { for int64/qword }
  547. if current_procinfo.procdef.returndef.size = 8 then
  548. DoRemoveLastDeallocForFuncRes(RS_EDX);
  549. end;
  550. end;
  551. end;
  552. function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
  553. var
  554. TmpUsedRegs : TAllUsedRegs;
  555. hp1,hp2 : tai;
  556. begin
  557. result:=false;
  558. if MatchOpType(taicpu(p),top_reg,top_reg) then
  559. begin
  560. { vmova* reg1,reg1
  561. =>
  562. <nop> }
  563. if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
  564. begin
  565. GetNextInstruction(p,hp1);
  566. asml.Remove(p);
  567. p.Free;
  568. p:=hp1;
  569. result:=true;
  570. end
  571. else if GetNextInstruction(p,hp1) then
  572. begin
  573. if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
  574. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  575. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  576. begin
  577. { vmova* reg1,reg2
  578. vmova* reg2,reg3
  579. dealloc reg2
  580. =>
  581. vmova* reg1,reg3 }
  582. CopyUsedRegs(TmpUsedRegs);
  583. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  584. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  585. begin
  586. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  587. asml.Remove(hp1);
  588. hp1.Free;
  589. result:=true;
  590. end
  591. { special case:
  592. vmova* reg1,reg2
  593. vmova* reg2,reg1
  594. =>
  595. vmova* reg1,reg2 }
  596. else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
  597. begin
  598. asml.Remove(hp1);
  599. hp1.Free;
  600. result:=true;
  601. end
  602. end
  603. else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
  604. { we mix single and double opperations here because we assume that the compiler
  605. generates vmovapd only after double operations and vmovaps only after single operations }
  606. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
  607. GetNextInstruction(hp1,hp2) and
  608. MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
  609. MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
  610. begin
  611. CopyUsedRegs(TmpUsedRegs);
  612. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  613. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  614. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs))
  615. then
  616. begin
  617. taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
  618. asml.Remove(p);
  619. p.Free;
  620. asml.Remove(hp2);
  621. hp2.Free;
  622. p:=hp1;
  623. end;
  624. end;
  625. end;
  626. end;
  627. end;
  628. function TX86AsmOptimizer.OptPass1VOP(const p : tai) : boolean;
  629. var
  630. TmpUsedRegs : TAllUsedRegs;
  631. hp1 : tai;
  632. begin
  633. result:=false;
  634. if GetNextInstruction(p,hp1) and
  635. { we mix single and double opperations here because we assume that the compiler
  636. generates vmovapd only after double operations and vmovaps only after single operations }
  637. MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
  638. MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
  639. (taicpu(hp1).oper[1]^.typ=top_reg) then
  640. begin
  641. CopyUsedRegs(TmpUsedRegs);
  642. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  643. if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)
  644. ) then
  645. begin
  646. taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
  647. asml.Remove(hp1);
  648. hp1.Free;
  649. result:=true;
  650. end;
  651. end;
  652. end;
  { First-pass peephole optimisations for a MOV instruction.

    p is the MOV being examined; hp1/hp2 are the one or two instructions that
    follow it. Depending on what follows, the routine
      - drops a redundant "and $ffffffff" after a 32 bit mov (MovAnd2Mov),
      - merges "mov x,%treg; mov %treg,y" when %treg is dead afterwards
        (MovMov2Mov 2/3),
      - redirects a following test/or (or any instruction reading the copy) to
        the source register of a reg-to-reg mov,
      - removes a store to the frame directly before the exit code,
      - replaces a memory operand of a following cmp/test by the register that
        was just stored there,
      - simplifies several mov/mov and mov/mov/mov sequences,
      - folds "mov; arith-op; mov back" into a single arith-op (MovOpMov2Op),
      - reorders "mov $0,reg; bts x,reg; or y,reg" into "mov y,reg; bts x,reg",
      - turns "mov ref,reg1; lea (reg1,reg2),reg2" into "add ref,reg2".

    p may be replaced by its successor when the MOV itself is removed.
    Result is set to true only by the MovAnd2Mov and "MovMov2Mov 1" rewrites;
    all other rewrites leave it false. }
  function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
    var
      hp1, hp2: tai;
      TmpUsedRegs : TAllUsedRegs;
      GetNextIntruction_p : Boolean;
    begin
      Result:=false;
      { hp1 is the instruction following p (if any); the flag is reused by
        most of the pattern checks below }
      GetNextIntruction_p:=GetNextInstruction(p, hp1);
      if GetNextIntruction_p and
        MatchInstruction(hp1,A_AND,[]) and
        (taicpu(p).oper[1]^.typ = top_reg) and
        MatchOpType(taicpu(hp1),top_const,top_reg) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
        { mov x, %reg
          and $const, %reg }
        case taicpu(p).opsize Of
          S_L:
            if (taicpu(hp1).oper[0]^.val = $ffffffff) then
              begin
                { the mask keeps all 32 bits, so the and is removed }
                DebugMsg('PeepHole Optimization,MovAnd2Mov',p);
                asml.remove(hp1);
                hp1.free;
                Result:=true;
                exit;
              end;
        end
      else if GetNextIntruction_p and
        MatchInstruction(hp1,A_MOV,[]) and
        (taicpu(p).oper[1]^.typ = top_reg) and
        (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
          { we have
              mov x, %treg
              mov %treg, y
          }
          if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
             not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
            { we've got
                mov x, %treg
                mov %treg, y
              where %treg is not used afterwards }
            case taicpu(p).oper[0]^.typ Of
              top_reg:
                begin
                  { change
                      mov %reg, %treg
                      mov %treg, y
                    to
                      mov %reg, y
                  }
                  taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
                  DebugMsg('PeepHole Optimization,MovMov2Mov 2',p);
                  asml.remove(hp1);
                  hp1.free;
                  ReleaseUsedRegs(TmpUsedRegs);
                  Exit;
                end;
              top_ref:
                if (taicpu(hp1).oper[1]^.typ = top_reg) then
                  begin
                    { change
                        mov mem, %treg
                        mov %treg, %reg
                      to
                        mov mem, %reg
                    }
                    taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                    DebugMsg('PeepHole Optimization,MovMov2Mov 3',p);
                    asml.remove(hp1);
                    hp1.free;
                    ReleaseUsedRegs(TmpUsedRegs);
                    Exit;
                  end;
            end;
          ReleaseUsedRegs(TmpUsedRegs);
        end
      else
        { Change
            mov %reg1, %reg2
            xxx %reg2, ???
          to
            mov %reg1, %reg2
            xxx %reg1, ???
          to avoid a write/read penalty
        }
        if MatchOpType(taicpu(p),top_reg,top_reg) and
          GetNextInstruction(p,hp1) and
          (tai(hp1).typ = ait_instruction) and
          (taicpu(hp1).ops >= 1) and
          MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
          { we have
              mov %reg1, %reg2
              XXX %reg2, ???
          }
          begin
            if ((taicpu(hp1).opcode = A_OR) or
                (taicpu(hp1).opcode = A_TEST)) and
               (taicpu(hp1).oper[1]^.typ = top_reg) and
               (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
              { we have
                  mov %reg1, %reg2
                  test/or %reg2, %reg2
              }
              begin
                CopyUsedRegs(TmpUsedRegs);
                { reg1 will be used after the first instruction,
                  so update the allocation info }
                AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                if GetNextInstruction(hp1, hp2) and
                   (hp2.typ = ait_instruction) and
                   taicpu(hp2).is_jmp and
                   not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
                  { change
                      mov %reg1, %reg2
                      test/or %reg2, %reg2
                      jxx
                    to
                      test %reg1, %reg1
                      jxx
                  }
                  begin
                    taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
                    taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
                    asml.remove(p);
                    p.free;
                    p := hp1;
                    ReleaseUsedRegs(TmpUsedRegs);
                    Exit;
                  end
                else
                  { change
                      mov %reg1, %reg2
                      test/or %reg2, %reg2
                    to
                      mov %reg1, %reg2
                      test/or %reg1, %reg1
                  }
                  begin
                    taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
                    taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
                  end;
                ReleaseUsedRegs(TmpUsedRegs);
              end
          end
        else
          { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
            x >= RetOffset) as it doesn't do anything (it writes either to a
            parameter or to the temporary storage room for the function
            result)
          }
          if GetNextIntruction_p and
            (tai(hp1).typ = ait_instruction) then
            begin
              if IsExitCode(hp1) and
                MatchOpType(p,top_reg,top_ref) and
                (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
                not(assigned(current_procinfo.procdef.funcretsym) and
                   (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
                (taicpu(p).oper[1]^.ref^.index = NR_NO) then
                begin
                  asml.remove(p);
                  p.free;
                  p:=hp1;
                  DebugMsg('Peephole removed deadstore before leave/ret',p);
                  RemoveLastDeallocForFuncRes(p);
                  exit;
                end
              { change
                  mov reg1, mem1
                  test/cmp x, mem1
                to
                  mov reg1, mem1
                  test/cmp x, reg1
              }
              else if MatchOpType(p,top_reg,top_ref) and
                MatchInstruction(hp1,A_CMP,A_TEST,[taicpu(p).opsize]) and
                (taicpu(hp1).oper[1]^.typ = top_ref) and
                RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
                begin
                  taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
                  DebugMsg('Peephole MovTestCmp2MovTestCmp 1',hp1);
                  AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                end;
            end;

      { Next instruction is also a MOV ? }
      if GetNextIntruction_p and
        MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
        begin
          if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
             (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
            { mov reg1, mem1     or     mov mem1, reg1
              mov mem2, reg2            mov reg2, mem2 }
            begin
              if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
                { mov reg1, mem1     or     mov mem1, reg1
                  mov mem2, reg1            mov reg2, mem1 }
                begin
                  if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
                    { Removes the second statement from
                        mov reg1, mem1/reg2
                        mov mem1/reg2, reg1 }
                    begin
                      if taicpu(p).oper[0]^.typ=top_reg then
                        AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                      DebugMsg('PeepHole Optimization,MovMov2Mov 1',p);
                      asml.remove(hp1);
                      hp1.free;
                      Result:=true;
                      exit;
                    end
                  else
                    begin
                      CopyUsedRegs(TmpUsedRegs);
                      UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                      if (taicpu(p).oper[1]^.typ = top_ref) and
                        { mov reg1, mem1
                          mov mem2, reg1 }
                         (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
                         GetNextInstruction(hp1, hp2) and
                         MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
                         OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
                         OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
                         not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
                        { change                   to
                            mov reg1, mem1           mov reg1, mem1
                            mov mem2, reg1           cmp reg1, mem2
                            cmp mem1, reg1
                        }
                        begin
                          asml.remove(hp2);
                          hp2.free;
                          { the old source reference of hp1 has to be copied
                            to operand 1 before operand 0 is overwritten }
                          taicpu(hp1).opcode := A_CMP;
                          taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
                          taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
                          AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
                          DebugMsg('Peephole MovMovCmp2MovCmp done',hp1);
                        end;
                      ReleaseUsedRegs(TmpUsedRegs);
                    end;
                end
              else if (taicpu(p).oper[1]^.typ=top_ref) and
                OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
                { mov reg1, mem1
                  mov mem1, reg2
                  the second mov can read reg1 instead of mem1 }
                begin
                  AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
                  taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
                  DebugMsg('PeepHole Optimization,MovMov2MovMov1',p);
                end
              else
                begin
                  CopyUsedRegs(TmpUsedRegs);
                  if GetNextInstruction(hp1, hp2) and
                    MatchOpType(taicpu(p),top_ref,top_reg) and
                    MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
                    (taicpu(hp1).oper[1]^.typ = top_ref) and
                    MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
                    MatchOpType(taicpu(hp2),top_ref,top_reg) and
                    RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
                    if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
                       not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
                      { mov mem1, %reg1
                        mov %reg1, mem2
                        mov mem2, reg2
                        to:
                        mov mem1, reg2
                        mov reg2, mem2 }
                      begin
                        AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
                        DebugMsg('PeepHole Optimization,MovMovMov2MovMov 1',p);
                        taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
                        taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
                        asml.remove(hp2);
                        hp2.free;
                      end
{$ifdef i386}
                    { this is enabled for i386 only, as the rules to create the reg sets below
                      are too complicated for x86-64, so this makes this code too error prone
                      on x86-64
                    }
                    else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
                      not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
                      not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
                      { mov mem1, reg1         mov mem1, reg1
                        mov reg1, mem2         mov reg1, mem2
                        mov mem2, reg2         mov mem2, reg1
                        to:                    to:
                        mov mem1, reg1         mov mem1, reg1
                        mov mem1, reg2         mov reg1, mem2
                        mov reg1, mem2
                        or (if mem1 depends on reg1
                            and/or if mem2 depends on reg2)
                        to:
                        mov mem1, reg1
                        mov reg1, mem2
                        mov reg1, reg2
                      }
                      begin
                        taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
                        taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
                        taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
                        taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
                        AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
                        if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
                           (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
                          AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
                        if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
                           (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
                          AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
                      end
                    else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
                      begin
                        taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
                        AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
                      end
                    else
                      begin
                        asml.remove(hp2);
                        hp2.free;
                      end
{$endif i386}
                      ;
                  ReleaseUsedRegs(TmpUsedRegs);
                end;
            end
(*          { movl [mem1],reg1
              movl [mem1],reg2
              to
              movl [mem1],reg1
              movl reg1,reg2
            }
          else if (taicpu(p).oper[0]^.typ = top_ref) and
            (taicpu(p).oper[1]^.typ = top_reg) and
            (taicpu(hp1).oper[0]^.typ = top_ref) and
            (taicpu(hp1).oper[1]^.typ = top_reg) and
            (taicpu(p).opsize = taicpu(hp1).opsize) and
            RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
            (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
            (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
            taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
          else*)
          { movl const1,[mem1]
            movl [mem1],reg1
            to
            movl const1,reg1
            movl reg1,[mem1]
          }
          else if MatchOpType(Taicpu(p),top_const,top_ref) and
            MatchOpType(Taicpu(hp1),top_ref,top_reg) and
            (taicpu(p).opsize = taicpu(hp1).opsize) and
            RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
            not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
            begin
              AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
              { the statement order matters: hp1's operand 0 must already
                hold reg1 when it is copied into p }
              taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
              taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
              taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
              taicpu(hp1).fileinfo := taicpu(p).fileinfo;
              DebugMsg('PeepHole Optimization,MovMov2MovMov 1',p);
            end
        end
      else if (taicpu(p).oper[1]^.typ = top_reg) and
        GetNextIntruction_p and
        (hp1.typ = ait_instruction) and
        GetNextInstruction(hp1, hp2) and
        MatchInstruction(hp2,A_MOV,[]) and
        OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
        (taicpu(hp2).oper[0]^.typ=top_reg) and
        (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
        (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
         ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
          IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
        ) then
        { change   movsX/movzX    reg/ref, reg2
                   add/sub/or/... reg3/$const, reg2
                   mov            reg2 reg/ref
          to       add/sub/or/... reg3/$const, reg/ref }
        begin
          CopyUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
          If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
            begin
              { by example:
                  movswl  %si,%eax        movswl  %si,%eax      p
                  decl    %eax            addl    %edx,%eax     hp1
                  movw    %ax,%si         movw    %ax,%si       hp2
                ->
                  movswl  %si,%eax        movswl  %si,%eax      p
                  decw    %eax            addw    %edx,%eax     hp1
                  movw    %ax,%si         movw    %ax,%si       hp2
              }
              DebugMsg('Peephole Optimization MovOpMov2Op ('+
                    std_op2str[taicpu(p).opcode]+gas_opsize2str[taicpu(p).opsize]+' '+
                    std_op2str[taicpu(hp1).opcode]+gas_opsize2str[taicpu(hp1).opsize]+' '+
                    std_op2str[taicpu(hp2).opcode]+gas_opsize2str[taicpu(hp2).opsize],p);
              taicpu(hp1).changeopsize(taicpu(hp2).opsize);
              {
                ->
                  movswl  %si,%eax        movswl  %si,%eax      p
                  decw    %si             addw    %dx,%si       hp1
                  movw    %ax,%si         movw    %ax,%si       hp2
              }
              case taicpu(hp1).ops of
                1:
                  begin
                    taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
                    if taicpu(hp1).oper[0]^.typ=top_reg then
                      setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
                  end;
                2:
                  begin
                    taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
                    { the source register of a shift is not resized }
                    if (taicpu(hp1).oper[0]^.typ=top_reg) and
                      (taicpu(hp1).opcode<>A_SHL) and
                      (taicpu(hp1).opcode<>A_SHR) and
                      (taicpu(hp1).opcode<>A_SAR) then
                      setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
                  end;
                else
                  internalerror(2008042701);
              end;
              {
                ->
                  decw    %si             addw    %dx,%si       p
              }
              asml.remove(p);
              asml.remove(hp2);
              p.Free;
              hp2.Free;
              p := hp1;
            end;
          ReleaseUsedRegs(TmpUsedRegs);
        end
      else if GetNextIntruction_p and
        { only BTS qualifies: with BTR the original sequence yields operand2
          unchanged, whereas the reordered one would clear a bit of it }
        MatchInstruction(hp1,A_BTS,[Taicpu(p).opsize]) and
        GetNextInstruction(hp1, hp2) and
        MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
        MatchOperand(Taicpu(p).oper[0]^,0) and
        (Taicpu(p).oper[1]^.typ = top_reg) and
        MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
        MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) and
        { neither the bit index nor the or-source may depend on reg1, as reg1
          holds different values at those points after the reordering }
        not(RegInOp(Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[0]^)) and
        not(RegInOp(Taicpu(p).oper[1]^.reg,Taicpu(hp2).oper[0]^)) then
        { mov reg1,0
          bts reg1,operand1             -->      mov reg1,operand2
          or  reg1,operand2                      bts reg1,operand1 }
        begin
          Taicpu(hp2).opcode:=A_MOV;
          { move the bts behind the former or }
          asml.remove(hp1);
          insertllitem(hp2,hp2.next,hp1);
          asml.remove(p);
          p.free;
          p:=hp1;
        end
      else if GetNextIntruction_p and
        MatchInstruction(hp1,A_LEA,[S_L]) and
        MatchOpType(Taicpu(p),top_ref,top_reg) and
        ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
          (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
         ) or
         (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
          (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
         )
        ) then
        { mov reg1,ref
          lea reg2,[reg1,reg2]
          to
          add reg2,ref }
        begin
          CopyUsedRegs(TmpUsedRegs);
          { reg1 may not be used afterwards }
          if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
            begin
              Taicpu(hp1).opcode:=A_ADD;
              Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
              DebugMsg('Peephole MovLea2Add done',hp1);
              asml.remove(p);
              p.free;
              p:=hp1;
            end;
          ReleaseUsedRegs(TmpUsedRegs);
        end;
    end;
  { Second-pass peephole optimisations for a MOV instruction.

    Two rewrites are attempted:
      1. mov reg1, reg2
         mov/zx/sx (reg2, ..), reg2    ->   mov/zx/sx (reg1, ..), reg2
         (p is removed and replaced by the load, Result becomes true)
      2. mov (ref), reg
         add/sub/or/lea... x, reg
         mov reg, (ref)                ->   add/sub/or/... x, (ref)
         (p and the store are removed, p becomes the arithmetic instruction,
          Result stays false) }
  function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      Next1,Next2: tai;
      Foldable: Boolean;
    begin
      Result:=false;

      { --- rewrite 1: copy followed by a load through the copy --- }
      if MatchOpType(taicpu(p),top_reg,top_reg) and
         GetNextInstruction(p, Next1) and
         MatchInstruction(Next1,A_MOV,A_MOVZX,A_MOVSX,[]) and
         MatchOpType(taicpu(Next1),top_ref,top_reg) and
         ((taicpu(Next1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) or
          (taicpu(Next1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)) and
         (getsupreg(taicpu(Next1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
        begin
          { address the memory through reg1 instead of its copy reg2 }
          if (taicpu(Next1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
            taicpu(Next1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
          if (taicpu(Next1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
            taicpu(Next1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
          asml.remove(p);
          p.free;
          p := Next1;
          Result:=true;
          exit;
        end;

      { --- rewrite 2: load / modify / store back to the same location --- }
      if taicpu(p).oper[0]^.typ <> top_ref then
        exit;
      if not GetNextInstruction(p,Next1) then
        exit;
      if Next1.typ <> ait_instruction then
        exit;

      { while the GetNextInstruction(Next1,Next2) call could be factored out,
        doing it separately in both alternatives allows the cheap checks
        with low probability to run first }
      Foldable:=
        IsFoldableArithOp(taicpu(Next1),taicpu(p).oper[1]^.reg) and
        GetNextInstruction(Next1,Next2) and
        MatchInstruction(Next2,A_MOV,[]);
      if not Foldable then
        { NOTE(review): the live UsedRegs (not a copy) is handed to
          RegUsedAfterInstruction here - confirm that this is intended }
        Foldable:=
          (taicpu(Next1).opcode=A_LEA) and
          GetNextInstruction(Next1,Next2) and
          MatchInstruction(Next2,A_MOV,[]) and
          ((MatchReference(taicpu(Next1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
            (taicpu(Next1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
           ) or
           (MatchReference(taicpu(Next1).oper[0]^.ref^,NR_INVALID,
              taicpu(p).oper[1]^.reg) and
            (taicpu(Next1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg)) or
           (MatchReferenceWithOffset(taicpu(Next1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_NO)) or
           (MatchReferenceWithOffset(taicpu(Next1).oper[0]^.ref^,NR_NO,taicpu(p).oper[1]^.reg))
          ) and
          ((MatchOperand(taicpu(p).oper[1]^,taicpu(Next2).oper[0]^)) or
           not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,Next1,UsedRegs)));
      if not Foldable then
        exit;

      { the register written by the operation must be the one stored back,
        and it must be stored to memory }
      if not MatchOperand(taicpu(Next1).oper[taicpu(Next1).ops-1]^,taicpu(Next2).oper[0]^) then
        exit;
      if taicpu(Next2).oper[1]^.typ <> top_ref then
        exit;

      CopyUsedRegs(TmpUsedRegs);
      UpdateUsedRegs(TmpUsedRegs,tai(Next1.next));
      if RefsEqual(taicpu(Next2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
         not(RegUsedAfterInstruction(taicpu(Next2).oper[0]^.reg,Next2,TmpUsedRegs)) then
        { change   mov            (ref), reg
                   add/sub/or/... reg2/$const, reg
                   mov            reg, (ref)
                   # release reg
          to       add/sub/or/... reg2/$const, (ref) }
        begin
          case taicpu(Next1).opcode of
            A_INC,A_DEC,A_NOT,A_NEG :
              { single operand instructions work on the memory directly }
              taicpu(Next1).loadRef(0,taicpu(p).oper[0]^.ref^);
            A_LEA :
              begin
                { the lea becomes an add of its "other" component (index
                  register, base register or plain offset) to the memory }
                taicpu(Next1).opcode:=A_ADD;
                if (taicpu(Next1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg) and
                   (taicpu(Next1).oper[0]^.ref^.index<>NR_NO) then
                  taicpu(Next1).loadreg(0,taicpu(Next1).oper[0]^.ref^.index)
                else if (taicpu(Next1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg) and
                        (taicpu(Next1).oper[0]^.ref^.base<>NR_NO) then
                  taicpu(Next1).loadreg(0,taicpu(Next1).oper[0]^.ref^.base)
                else
                  taicpu(Next1).loadconst(0,taicpu(Next1).oper[0]^.ref^.offset);
                taicpu(Next1).loadRef(1,taicpu(p).oper[0]^.ref^);
                DebugMsg('Peephole FoldLea done',Next1);
              end
            else
              taicpu(Next1).loadRef(1,taicpu(p).oper[0]^.ref^);
          end;
          asml.remove(p);
          asml.remove(Next2);
          p.free;
          Next2.free;
          p := Next1
        end;
      ReleaseUsedRegs(TmpUsedRegs);
    end;
  { Second-pass peephole optimisation for IMUL.

    Folds a preceding register copy into the multiplication:
        mov  reg1,reg2
        imul y,reg2          ->   imul y,reg1,reg2
    where y is a constant or a full symbol address and reg2 is not used
    after the imul.

    p is the IMUL instruction; it is rewritten in place and the preceding MOV
    is removed. Result is true when the rewrite was performed. }
  function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1 : tai;
    begin
      Result:=false;
      if (taicpu(p).ops >= 2) and
         ((taicpu(p).oper[0]^.typ = top_const) or
          ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
         (taicpu(p).oper[1]^.typ = top_reg) and
         { either the two operand form or "imul y,reg2,reg2" }
         ((taicpu(p).ops = 2) or
          ((taicpu(p).oper[2]^.typ = top_reg) and
           (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
         GetLastInstruction(p,hp1) and
         MatchInstruction(hp1,A_MOV,[]) and
         MatchOpType(hp1,top_reg,top_reg) and
         { The mov must write exactly the register the imul works on.
           A 32 bit mov in front of a 64 bit imul is deliberately not
           accepted: the registers of the mov are loaded into the imul below,
           which would give the 64 bit instruction 32 bit operands, and the
           32 bit mov zero-extends while the folded imul would read the whole
           source register. }
         (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,p,TmpUsedRegs)) then
            { change
                mov reg1,reg2
                imul y,reg2 to imul y,reg1,reg2 }
            begin
              taicpu(p).ops := 3;
              taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
              taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
              DebugMsg('Peephole MovImul2Imul done',p);
              asml.remove(hp1);
              hp1.free;
              result:=true;
            end;
          ReleaseUsedRegs(TmpUsedRegs);
        end;
    end;
  { Second-pass peephole optimisation for jumps.

    change
           jmp .L1
           ...
       .L1:
           ret
    into
           ret

    p is the jump instruction; it is converted in place (the RET operand, if
    any, is copied) and the reference count of the label is decreased.
    Result is true when the jump was converted. }
  function TX86AsmOptimizer.OptPass2Jmp(var p : tai) : boolean;
    var
      Target : tai;
    begin
      Result:=false;

      { only jumps to a plain symbol without base/index register qualify }
      if taicpu(p).oper[0]^.typ<>top_ref then
        exit;
      if (taicpu(p).oper[0]^.ref^.refaddr<>addr_full) or
         (taicpu(p).oper[0]^.ref^.base<>NR_NO) or
         (taicpu(p).oper[0]^.ref^.index<>NR_NO) then
        exit;

      Target:=getlabelwithsym(tasmlabel(taicpu(p).oper[0]^.ref^.symbol));

      { the jump has to be unconditional and its destination, after skipping
        labels, has to be a RET }
      if taicpu(p).condition<>C_None then
        exit;
      if not assigned(Target) then
        exit;
      if not SkipLabels(Target,Target) then
        exit;
      if not MatchInstruction(Target,A_RET,[S_NO]) then
        exit;

      tasmlabel(taicpu(p).oper[0]^.ref^.symbol).decrefs;
      taicpu(p).opcode:=A_RET;
      taicpu(p).is_jmp:=false;
      taicpu(p).ops:=taicpu(Target).ops;
      { take over the operand of the RET: none or a constant }
      if taicpu(Target).ops=0 then
        taicpu(p).clearop(0)
      else if taicpu(Target).ops=1 then
        taicpu(p).loadconst(0,taicpu(Target).oper[0]^.val)
      else
        internalerror(2016041301);
      Result:=true;
    end;
  { First-pass peephole optimisations for an AND instruction.

    p is the AND being examined, hp1 the instruction following it. Handled:
      - and const1,reg; and const2,reg  ->  and (const1 and const2),reg
        (p is removed and replaced by the second and, Result becomes true)
      - and const,reg; movzx sub(reg),reg  ->  and const,reg
        when the constant already clears all bits above the movzx source size
      - and const,reg; movsx sub(reg),reg  ->  and const,reg
        when the constant also clears the sign bit of the movsx source size
      - and x,reg; jxx  ->  test x,reg; jxx
        when reg is no longer in use and the jump is conditional

    Only the and/and merge sets Result to true. }
  function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
    var
      hp1 : tai;
    begin
      Result:=false;

      if not(GetNextInstruction(p, hp1)) then
        exit;

      if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_AND,[]) and
        MatchOpType(hp1,top_const,top_reg) and
        (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
        { the second register must contain the first one, so compare their subreg types }
        (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
        (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
        { change
            and const1, reg
            and const2, reg
          to
            and (const1 and const2), reg
        }
        begin
          taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
          DebugMsg('Peephole AndAnd2And done',hp1);
          asml.remove(p);
          p.Free;
          p:=hp1;
          Result:=true;
          exit;
        end
      else if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_MOVZX,[]) and
        (taicpu(hp1).oper[0]^.typ = top_reg) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
        { source and destination of the movzx must be parts of the same
          register; their sub-register sizes necessarily differ, so the
          super registers are compared (as in the movsx case below) }
        (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
         (((taicpu(p).opsize=S_W) and
           (taicpu(hp1).opsize=S_BW)) or
          ((taicpu(p).opsize=S_L) and
           (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
            or
           ((taicpu(p).opsize=S_Q) and
            (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
          ) then
            begin
              { the zero extension is redundant if the mask already clears
                every bit above the size of the movzx source }
              if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
                  ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
                   ) or
                 (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
                  ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
                  or
                 (((taicpu(hp1).opsize)=S_LQ) and
                  ((taicpu(p).oper[0]^.val and $ffffffff)=taicpu(p).oper[0]^.val)
                 )
{$endif x86_64}
                then
                begin
                  DebugMsg('Peephole AndMovzToAnd done',p);
                  asml.remove(hp1);
                  hp1.free;
                end;
            end
      else if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
        (taicpu(hp1).oper[0]^.typ = top_reg) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
        (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
         (((taicpu(p).opsize=S_W) and
           (taicpu(hp1).opsize=S_BW)) or
          ((taicpu(p).opsize=S_L) and
           (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
           or
           ((taicpu(p).opsize=S_Q) and
           (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
          ) then
            begin
              { the sign extension is redundant if the mask also clears the
                sign bit of the movsx source }
              if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
                  ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
                   ) or
                 (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
                  ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
                  or
                 (((taicpu(hp1).opsize)=S_LQ) and
                  ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
                 )
{$endif x86_64}
                then
                begin
                  DebugMsg('PeepHole Optimization,AndMovsxToAnd',p);
                  asml.remove(hp1);
                  hp1.free;
                end;
            end
      else if (taicpu(p).oper[1]^.typ = top_reg) and
        (hp1.typ = ait_instruction) and
        (taicpu(hp1).is_jmp) and
        (taicpu(hp1).opcode<>A_JMP) and
        not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
        { change
            and x, reg
            jxx
          to
            test x, reg
            jxx
          if reg is deallocated before the
          jump, but only if it's a conditional jump (PFV)
        }
        taicpu(p).opcode := A_TEST;
    end;
  { Post-peephole optimisation for MOV:
    change "mov $0, %reg" into "xor %reg, %reg".

    The instruction p is modified in place, and only if the flags register
    is not currently in use. }
  procedure TX86AsmOptimizer.PostPeepholeOptMov(const p : tai);
    begin
      { source must be the constant 0 ... }
      if not MatchOperand(taicpu(p).oper[0]^,0) then
        exit;
      { ... the destination a register ... }
      if taicpu(p).oper[1]^.typ <> Top_Reg then
        exit;
      { ... and the flags must be free }
      if RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs) then
        exit;

      taicpu(p).opcode := A_XOR;
      taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
    end;
  1424. end.