{ NOTE(review): removed web-viewer extraction residue here (file-size banner
  and concatenated pagination line numbers) — not part of the source file }
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
  type
    { x86-specific peephole optimizer; specializes the generic TAsmOptimizer
      with x86 register/instruction pattern helpers and per-pass rules }
    TX86AsmOptimizer = class(TAsmOptimizer)
      { true if instruction hp loads a completely new value into reg,
        i.e. the previous contents of reg do not influence the result }
      function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
     protected
      { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
      function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
      { checks whether reading the value in reg1 depends on the value of reg2. This
        is very similar to SuperRegisterEquals, except it takes into account that
        R_SUBH and R_SUBL are independent (e.g. reading from AL does not
        depend on the value in AH). }
      function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
      procedure PostPeepholeOptMov(const p : tai);
      function OptPass1AND(var p : tai) : boolean;
      function OptPass1VMOVAP(var p : tai) : boolean;
      function OptPass1VOP(const p : tai) : boolean;
      function OptPass1MOV(var p : tai) : boolean;
      function OptPass2MOV(var p : tai) : boolean;
      function OptPass2Imul(var p : tai) : boolean;
      { emits s as an assembler comment before p (no-op unless DEBUG_AOPTCPU) }
      procedure DebugMsg(const s : string; p : tai);inline;
      { extends the allocation range of reg to cover p1..p2 }
      procedure AllocRegBetween(reg : tregister; p1,p2 : tai;var initialusedregs : TAllUsedRegs);
      { true if p starts a recognized function exit sequence }
      class function IsExitCode(p : tai) : boolean;
      { true if hp1 is an arithmetic op whose destination is reg }
      class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean;
      procedure RemoveLastDeallocForFuncRes(p : tai);
    end;
    { overloaded instruction matchers: true if instr is an ait_instruction
      with one of the listed opcodes and (when opsize<>[]) an operand size
      contained in opsize }
    function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
    function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
    function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
    function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;

    { overloaded operand matchers for a register, a constant and a generic operand }
    function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
    function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;

    { true if r1 and r2 denote exactly the same memory reference }
    function RefsEqual(const r1, r2: treference): boolean;

    { true if ref has no offset/symbol/segment and uses only the given base
      and index registers (NR_INVALID acts as a wildcard) }
    function MatchReference(const ref : treference;base,index : TRegister) : Boolean;

    { returns true, if ref is a reference using only the registers passed as base and index
      and having an offset }
    function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;

    { true if instr has exactly two operands of types ot0 and ot1 }
    function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
  64. implementation
  65. uses
  66. cutils,
  67. verbose,
  68. procinfo,
  69. symconst,symsym,
  70. itcpugas;
  71. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  72. begin
  73. result :=
  74. (instr.typ = ait_instruction) and
  75. (taicpu(instr).opcode = op) and
  76. ((opsize = []) or (taicpu(instr).opsize in opsize));
  77. end;
  78. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  79. begin
  80. result :=
  81. (instr.typ = ait_instruction) and
  82. ((taicpu(instr).opcode = op1) or
  83. (taicpu(instr).opcode = op2)
  84. ) and
  85. ((opsize = []) or (taicpu(instr).opsize in opsize));
  86. end;
  87. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  88. begin
  89. result :=
  90. (instr.typ = ait_instruction) and
  91. ((taicpu(instr).opcode = op1) or
  92. (taicpu(instr).opcode = op2) or
  93. (taicpu(instr).opcode = op3)
  94. ) and
  95. ((opsize = []) or (taicpu(instr).opsize in opsize));
  96. end;
  97. function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
  98. const opsize : topsizes) : boolean;
  99. var
  100. op : TAsmOp;
  101. begin
  102. result:=false;
  103. for op in ops do
  104. begin
  105. if (instr.typ = ait_instruction) and
  106. (taicpu(instr).opcode = op) and
  107. ((opsize = []) or (taicpu(instr).opsize in opsize)) then
  108. begin
  109. result:=true;
  110. exit;
  111. end;
  112. end;
  113. end;
  114. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  115. begin
  116. result := (oper.typ = top_reg) and (oper.reg = reg);
  117. end;
  118. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  119. begin
  120. result := (oper.typ = top_const) and (oper.val = a);
  121. end;
  122. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  123. begin
  124. result := oper1.typ = oper2.typ;
  125. if result then
  126. case oper1.typ of
  127. top_const:
  128. Result:=oper1.val = oper2.val;
  129. top_reg:
  130. Result:=oper1.reg = oper2.reg;
  131. top_ref:
  132. Result:=RefsEqual(oper1.ref^, oper2.ref^);
  133. else
  134. internalerror(2013102801);
  135. end
  136. end;
  137. function RefsEqual(const r1, r2: treference): boolean;
  138. begin
  139. RefsEqual :=
  140. (r1.offset = r2.offset) and
  141. (r1.segment = r2.segment) and (r1.base = r2.base) and
  142. (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
  143. (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
  144. (r1.relsymbol = r2.relsymbol);
  145. end;
  146. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  147. begin
  148. Result:=(ref.offset=0) and
  149. (ref.scalefactor in [0,1]) and
  150. (ref.segment=NR_NO) and
  151. (ref.symbol=nil) and
  152. (ref.relsymbol=nil) and
  153. ((base=NR_INVALID) or
  154. (ref.base=base)) and
  155. ((index=NR_INVALID) or
  156. (ref.index=index));
  157. end;
  158. function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  159. begin
  160. Result:=(ref.scalefactor in [0,1]) and
  161. (ref.segment=NR_NO) and
  162. (ref.symbol=nil) and
  163. (ref.relsymbol=nil) and
  164. ((base=NR_INVALID) or
  165. (ref.base=base)) and
  166. ((index=NR_INVALID) or
  167. (ref.index=index));
  168. end;
  169. function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
  170. begin
  171. Result:=(taicpu(instr).ops=2) and
  172. (taicpu(instr).oper[0]^.typ=ot0) and
  173. (taicpu(instr).oper[1]^.typ=ot1);
  174. end;
{$ifdef DEBUG_AOPTCPU}
  { debug build: emit the message as an assembler comment placed before p,
    so optimizer decisions are visible in the generated assembler listing }
  procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
    begin
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
    end;
{$else DEBUG_AOPTCPU}
  { release build: inlined no-op so call sites cost nothing }
  procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
    end;
{$endif DEBUG_AOPTCPU}
function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
  { true if writing reg1 replaces every bit of reg2, taking x86 partial
    register semantics into account }
  begin
    { different super registers never alias }
    if not SuperRegistersEqual(reg1,reg2) then
      exit(false);
    { only integer registers have partial sub registers }
    if getregtype(reg1)<>R_INTREGISTER then
      exit(true); {because SuperRegisterEqual is true}
    case getsubreg(reg1) of
      { A write to R_SUBL doesn't change R_SUBH and if reg2 is R_SUBW or
        higher, it preserves the high bits, so the new value depends on
        reg2's previous value. In other words, it is equivalent to doing:
        reg2 := (reg2 and $ffffff00) or byte(reg1); }
      R_SUBL:
        exit(getsubreg(reg2)=R_SUBL);
      { A write to R_SUBH doesn't change R_SUBL and if reg2 is R_SUBW or
        higher, it actually does a:
        reg2 := (reg2 and $ffff00ff) or (reg1 and $ff00); }
      R_SUBH:
        exit(getsubreg(reg2)=R_SUBH);
      { If reg2 is R_SUBD or larger, a write to R_SUBW preserves the high 16
        bits of reg2:
        reg2 := (reg2 and $ffff0000) or word(reg1); }
      R_SUBW:
        exit(getsubreg(reg2) in [R_SUBL,R_SUBH,R_SUBW]);
      { a write to R_SUBD always overwrites every other subregister,
        because it clears the high 32 bits of R_SUBQ on x86_64 }
      R_SUBD,
      R_SUBQ:
        exit(true);
      else
        internalerror(2017042801);
    end;
  end;
function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
  { true if the value read from reg1 depends on the current contents of
    reg2; like SuperRegistersEqual, but knows that the low byte (e.g. AL)
    and the high byte (e.g. AH) of a register are independent }
  begin
    if not SuperRegistersEqual(reg1,reg2) then
      exit(false);
    if getregtype(reg1)<>R_INTREGISTER then
      exit(true); {because SuperRegisterEqual is true}
    case getsubreg(reg1) of
      { reading the low byte does not depend on the high byte, and vice
        versa; any wider read overlaps both }
      R_SUBL:
        exit(getsubreg(reg2)<>R_SUBH);
      R_SUBH:
        exit(getsubreg(reg2)<>R_SUBL);
      R_SUBW,
      R_SUBD,
      R_SUBQ:
        exit(true);
      else
        internalerror(2017042802);
    end;
  end;
{ allocates register reg between (and including) instructions p1 and p2
  the type of p1 and p2 must not be in SkipInstr
  note that this routine is both called from the peephole optimizer
  where optinfo is not yet initialised) and from the cse (where it is) }
procedure TX86AsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
  var
    hp, start: tai;
    removedsomething,
    firstRemovedWasAlloc,
    lastRemovedWasDealloc: boolean;
  begin
{$ifdef EXTDEBUG}
    { if assigned(p1.optinfo) and
         (ptaiprop(p1.optinfo)^.usedregs <> initialusedregs) then
       internalerror(2004101010); }
{$endif EXTDEBUG}
    start := p1;
    if (reg = NR_ESP) or
       (reg = current_procinfo.framepointer) or
       not(assigned(p1)) then
      { this happens with registers which are loaded implicitely, outside the }
      { current block (e.g. esi with self) }
      exit;
    { make sure we allocate it for this instruction }
    getnextinstruction(p2,p2);
    lastRemovedWasDealloc := false;
    removedSomething := false;
    firstRemovedWasAlloc := false;
{$ifdef allocregdebug}
    hp := tai_comment.Create(strpnew('allocating '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
      ' from here...'));
    insertllitem(asml,p1.previous,p1,hp);
    hp := tai_comment.Create(strpnew('allocated '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
      ' till here...'));
    insertllitem(asml,p2,p2.next,hp);
{$endif allocregdebug}
    { do it the safe way: always allocate the full super register,
      as we do no register re-allocation in the peephole optimizer,
      this does not hurt
    }
    case getregtype(reg) of
      R_MMREGISTER:
        reg:=newreg(R_MMREGISTER,getsupreg(reg),R_SUBMMWHOLE);
      R_INTREGISTER:
        reg:=newreg(R_INTREGISTER,getsupreg(reg),R_SUBWHOLE);
    end;
    { if reg was not yet live at p1, open the range with an alloc marker
      right before p1 and record it in initialusedregs }
    if not(RegInUsedRegs(reg,initialusedregs)) then
      begin
        hp := tai_regalloc.alloc(reg,nil);
        insertllItem(p1.previous,p1,hp);
        IncludeRegInUsedRegs(reg,initialusedregs);
      end;
    { walk from p1 up to (not including) p2 and delete every alloc/dealloc
      marker of reg in between, remembering what the first and the last
      removed markers were so matching markers can be re-created around
      the merged range afterwards }
    while assigned(p1) and
          (p1 <> p2) do
      begin
        if assigned(p1.optinfo) then
          internalerror(2014022301); // IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs);
        p1 := tai(p1.next);
        repeat
          while assigned(p1) and
                (p1.typ in (SkipInstr-[ait_regalloc])) Do
            p1 := tai(p1.next);
          { remove all allocation/deallocation info about the register in between }
          if assigned(p1) and
             (p1.typ = ait_regalloc) then
            begin
              { same super register, different sub register? }
              if SuperRegistersEqual(reg,tai_regalloc(p1).reg) and (tai_regalloc(p1).reg<>reg) then
                begin
                  { widening past the tracked subreg (or from R_SUBH) is not
                    expected here }
                  if (getsubreg(tai_regalloc(p1).reg)>getsubreg(reg)) or (getsubreg(reg)=R_SUBH) then
                    internalerror(2016101501);
                  tai_regalloc(p1).reg:=reg;
                end;
              if tai_regalloc(p1).reg=reg then
                begin
                  if not removedSomething then
                    begin
                      firstRemovedWasAlloc := tai_regalloc(p1).ratype=ra_alloc;
                      removedSomething := true;
                    end;
                  lastRemovedWasDealloc := (tai_regalloc(p1).ratype=ra_dealloc);
                  hp := tai(p1.Next);
                  asml.Remove(p1);
                  p1.free;
                  p1 := hp;
                end
              else
                p1 := tai(p1.next);
            end;
        until not(assigned(p1)) or
              not(p1.typ in SkipInstr);
      end;
    { re-create the boundary markers: an alloc before the start if the range
      originally began with one, and a dealloc after p2 if the register was
      dead after the original range }
    if assigned(p1) then
      begin
        if firstRemovedWasAlloc then
          begin
            hp := tai_regalloc.Alloc(reg,nil);
            insertLLItem(start.previous,start,hp);
          end;
        if lastRemovedWasDealloc then
          begin
            hp := tai_regalloc.DeAlloc(reg,nil);
            insertLLItem(p1.previous,p1,hp);
          end;
      end;
  end;
function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  { returns true if instruction hp loads a completely new value into reg,
    i.e. the result does not depend on reg's previous contents; used to
    decide whether earlier writes to reg are dead }
  var
    p: taicpu;
  begin
    if not assigned(hp) or
       (hp.typ <> ait_instruction) then
      begin
        Result := false;
        exit;
      end;
    p := taicpu(hp);
    { flags pseudo-register: decide per flag bit from the instruction
      property table's change set }
    if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
      with insprop[p.opcode] do
        begin
          case getsubreg(reg) of
            { the whole flags register counts as rewritten only if every
              arithmetic flag is rewritten }
            R_SUBW,R_SUBD,R_SUBQ:
              Result:=
                RegLoadedWithNewValue(NR_CARRYFLAG,hp) and
                RegLoadedWithNewValue(NR_PARITYFLAG,hp) and
                RegLoadedWithNewValue(NR_AUXILIARYFLAG,hp) and
                RegLoadedWithNewValue(NR_ZEROFLAG,hp) and
                RegLoadedWithNewValue(NR_SIGNFLAG,hp) and
                RegLoadedWithNewValue(NR_OVERFLOWFLAG,hp);
            R_SUBFLAGCARRY:
              Result:=[Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag,Ch_WFlags]*Ch<>[];
            R_SUBFLAGPARITY:
              Result:=[Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag,Ch_WFlags]*Ch<>[];
            R_SUBFLAGAUXILIARY:
              Result:=[Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag,Ch_WFlags]*Ch<>[];
            R_SUBFLAGZERO:
              Result:=[Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag,Ch_WFlags]*Ch<>[];
            R_SUBFLAGSIGN:
              Result:=[Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag,Ch_WFlags]*Ch<>[];
            R_SUBFLAGOVERFLOW:
              Result:=[Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag,Ch_WFlags]*Ch<>[];
            R_SUBFLAGINTERRUPT:
              Result:=[Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*Ch<>[];
            R_SUBFLAGDIRECTION:
              Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*Ch<>[];
            else
              internalerror(2017050501);
          end;
          exit;
        end;
    Result :=
      { mov-style loads: reg gets a new value when it is the destination
        and the source does not itself depend on reg }
      (((p.opcode = A_MOV) or
        (p.opcode = A_MOVZX) or
        (p.opcode = A_MOVSX) or
        (p.opcode = A_LEA) or
        (p.opcode = A_VMOVSS) or
        (p.opcode = A_VMOVSD) or
        (p.opcode = A_VMOVAPD) or
        (p.opcode = A_VMOVAPS) or
        (p.opcode = A_VMOVQ) or
        (p.opcode = A_MOVSS) or
        (p.opcode = A_MOVSD) or
        (p.opcode = A_MOVQ) or
        (p.opcode = A_MOVAPD) or
        (p.opcode = A_MOVAPS) or
{$ifndef x86_64}
        (p.opcode = A_LDS) or
        (p.opcode = A_LES) or
{$endif not x86_64}
        (p.opcode = A_LFS) or
        (p.opcode = A_LGS) or
        (p.opcode = A_LSS)) and
       (p.ops=2) and { A_MOVSD can have zero operands, so this check is needed }
       (p.oper[1]^.typ = top_reg) and
       (Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)) and
       ((p.oper[0]^.typ = top_const) or
        ((p.oper[0]^.typ = top_reg) and
         not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
        ((p.oper[0]^.typ = top_ref) and
         not RegInRef(reg,p.oper[0]^.ref^)))) or
      { pop overwrites its destination register entirely }
      ((p.opcode = A_POP) and
       (Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg))) or
      { 3-operand imul writes operand 2 }
      ((p.opcode = A_IMUL) and
       (p.ops=3) and
       (Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg,reg)) and
       (((p.oper[1]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[1]^.reg,reg))) or
        ((p.oper[1]^.typ=top_ref) and not(RegInRef(reg,p.oper[1]^.ref^))))) or
      { 1-operand mul/imul implicitly write AX / DX:AX / EDX:EAX / RDX:RAX }
      ((((p.opcode = A_IMUL) or
         (p.opcode = A_MUL)) and
        (p.ops=1)) and
       (((p.oper[0]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
        ((p.oper[0]^.typ=top_ref) and not(RegInRef(reg,p.oper[0]^.ref^)))) and
       (((p.opsize=S_B) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
        ((p.opsize=S_W) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
        ((p.opsize=S_L) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg))
{$ifdef x86_64}
        or ((p.opsize=S_Q) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg))
{$endif x86_64}
       )) or
      { sign-extension instructions rewrite the high half register }
      ((p.opcode = A_CWD) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
      ((p.opcode = A_CDQ) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg)) or
{$ifdef x86_64}
      ((p.opcode = A_CQO) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg)) or
{$endif x86_64}
      ((p.opcode = A_CBW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
      { far-pointer loads also write the segment register }
{$ifndef x86_64}
      ((p.opcode = A_LDS) and (reg=NR_DS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
      ((p.opcode = A_LES) and (reg=NR_ES) and not(RegInRef(reg,p.oper[0]^.ref^))) or
{$endif not x86_64}
      ((p.opcode = A_LFS) and (reg=NR_FS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
      ((p.opcode = A_LGS) and (reg=NR_GS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
      ((p.opcode = A_LSS) and (reg=NR_SS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
      { instructions with fixed implicit destinations }
{$ifndef x86_64}
      ((p.opcode = A_AAM) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
{$endif not x86_64}
      ((p.opcode = A_LAHF) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
      ((p.opcode = A_LODSB) and Reg1WriteOverwritesReg2Entirely(NR_AL,reg)) or
      ((p.opcode = A_LODSW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg)) or
      ((p.opcode = A_LODSD) and Reg1WriteOverwritesReg2Entirely(NR_EAX,reg)) or
{$ifdef x86_64}
      ((p.opcode = A_LODSQ) and Reg1WriteOverwritesReg2Entirely(NR_RAX,reg)) or
{$endif x86_64}
      ((p.opcode = A_SETcc) and (p.oper[0]^.typ=top_reg) and Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
      (((p.opcode = A_FSTSW) or
        (p.opcode = A_FNSTSW)) and
       (p.oper[0]^.typ=top_reg) and
       Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
      { "xor/sub/sbb reg,reg" produces a value independent of reg's old
        contents (sbb still depends on the carry flag, but not on reg) }
      (((p.opcode = A_XOR) or (p.opcode = A_SUB) or (p.opcode = A_SBB)) and
       (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
       (p.oper[0]^.reg=p.oper[1]^.reg) and
       Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg));
  end;
class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
  { returns true if p starts one of the recognized function exit sequences:
      ret
      leave; ret
      mov %ebp,%esp; pop %ebp; ret   (explicit 32-bit frame teardown) }
  var
    hp2,hp3 : tai;
  begin
    result:=(p.typ=ait_instruction) and
    ((taicpu(p).opcode = A_RET) or
     ((taicpu(p).opcode=A_LEAVE) and
      GetNextInstruction(p,hp2) and
      (hp2.typ=ait_instruction) and
      (taicpu(hp2).opcode=A_RET)
     ) or
     { operands are in AT&T order: oper[0] is the source, so this matches
       "esp := ebp" followed by "pop ebp; ret" }
     ((taicpu(p).opcode=A_MOV) and
      (taicpu(p).oper[0]^.typ=top_reg) and
      (taicpu(p).oper[0]^.reg=NR_EBP) and
      (taicpu(p).oper[1]^.typ=top_reg) and
      (taicpu(p).oper[1]^.reg=NR_ESP) and
      GetNextInstruction(p,hp2) and
      (hp2.typ=ait_instruction) and
      (taicpu(hp2).opcode=A_POP) and
      (taicpu(hp2).oper[0]^.typ=top_reg) and
      (taicpu(hp2).oper[0]^.reg=NR_EBP) and
      GetNextInstruction(hp2,hp3) and
      (hp3.typ=ait_instruction) and
      (taicpu(hp3).opcode=A_RET)
     )
    );
  end;
  495. class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
  496. begin
  497. isFoldableArithOp := False;
  498. case hp1.opcode of
  499. A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
  500. isFoldableArithOp :=
  501. ((taicpu(hp1).oper[0]^.typ = top_const) or
  502. ((taicpu(hp1).oper[0]^.typ = top_reg) and
  503. (taicpu(hp1).oper[0]^.reg <> reg))) and
  504. (taicpu(hp1).oper[1]^.typ = top_reg) and
  505. (taicpu(hp1).oper[1]^.reg = reg);
  506. A_INC,A_DEC,A_NEG,A_NOT:
  507. isFoldableArithOp :=
  508. (taicpu(hp1).oper[0]^.typ = top_reg) and
  509. (taicpu(hp1).oper[0]^.reg = reg);
  510. end;
  511. end;
procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);

  { walks backwards from p and deletes the nearest dealloc marker of the
    given integer super register, so the function-result register remains
    marked as live up to the exit code }
  procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
    var
      hp2: tai;
    begin
      hp2 := p;
      repeat
        hp2 := tai(hp2.previous);
        if assigned(hp2) and
           (hp2.typ = ait_regalloc) and
           (tai_regalloc(hp2).ratype=ra_dealloc) and
           (getregtype(tai_regalloc(hp2).reg) = R_INTREGISTER) and
           (getsupreg(tai_regalloc(hp2).reg) = supreg) then
          begin
            asml.remove(hp2);
            hp2.free;
            break;
          end;
      { stop once the register is actually used by an instruction }
      until not(assigned(hp2)) or regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),hp2);
    end;

  begin
    { which register(s) hold the function result depends on the return type }
    case current_procinfo.procdef.returndef.typ of
      arraydef,recorddef,pointerdef,
      stringdef,enumdef,procdef,objectdef,errordef,
      filedef,setdef,procvardef,
      classrefdef,forwarddef:
        DoRemoveLastDeallocForFuncRes(RS_EAX);
      orddef:
        if current_procinfo.procdef.returndef.size <> 0 then
          begin
            DoRemoveLastDeallocForFuncRes(RS_EAX);
            { for int64/qword }
            if current_procinfo.procdef.returndef.size = 8 then
              DoRemoveLastDeallocForFuncRes(RS_EDX);
          end;
    end;
  end;
  549. function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
  550. var
  551. TmpUsedRegs : TAllUsedRegs;
  552. hp1,hp2 : tai;
  553. begin
  554. result:=false;
  555. if MatchOpType(taicpu(p),top_reg,top_reg) then
  556. begin
  557. { vmova* reg1,reg1
  558. =>
  559. <nop> }
  560. if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
  561. begin
  562. GetNextInstruction(p,hp1);
  563. asml.Remove(p);
  564. p.Free;
  565. p:=hp1;
  566. result:=true;
  567. end
  568. else if GetNextInstruction(p,hp1) then
  569. begin
  570. if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
  571. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  572. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  573. begin
  574. { vmova* reg1,reg2
  575. vmova* reg2,reg3
  576. dealloc reg2
  577. =>
  578. vmova* reg1,reg3 }
  579. CopyUsedRegs(TmpUsedRegs);
  580. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  581. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  582. begin
  583. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  584. asml.Remove(hp1);
  585. hp1.Free;
  586. result:=true;
  587. end
  588. { special case:
  589. vmova* reg1,reg2
  590. vmova* reg2,reg1
  591. =>
  592. vmova* reg1,reg2 }
  593. else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
  594. begin
  595. asml.Remove(hp1);
  596. hp1.Free;
  597. result:=true;
  598. end
  599. end
  600. else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
  601. { we mix single and double opperations here because we assume that the compiler
  602. generates vmovapd only after double operations and vmovaps only after single operations }
  603. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
  604. GetNextInstruction(hp1,hp2) and
  605. MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
  606. MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
  607. begin
  608. CopyUsedRegs(TmpUsedRegs);
  609. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  610. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  611. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs))
  612. then
  613. begin
  614. taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
  615. asml.Remove(p);
  616. p.Free;
  617. asml.Remove(hp2);
  618. hp2.Free;
  619. p:=hp1;
  620. end;
  621. end;
  622. end;
  623. end;
  624. end;
  625. function TX86AsmOptimizer.OptPass1VOP(const p : tai) : boolean;
  626. var
  627. TmpUsedRegs : TAllUsedRegs;
  628. hp1 : tai;
  629. begin
  630. result:=false;
  631. if GetNextInstruction(p,hp1) and
  632. { we mix single and double opperations here because we assume that the compiler
  633. generates vmovapd only after double operations and vmovaps only after single operations }
  634. MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
  635. MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
  636. (taicpu(hp1).oper[1]^.typ=top_reg) then
  637. begin
  638. CopyUsedRegs(TmpUsedRegs);
  639. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  640. if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)
  641. ) then
  642. begin
  643. taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
  644. asml.Remove(hp1);
  645. hp1.Free;
  646. result:=true;
  647. end;
  648. end;
  649. end;
    { First-pass peephole optimization of a MOV instruction at p.
      Tries, in order:
        * MovAnd2Mov     : drop a redundant "and $ffffffff,reg" after a 32 bit mov
        * MovMov2Mov 2/3 : forward x through a dead temp register
        * Mov+test/or    : read from the source reg to avoid a write/read penalty
        * dead store     : remove a store to the frame before leave/ret
        * Mov+cmp        : compare against the register instead of memory
        * various MOV+MOV combinations (see inline comments)
        * MovOpMov2Op    : fold movsX/movzX + arith + mov back
        * mov0+bts/or    : reorder to mov/bts
        * MovLea2Add     : turn mov+lea into add
      Returns true when p was changed in a way that warrants rescanning. }
    function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
      var
        hp1, hp2: tai;
        TmpUsedRegs : TAllUsedRegs;
        GetNextIntruction_p : Boolean;   { whether a successor instruction exists in hp1 }
      begin
        Result:=false;
        GetNextIntruction_p:=GetNextInstruction(p, hp1);
        { mov x,reg followed by "and $ffffffff,reg": for 32 bit operands the
          and is a no-op, so remove it }
        if GetNextIntruction_p and
          MatchInstruction(hp1,A_AND,[]) and
          (taicpu(p).oper[1]^.typ = top_reg) and
          MatchOpType(taicpu(hp1),top_const,top_reg) and
          MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
          case taicpu(p).opsize Of
            S_L:
              if (taicpu(hp1).oper[0]^.val = $ffffffff) then
                begin
                  DebugMsg('PeepHole Optimization,MovAnd2Mov',p);
                  asml.remove(hp1);
                  hp1.free;
                  Result:=true;
                  exit;
                end;
          end
        else if GetNextIntruction_p and
          MatchInstruction(hp1,A_MOV,[]) and
          (taicpu(p).oper[1]^.typ = top_reg) and
          (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
          MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
          begin
            CopyUsedRegs(TmpUsedRegs);
            UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
            { we have
                mov x, %treg
                mov %treg, y
            }
            if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
              not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
              { we've got
                  mov x, %treg
                  mov %treg, y
                with %treg is not used after }
              case taicpu(p).oper[0]^.typ Of
                top_reg:
                  begin
                    { change
                        mov %reg, %treg
                        mov %treg, y
                      to
                        mov %reg, y
                    }
                    { NOTE(review): Result is not set to true before this Exit —
                      confirm whether callers rely on the return value here }
                    taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
                    DebugMsg('PeepHole Optimization,MovMov2Mov 2',p);
                    asml.remove(hp1);
                    hp1.free;
                    ReleaseUsedRegs(TmpUsedRegs);
                    Exit;
                  end;
                top_ref:
                  if (taicpu(hp1).oper[1]^.typ = top_reg) then
                    begin
                      { change
                          mov mem, %treg
                          mov %treg, %reg
                        to
                          mov mem, %reg
                      }
                      taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                      DebugMsg('PeepHole Optimization,MovMov2Mov 3',p);
                      asml.remove(hp1);
                      hp1.free;
                      ReleaseUsedRegs(TmpUsedRegs);
                      Exit;
                    end;
              end;
            ReleaseUsedRegs(TmpUsedRegs);
          end
        else
          { Change
              mov %reg1, %reg2
              xxx %reg2, ???
            to
              mov %reg1, %reg2
              xxx %reg1, ???
            to avoid a write/read penalty
          }
          if MatchOpType(taicpu(p),top_reg,top_reg) and
            GetNextInstruction(p,hp1) and
            (tai(hp1).typ = ait_instruction) and
            (taicpu(hp1).ops >= 1) and
            MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
            { we have
                mov %reg1, %reg2
                XXX %reg2, ???
            }
            begin
              if ((taicpu(hp1).opcode = A_OR) or
                (taicpu(hp1).opcode = A_TEST)) and
                (taicpu(hp1).oper[1]^.typ = top_reg) and
                (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
                { we have
                    mov %reg1, %reg2
                    test/or %reg2, %reg2
                }
                begin
                  CopyUsedRegs(TmpUsedRegs);
                  { reg1 will be used after the first instruction,
                    so update the allocation info }
                  AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                  if GetNextInstruction(hp1, hp2) and
                    (hp2.typ = ait_instruction) and
                    taicpu(hp2).is_jmp and
                    not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
                    { change
                        mov %reg1, %reg2
                        test/or %reg2, %reg2
                        jxx
                      to
                        test %reg1, %reg1
                        jxx
                    }
                    begin
                      taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
                      taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
                      asml.remove(p);
                      p.free;
                      p := hp1;
                      ReleaseUsedRegs(TmpUsedRegs);
                      Exit;
                    end
                  else
                    { change
                        mov %reg1, %reg2
                        test/or %reg2, %reg2
                      to
                        mov %reg1, %reg2
                        test/or %reg1, %reg1
                    }
                    begin
                      taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
                      taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
                    end;
                  ReleaseUsedRegs(TmpUsedRegs);
                end
            end
          else
            { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
              x >= RetOffset) as it doesn't do anything (it writes either to a
              parameter or to the temporary storage room for the function
              result)
            }
            if GetNextIntruction_p and
              (tai(hp1).typ = ait_instruction) then
              begin
                if IsExitCode(hp1) and
                  { NOTE(review): bare p is passed here while most other call
                    sites use taicpu(p) — confirm MatchOpType accepts tai }
                  MatchOpType(p,top_reg,top_ref) and
                  (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
                  not(assigned(current_procinfo.procdef.funcretsym) and
                     (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
                  (taicpu(p).oper[1]^.ref^.index = NR_NO) then
                  begin
                    asml.remove(p);
                    p.free;
                    p:=hp1;
                    DebugMsg('Peephole removed deadstore before leave/ret',p);
                    RemoveLastDeallocForFuncRes(p);
                    exit;
                  end
                { change
                    mov reg1, mem1
                    cmp x, mem1
                  to
                    mov reg1, mem1
                    cmp x, reg1
                }
                else if MatchOpType(p,top_reg,top_ref) and
                  MatchInstruction(hp1,A_CMP,[taicpu(p).opsize]) and
                  (taicpu(hp1).oper[1]^.typ = top_ref) and
                  RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
                  begin
                    taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
                    AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                  end;
              end;
        { Next instruction is also a MOV ? }
        if GetNextIntruction_p and
          MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
          begin
            if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
              (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
              { mov reg1, mem1     or     mov mem1, reg1
                mov mem2, reg2            mov reg2, mem2 }
              begin
                if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
                  { mov reg1, mem1     or     mov mem1, reg1
                    mov mem2, reg1            mov reg2, mem1 }
                  begin
                    if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
                      { Removes the second statement from
                          mov reg1, mem1/reg2
                          mov mem1/reg2, reg1 }
                      begin
                        if taicpu(p).oper[0]^.typ=top_reg then
                          AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                        DebugMsg('PeepHole Optimization,MovMov2Mov 1',p);
                        asml.remove(hp1);
                        hp1.free;
                        Result:=true;
                        exit;
                      end
                    else
                      begin
                        CopyUsedRegs(TmpUsedRegs);
                        UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                        if (taicpu(p).oper[1]^.typ = top_ref) and
                          { mov reg1, mem1
                            mov mem2, reg1 }
                          (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
                          GetNextInstruction(hp1, hp2) and
                          MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
                          OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
                          OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
                          not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
                          { change to
                              mov reg1, mem1          mov reg1, mem1
                              mov mem2, reg1          cmp reg1, mem2
                              cmp mem1, reg1
                          }
                          begin
                            asml.remove(hp2);
                            hp2.free;
                            taicpu(hp1).opcode := A_CMP;
                            taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
                            taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
                            AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
                            DebugMsg('Peephole MovMovCmp2MovCmp done',hp1);
                          end;
                        ReleaseUsedRegs(TmpUsedRegs);
                      end;
                  end
                else if (taicpu(p).oper[1]^.typ=top_ref) and
                  OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
                  begin
                    { mov reg1,mem1; mov mem1,reg2 -> read reg1 instead of mem1 }
                    AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
                    taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
                    DebugMsg('PeepHole Optimization,MovMov2MovMov1',p);
                  end
                else
                  begin
                    CopyUsedRegs(TmpUsedRegs);
                    if GetNextInstruction(hp1, hp2) and
                      MatchOpType(taicpu(p),top_ref,top_reg) and
                      MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
                      (taicpu(hp1).oper[1]^.typ = top_ref) and
                      MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
                      MatchOpType(taicpu(hp2),top_ref,top_reg) and
                      RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
                      if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
                        not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
                        { mov mem1, %reg1
                          mov %reg1, mem2
                          mov mem2, reg2
                          to:
                          mov mem1, reg2
                          mov reg2, mem2 }
                        begin
                          AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
                          DebugMsg('PeepHole Optimization,MovMovMov2MovMov 1',p);
                          taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
                          taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
                          asml.remove(hp2);
                          hp2.free;
                        end
{$ifdef i386}
                      { this is enabled for i386 only, as the rules to create the reg sets below
                        are too complicated for x86-64, so this makes this code too error prone
                        on x86-64
                      }
                      else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
                        not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
                        not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
                        { mov mem1, reg1         mov mem1, reg1
                          mov reg1, mem2         mov reg1, mem2
                          mov mem2, reg2         mov mem2, reg1
                          to:                    to:
                          mov mem1, reg1         mov mem1, reg1
                          mov mem1, reg2         mov reg1, mem2
                          mov reg1, mem2

                          or (if mem1 depends on reg1
                          and/or if mem2 depends on reg2)
                          to:
                          mov mem1, reg1
                          mov reg1, mem2
                          mov reg1, reg2
                        }
                        begin
                          taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
                          taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
                          taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
                          taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
                          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
                          { extend the live ranges of any base/index registers of
                            mem1 that the reordered loads still read }
                          if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
                            (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
                            AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
                          if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
                            (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
                            AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
                        end
                      else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
                        begin
                          taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
                          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
                        end
                      else
                        begin
                          asml.remove(hp2);
                          hp2.free;
                        end
{$endif i386}
                      ;
                    ReleaseUsedRegs(TmpUsedRegs);
                  end;
              end
            (* { movl [mem1],reg1
                 movl [mem1],reg2
                 to
                 movl [mem1],reg1
                 movl reg1,reg2
               }
            else if (taicpu(p).oper[0]^.typ = top_ref) and
              (taicpu(p).oper[1]^.typ = top_reg) and
              (taicpu(hp1).oper[0]^.typ = top_ref) and
              (taicpu(hp1).oper[1]^.typ = top_reg) and
              (taicpu(p).opsize = taicpu(hp1).opsize) and
              RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
              (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
              (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
              taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
            else*)
            { movl const1,[mem1]
              movl [mem1],reg1
              to
              movl const1,reg1
              movl reg1,[mem1]
            }
            else if MatchOpType(Taicpu(p),top_const,top_ref) and
              MatchOpType(Taicpu(hp1),top_ref,top_reg) and
              (taicpu(p).opsize = taicpu(hp1).opsize) and
              RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
              not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
              begin
                AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
                taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
                taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
                taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
                taicpu(hp1).fileinfo := taicpu(p).fileinfo;
                DebugMsg('PeepHole Optimization,MovMov2MovMov 1',p);
              end
          end
        else if (taicpu(p).oper[1]^.typ = top_reg) and
          GetNextIntruction_p and
          (hp1.typ = ait_instruction) and
          GetNextInstruction(hp1, hp2) and
          MatchInstruction(hp2,A_MOV,[]) and
          OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
          (taicpu(hp2).oper[0]^.typ=top_reg) and
          (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
          (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
           { second case: a 32->64 bit move whose arith op works on the full
             64 bit register }
           ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
            IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
          ) then
          { change   movsX/movzX    reg/ref, reg2
                     add/sub/or/... reg3/$const, reg2
                     mov            reg2 reg/ref
            to      add/sub/or/... reg3/$const, reg/ref }
          begin
            CopyUsedRegs(TmpUsedRegs);
            UpdateUsedRegs(TmpUsedRegs, tai(p.next));
            UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
            If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
              begin
                { by example:
                    movswl  %si,%eax        movswl  %si,%eax      p
                    decl    %eax            addl    %edx,%eax     hp1
                    movw    %ax,%si         movw    %ax,%si       hp2
                  ->
                    movswl  %si,%eax        movswl  %si,%eax      p
                    decw    %eax            addw    %edx,%eax     hp1
                    movw    %ax,%si         movw    %ax,%si       hp2
                }
                DebugMsg('Peephole Optimization MovOpMov2Op ('+
                      std_op2str[taicpu(p).opcode]+gas_opsize2str[taicpu(p).opsize]+' '+
                      std_op2str[taicpu(hp1).opcode]+gas_opsize2str[taicpu(hp1).opsize]+' '+
                      std_op2str[taicpu(hp2).opcode]+gas_opsize2str[taicpu(hp2).opsize],p);
                taicpu(hp1).changeopsize(taicpu(hp2).opsize);
                {
                  ->
                    movswl  %si,%eax        movswl  %si,%eax      p
                    decw    %si             addw    %dx,%si       hp1
                    movw    %ax,%si         movw    %ax,%si       hp2
                }
                case taicpu(hp1).ops of
                  1:
                    begin
                      taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
                      if taicpu(hp1).oper[0]^.typ=top_reg then
                        setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
                    end;
                  2:
                    begin
                      taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
                      { shift counts keep their own (byte) sub register }
                      if (taicpu(hp1).oper[0]^.typ=top_reg) and
                        (taicpu(hp1).opcode<>A_SHL) and
                        (taicpu(hp1).opcode<>A_SHR) and
                        (taicpu(hp1).opcode<>A_SAR) then
                        setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
                    end;
                  else
                    internalerror(2008042701);
                end;
                {
                  ->
                    decw    %si             addw    %dx,%si       p
                }
                asml.remove(p);
                asml.remove(hp2);
                p.Free;
                hp2.Free;
                p := hp1;
              end;
            ReleaseUsedRegs(TmpUsedRegs);
          end
        else if GetNextIntruction_p and
          MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
          GetNextInstruction(hp1, hp2) and
          MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
          MatchOperand(Taicpu(p).oper[0]^,0) and
          (Taicpu(p).oper[1]^.typ = top_reg) and
          MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
          MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
          { mov reg1,0
            bts reg1,operand1             -->      mov reg1,operand2
            or  reg1,operand2                      bts reg1,operand1 }
          begin
            Taicpu(hp2).opcode:=A_MOV;
            asml.remove(hp1);
            insertllitem(hp2,hp2.next,hp1);
            asml.remove(p);
            p.free;
            p:=hp1;
          end
        else if GetNextIntruction_p and
           MatchInstruction(hp1,A_LEA,[S_L]) and
           MatchOpType(Taicpu(p),top_ref,top_reg) and
           ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
             (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
            ) or
            (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
             (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
            )
           ) then
          { mov reg1,ref
            lea reg2,[reg1,reg2]
            to
            add reg2,ref}
          begin
            CopyUsedRegs(TmpUsedRegs);
            { reg1 may not be used afterwards }
            if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
              begin
                Taicpu(hp1).opcode:=A_ADD;
                Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
                DebugMsg('Peephole MovLea2Add done',hp1);
                asml.remove(p);
                p.free;
                p:=hp1;
              end;
            ReleaseUsedRegs(TmpUsedRegs);
          end;
      end;
    { Second-pass peephole optimization of a MOV instruction at p.
      Handles:
        * mov reg1,reg2 + mov/zx/sx (reg2,..),reg2 -> use reg1 in the reference
        * mov (ref),reg + foldable arith / lea + mov reg,(ref) -> fold the
          arithmetic directly onto the memory operand ("FoldLea")
      Returns true when p was replaced and the caller should rescan. }
    function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
      var
        TmpUsedRegs : TAllUsedRegs;
        hp1,hp2: tai;
      begin
        Result:=false;
        if MatchOpType(taicpu(p),top_reg,top_reg) and
          GetNextInstruction(p, hp1) and
          MatchInstruction(hp1,A_MOV,A_MOVZX,A_MOVSX,[]) and
          MatchOpType(taicpu(hp1),top_ref,top_reg) and
          ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg)
           or
           (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)
          ) and
          (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
          { mov reg1, reg2
            mov/zx/sx (reg2, ..), reg2      to   mov/zx/sx (reg1, ..), reg2}
          begin
            if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
              taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
            if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
              taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
            asml.remove(p);
            p.free;
            p := hp1;
            Result:=true;
            exit;
          end
        else if (taicpu(p).oper[0]^.typ = top_ref) and
          GetNextInstruction(p,hp1) and
          (hp1.typ = ait_instruction) and
          { while the GetNextInstruction(hp1,hp2) call could be factored out,
            doing it separately in both branches allows to do the cheap checks
            with low probability earlier }
          ((IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
            GetNextInstruction(hp1,hp2) and
            MatchInstruction(hp2,A_MOV,[])
           ) or
           ((taicpu(hp1).opcode=A_LEA) and
            GetNextInstruction(hp1,hp2) and
            MatchInstruction(hp2,A_MOV,[]) and
            { the lea reference may use reg1 as base OR index, but not both,
              and only with scale 1 / zero offset (or offset-only forms) }
            ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
              (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
             ) or
             (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,
              taicpu(p).oper[1]^.reg) and
              (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg)) or
             (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_NO)) or
             (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,NR_NO,taicpu(p).oper[1]^.reg))
            ) and
            ((MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^)) or not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)))
           )
          ) and
          MatchOperand(taicpu(hp1).oper[taicpu(hp1).ops-1]^,taicpu(hp2).oper[0]^) and
          (taicpu(hp2).oper[1]^.typ = top_ref) then
          begin
            CopyUsedRegs(TmpUsedRegs);
            UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
            if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
              not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2, TmpUsedRegs))) then
              { change   mov            (ref), reg
                         add/sub/or/... reg2/$const, reg
                         mov            reg, (ref)
                         # release reg
                to       add/sub/or/... reg2/$const, (ref) }
              begin
                case taicpu(hp1).opcode of
                  A_INC,A_DEC,A_NOT,A_NEG :
                    { single-operand ops: point them at the memory location }
                    taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
                  A_LEA :
                    begin
                      { turn the lea into an add of the "other" component
                        (index, base, or the constant offset) to memory }
                      taicpu(hp1).opcode:=A_ADD;
                      if (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.index<>NR_NO) then
                        taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
                      else if (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.base<>NR_NO) then
                        taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base)
                      else
                        taicpu(hp1).loadconst(0,taicpu(hp1).oper[0]^.ref^.offset);
                      taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                      DebugMsg('Peephole FoldLea done',hp1);
                    end
                  else
                    { two-operand arith: rewrite the destination to memory }
                    taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                end;
                asml.remove(p);
                asml.remove(hp2);
                p.free;
                hp2.free;
                p := hp1
              end;
            ReleaseUsedRegs(TmpUsedRegs);
          end;
      end;
    { Second-pass peephole optimization of an IMUL instruction at p.
      Folds a preceding register-to-register mov into a three-operand imul:
          mov  reg1,reg2
          imul y,reg2          ->     imul y,reg1,reg2
      Only applies when y is a constant (or a full-address reference), the
      destination of the mov equals the imul destination, and reg2 is dead
      after p.  Returns true on success. }
    function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
      var
        TmpUsedRegs : TAllUsedRegs;
        hp1 : tai;
      begin
        Result:=false;
        if (taicpu(p).ops >= 2) and
           ((taicpu(p).oper[0]^.typ = top_const) or
            ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
           (taicpu(p).oper[1]^.typ = top_reg) and
           ((taicpu(p).ops = 2) or
            ((taicpu(p).oper[2]^.typ = top_reg) and
             (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
           GetLastInstruction(p,hp1) and
           MatchInstruction(hp1,A_MOV,[]) and
           MatchOpType(hp1,top_reg,top_reg) and
           { allow the 64 bit imul after a 32 bit mov of the same super
             register (the mov already zero-extended the value) }
           ((taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) or
            ((taicpu(hp1).opsize=S_L) and (taicpu(p).opsize=S_Q) and SuperRegistersEqual(taicpu(hp1).oper[1]^.reg,taicpu(p).oper[1]^.reg))) then
          begin
            CopyUsedRegs(TmpUsedRegs);
            if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,p,TmpUsedRegs)) then
              { change
                  mov reg1,reg2
                  imul y,reg2 to imul y,reg1,reg2 }
              begin
                taicpu(p).ops := 3;
                taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
                taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
                DebugMsg('Peephole MovImul2Imul done',p);
                asml.remove(hp1);
                hp1.free;
                result:=true;
              end;
            ReleaseUsedRegs(TmpUsedRegs);
          end;
      end;
    { First-pass peephole optimization of an AND instruction at p.
      Handles:
        * AndAnd2And    : merge two consecutive and-with-constant into one
        * AndMovzToAnd  : remove a movzx made redundant because the and mask
                          already clears the extended bits
        * AndMovsxToAnd : same for movsx when the mask guarantees a clear
                          sign bit
        * And2Test      : turn "and x,reg; jcc" into "test x,reg; jcc" when
                          reg is dead before the conditional jump }
    function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
      var
        hp1 : tai;
      begin
        Result:=false;
        if not(GetNextInstruction(p, hp1)) then
          exit;
        { NOTE(review): bare p/hp1 are passed to MatchOpType here while other
          call sites use taicpu(...) — confirm the parameter type accepts tai }
        if MatchOpType(p,top_const,top_reg) and
          MatchInstruction(hp1,A_AND,[]) and
          MatchOpType(hp1,top_const,top_reg) and
          (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
          { the second register must contain the first one, so compare their subreg types }
          (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
          (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
          { change
              and const1, reg
              and const2, reg
            to
              and (const1 and const2), reg
          }
          begin
            taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
            DebugMsg('Peephole AndAnd2And done',hp1);
            asml.remove(p);
            p.Free;
            p:=hp1;
            Result:=true;
            exit;
          end
        else if MatchOpType(p,top_const,top_reg) and
          MatchInstruction(hp1,A_MOVZX,[]) and
          (taicpu(hp1).oper[0]^.typ = top_reg) and
          MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
          (getsubreg(taicpu(hp1).oper[0]^.reg)=getsubreg(taicpu(hp1).oper[1]^.reg)) and
          (((taicpu(p).opsize=S_W) and
            (taicpu(hp1).opsize=S_BW)) or
           ((taicpu(p).opsize=S_L) and
            (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
           or
           ((taicpu(p).opsize=S_Q) and
            (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
          ) then
          begin
            { the movzx is redundant when the and-mask fits entirely into the
              source width of the zero extension }
            if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
                ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
               ) or
               (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
                ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
               or
               (((taicpu(hp1).opsize)=S_LQ) and
                ((taicpu(p).oper[0]^.val and $ffffffff)=taicpu(p).oper[0]^.val)
               )
{$endif x86_64}
               then
                 begin
                   DebugMsg('Peephole AndMovzToAnd done',p);
                   asml.remove(hp1);
                   hp1.free;
                 end;
          end
        else if MatchOpType(p,top_const,top_reg) and
          MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
          (taicpu(hp1).oper[0]^.typ = top_reg) and
          MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
          (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
          (((taicpu(p).opsize=S_W) and
            (taicpu(hp1).opsize=S_BW)) or
           ((taicpu(p).opsize=S_L) and
            (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
           or
           ((taicpu(p).opsize=S_Q) and
            (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
          ) then
          begin
            { the movsx is redundant when the and-mask also clears the sign
              bit of the source width, so sign extension equals zero extension }
            if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
                ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
               ) or
               (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
                ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
               or
               (((taicpu(hp1).opsize)=S_LQ) and
                ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
               )
{$endif x86_64}
               then
                 begin
                   DebugMsg('PeepHole Optimization,AndMovsxToAnd',p);
                   asml.remove(hp1);
                   hp1.free;
                 end;
          end
        else if (taicpu(p).oper[1]^.typ = top_reg) and
          (hp1.typ = ait_instruction) and
          (taicpu(hp1).is_jmp) and
          (taicpu(hp1).opcode<>A_JMP) and
          not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
          { change
              and x, reg
              jxx
            to
              test x, reg
              jxx
            if reg is deallocated before the
            jump, but only if it's a conditional jump (PFV)
          }
          taicpu(p).opcode := A_TEST;
      end;
  1372. procedure TX86AsmOptimizer.PostPeepholeOptMov(const p : tai);
  1373. begin
  1374. if MatchOperand(taicpu(p).oper[0]^,0) and
  1375. (taicpu(p).oper[1]^.typ = Top_Reg) and
  1376. not(RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
  1377. { change "mov $0, %reg" into "xor %reg, %reg" }
  1378. begin
  1379. taicpu(p).opcode := A_XOR;
  1380. taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
  1381. end;
  1382. end;
  1383. end.