aoptx86.pas 58 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
  27. type
  { Peephole optimizer class for x86; extends TAsmOptimizer with register
    dependency helpers and the per-opcode optimization passes of this unit. }
  28. TX86AsmOptimizer = class(TAsmOptimizer)
  { returns true if instruction hp writes the whole of reg with a value that
    does not depend on the previous contents of reg }
  29. function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
  30. protected
  31. { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
  32. function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
  33. { checks whether reading the value in reg1 depends on the value of reg2. This
  34. is very similar to SuperRegisterEquals, except it takes into account that
  35. R_SUBH and R_SUBL are independent (e.g. reading from AL does not
  36. depend on the value in AH). }
  37. function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
  38. procedure PostPeepholeOptMov(const p : tai);
  39. function OptPass1AND(var p : tai) : boolean;
  { pass 1 handling of vmovaps/vmovapd: drops self-moves and folds move chains }
  40. function OptPass1VMOVAP(var p : tai) : boolean;
  { pass 1 handling of a three-operand vector op followed by vmovaps/vmovapd }
  41. function OptPass1VOP(const p : tai) : boolean;
  42. function OptPass1MOV(var p : tai) : boolean;
  43. function OptPass2MOV(var p : tai) : boolean;
  44. function OptPass2Imul(var p : tai) : boolean;
  { inserts s as an assembler comment before p when DEBUG_AOPTCPU is defined,
    otherwise does nothing }
  45. procedure DebugMsg(const s : string; p : tai);inline;
  { makes sure reg is marked as allocated between (and including) p1 and p2 }
  46. procedure AllocRegBetween(reg : tregister; p1,p2 : tai;var initialusedregs : TAllUsedRegs);
  { true if p is ret, leave+ret or "mov ebp,esp; pop ebp; ret" }
  47. class function IsExitCode(p : tai) : boolean;
  { true if hp1 is a simple arithmetic/logic instruction whose destination is reg }
  48. class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean;
  { removes the last dealloc of the function result register(s) found before p }
  49. procedure RemoveLastDeallocForFuncRes(p : tai);
  50. end;
  { MatchInstruction: true if instr is an instruction whose opcode is one of the
    given opcodes and whose operand size is in opsize (an empty opsize set
    accepts every size) }
  51. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  52. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  53. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  54. function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;
  { MatchOperand: true if oper is the given register / the given constant, or
    if both operands have the same type and the same contents }
  55. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  56. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  57. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  { true if every compared field of both references is equal }
  58. function RefsEqual(const r1, r2: treference): boolean;
  { returns true, if ref has a zero offset, no segment, no symbols, a scale factor
    of 0 or 1 and uses the registers passed as base and index; NR_INVALID for
    base or index accepts any register in that position }
  59. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  60. { returns true, if ref is a reference using only the registers passed as base and index
  61. and having an offset (the offset itself is not examined, any value is accepted) }
  62. function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  { true if instr has exactly two operands of types ot0 and ot1; instr is cast
    to taicpu without checking its typ }
  63. function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
  64. implementation
  65. uses
  66. cutils,
  67. verbose,
  68. procinfo,
  69. symconst,symsym,
  70. itcpugas;
  { Returns true when instr is an instruction with opcode op whose operand
    size is a member of opsize. An empty opsize set accepts every size. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
    begin
      if instr.typ <> ait_instruction then
        result := false
      else if taicpu(instr).opcode <> op then
        result := false
      else
        result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { Two-opcode variant: returns true when instr is an instruction whose opcode
    is op1 or op2 and whose operand size is in opsize (empty set = any size). }
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
    begin
      result := false;
      if instr.typ <> ait_instruction then
        exit;
      if (taicpu(instr).opcode <> op1) and
         (taicpu(instr).opcode <> op2) then
        exit;
      result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { Three-opcode variant: returns true when instr is an instruction whose
    opcode is op1, op2 or op3 and whose operand size is in opsize
    (empty set = any size). }
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
    begin
      result := false;
      if instr.typ <> ait_instruction then
        exit;
      if (taicpu(instr).opcode <> op1) and
         (taicpu(instr).opcode <> op2) and
         (taicpu(instr).opcode <> op3) then
        exit;
      result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { Open-array variant: returns true when instr is an instruction whose opcode
    occurs in ops and whose operand size is in opsize (empty set = any size).
    An empty ops array never matches. }
  function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
    const opsize : topsizes) : boolean;
    var
      idx : integer;
    begin
      result:=false;
      { the instruction kind and the size filter do not depend on the opcode
        being tried, so they are tested once up front }
      if instr.typ<>ait_instruction then
        exit;
      if (opsize<>[]) and not(taicpu(instr).opsize in opsize) then
        exit;
      for idx:=low(ops) to high(ops) do
        if taicpu(instr).opcode=ops[idx] then
          begin
            result:=true;
            exit;
          end;
    end;
  { Returns true when oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ = top_reg then
        result := (oper.reg = reg)
      else
        result := false;
    end;
  { Returns true when oper is a constant operand with value a. }
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
    begin
      if oper.typ = top_const then
        result := (oper.val = a)
      else
        result := false;
    end;
  { Returns true when both operands have the same type and the same contents:
    equal value for constants, equal register for registers, RefsEqual for
    references. Two operands of equal but any other type raise an internal
    error. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
    begin
      result := (oper1.typ = oper2.typ);
      if not result then
        exit;
      case oper1.typ of
        top_const:
          result := (oper1.val = oper2.val);
        top_reg:
          result := (oper1.reg = oper2.reg);
        top_ref:
          result := RefsEqual(oper1.ref^, oper2.ref^);
        else
          internalerror(2013102801);
      end;
    end;
  { Returns true when r1 and r2 agree in offset, segment, base, index,
    scale factor, symbol, refaddr and relsymbol. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      RefsEqual := false;
      if r1.offset <> r2.offset then
        exit;
      if (r1.segment <> r2.segment) or (r1.base <> r2.base) then
        exit;
      if (r1.index <> r2.index) or (r1.scalefactor <> r2.scalefactor) then
        exit;
      if (r1.symbol <> r2.symbol) or (r1.refaddr <> r2.refaddr) then
        exit;
      RefsEqual := (r1.relsymbol = r2.relsymbol);
    end;
  { Returns true when ref is a plain register reference: zero offset, scale
    factor 0 or 1, no segment, no symbol and no relsymbol, and its base/index
    match the given registers. Passing NR_INVALID for base or index accepts
    any register in that position. }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
    begin
      Result:=false;
      if (ref.offset<>0) or not(ref.scalefactor in [0,1]) then
        exit;
      if (ref.segment<>NR_NO) or (ref.symbol<>nil) or (ref.relsymbol<>nil) then
        exit;
      if (base<>NR_INVALID) and (ref.base<>base) then
        exit;
      Result:=(index=NR_INVALID) or (ref.index=index);
    end;
  { Same test as MatchReference except that the offset is not examined:
    scale factor 0 or 1, no segment, no symbol and no relsymbol, and base/index
    matching the given registers (NR_INVALID accepts any register). }
  function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
    begin
      Result:=false;
      if not(ref.scalefactor in [0,1]) then
        exit;
      if (ref.segment<>NR_NO) or (ref.symbol<>nil) or (ref.relsymbol<>nil) then
        exit;
      if (base<>NR_INVALID) and (ref.base<>base) then
        exit;
      Result:=(index=NR_INVALID) or (ref.index=index);
    end;
  { Returns true when instr has exactly two operands and their types are ot0
    and ot1 respectively. instr is cast to taicpu without a typ check, so the
    caller has to pass an instruction. }
  function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
    var
      ins : taicpu;
    begin
      ins:=taicpu(instr);
      { the operand count is tested first so that the operands are only
        dereferenced when both of them exist }
      if ins.ops<>2 then
        Result:=false
      else
        Result:=(ins.oper[0]^.typ=ot0) and
                (ins.oper[1]^.typ=ot1);
    end;
  { DebugMsg: with DEBUG_AOPTCPU defined, the message s is inserted as an
    assembler comment in front of p; without it the routine is an empty
    inline stub. }
  {$ifdef DEBUG_AOPTCPU}
  procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
    var
      note : tai_comment;
    begin
      note:=tai_comment.Create(strpnew(s));
      asml.insertbefore(note, p);
    end;
  {$else DEBUG_AOPTCPU}
  procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
      { intentionally empty }
    end;
  {$endif DEBUG_AOPTCPU}
  { Returns true when a write to reg1 replaces every bit of reg2.
    Registers with different super registers never overwrite each other;
    non-integer registers sharing a super register always do. For integer
    registers the answer depends on the sub register written. }
  function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
    begin
      result:=false;
      if not SuperRegistersEqual(reg1,reg2) then
        exit;
      if getregtype(reg1)<>R_INTREGISTER then
        begin
          { same super register and no sub register aliasing to consider }
          result:=true;
          exit;
        end;
      case getsubreg(reg1) of
        { writing the low byte leaves the high byte and everything above it
          untouched:  reg2 := (reg2 and $ffffff00) or byte(reg1) }
        R_SUBL:
          result:=(getsubreg(reg2)=R_SUBL);
        { writing the high byte leaves the low byte and the upper bits
          untouched:  reg2 := (reg2 and $ffff00ff) or (reg1 and $ff00) }
        R_SUBH:
          result:=(getsubreg(reg2)=R_SUBH);
        { writing 16 bits keeps the upper 16 bits of anything wider:
          reg2 := (reg2 and $ffff0000) or word(reg1) }
        R_SUBW:
          result:=(getsubreg(reg2) in [R_SUBL,R_SUBH,R_SUBW]);
        { a 32 bit write overwrites every other sub register as well, since
          it clears the upper 32 bits of the 64 bit register on x86_64 }
        R_SUBD,
        R_SUBQ:
          result:=true;
        else
          internalerror(2017042801);
      end;
    end;
  { Returns true when the value read from reg1 depends on the contents of
    reg2. Registers with different super registers are independent. Within one
    integer super register only the low and high byte registers are
    independent of each other. }
  function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
    begin
      result:=false;
      if not SuperRegistersEqual(reg1,reg2) then
        exit;
      { same super register: dependent unless proven otherwise below }
      result:=true;
      if getregtype(reg1)<>R_INTREGISTER then
        exit;
      case getsubreg(reg1) of
        R_SUBL:
          result:=(getsubreg(reg2)<>R_SUBH);
        R_SUBH:
          result:=(getsubreg(reg2)<>R_SUBL);
        R_SUBW,
        R_SUBD,
        R_SUBQ:
          result:=true;
        else
          internalerror(2017042802);
      end;
    end;
  236. { allocates register reg between (and including) instructions p1 and p2
  237. the type of p1 and p2 must not be in SkipInstr
  238. note that this routine is both called from the peephole optimizer
  239. (where optinfo is not yet initialised) and from the cse (where it is) }
  { Parameters:
      reg             - register to keep allocated; widened below to its whole
                        super register for integer and MM registers
      p1, p2          - first and last instruction of the range
      initialusedregs - registers in use at p1; reg is added to it when an
                        allocation marker has to be inserted before p1 }
  240. procedure TX86AsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
  241. var
  242. hp, start: tai;
  243. removedsomething,
  244. firstRemovedWasAlloc,
  245. lastRemovedWasDealloc: boolean;
  246. begin
  247. {$ifdef EXTDEBUG}
  248. { if assigned(p1.optinfo) and
  249. (ptaiprop(p1.optinfo)^.usedregs <> initialusedregs) then
  250. internalerror(2004101010); }
  251. {$endif EXTDEBUG}
  { remember the original start: p1 is advanced by the loop below }
  252. start := p1;
  253. if (reg = NR_ESP) or
  254. (reg = current_procinfo.framepointer) or
  255. not(assigned(p1)) then
  256. { this happens with registers which are loaded implicitly, outside the }
  257. { current block (e.g. esi with self) }
  258. exit;
  259. { make sure we allocate it for this instruction }
  260. getnextinstruction(p2,p2);
  261. lastRemovedWasDealloc := false;
  262. removedSomething := false;
  263. firstRemovedWasAlloc := false;
  { NOTE(review): the allocregdebug code below refers to "supreg", which is not
    declared in this routine, and calls insertllitem with a leading asml
    argument unlike the other calls here - confirm it still compiles when
    allocregdebug is defined }
  264. {$ifdef allocregdebug}
  265. hp := tai_comment.Create(strpnew('allocating '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
  266. ' from here...'));
  267. insertllitem(asml,p1.previous,p1,hp);
  268. hp := tai_comment.Create(strpnew('allocated '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
  269. ' till here...'));
  270. insertllitem(asml,p2,p2.next,hp);
  271. {$endif allocregdebug}
  272. { do it the safe way: always allocate the full super register,
  273. as we do no register re-allocation in the peephole optimizer,
  274. this does not hurt
  275. }
  276. case getregtype(reg) of
  277. R_MMREGISTER:
  278. reg:=newreg(R_MMREGISTER,getsupreg(reg),R_SUBMMWHOLE);
  279. R_INTREGISTER:
  280. reg:=newreg(R_INTREGISTER,getsupreg(reg),R_SUBWHOLE);
  281. end;
  { reg is not yet in use at p1: insert an allocation marker in front of p1
    and record reg as used }
  282. if not(RegInUsedRegs(reg,initialusedregs)) then
  283. begin
  284. hp := tai_regalloc.alloc(reg,nil);
  285. insertllItem(p1.previous,p1,hp);
  286. IncludeRegInUsedRegs(reg,initialusedregs);
  287. end;
  { walk from p1 up to (the already advanced) p2 and delete every
    alloc/dealloc marker of reg found on the way }
  288. while assigned(p1) and
  289. (p1 <> p2) do
  290. begin
  291. if assigned(p1.optinfo) then
  292. internalerror(2014022301); // IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs);
  293. p1 := tai(p1.next);
  294. repeat
  { skip everything in SkipInstr except register allocation markers }
  295. while assigned(p1) and
  296. (p1.typ in (SkipInstr-[ait_regalloc])) Do
  297. p1 := tai(p1.next);
  298. { remove all allocation/deallocation info about the register in between }
  299. if assigned(p1) and
  300. (p1.typ = ait_regalloc) then
  301. begin
  302. { same super register, different sub register? }
  303. if SuperRegistersEqual(reg,tai_regalloc(p1).reg) and (tai_regalloc(p1).reg<>reg) then
  304. begin
  305. if (getsubreg(tai_regalloc(p1).reg)>getsubreg(reg)) or (getsubreg(reg)=R_SUBH) then
  306. internalerror(2016101501);
  { rewrite the marker to the whole register so the test below matches it }
  307. tai_regalloc(p1).reg:=reg;
  308. end;
  309. if tai_regalloc(p1).reg=reg then
  310. begin
  { remember the kind of the first and of the last marker removed; they
    decide which markers are re-inserted after the loop }
  311. if not removedSomething then
  312. begin
  313. firstRemovedWasAlloc := tai_regalloc(p1).ratype=ra_alloc;
  314. removedSomething := true;
  315. end;
  316. lastRemovedWasDealloc := (tai_regalloc(p1).ratype=ra_dealloc);
  { fetch the successor before the marker is unlinked and freed }
  317. hp := tai(p1.Next);
  318. asml.Remove(p1);
  319. p1.free;
  320. p1 := hp;
  321. end
  322. else
  323. p1 := tai(p1.next);
  324. end;
  325. until not(assigned(p1)) or
  326. not(p1.typ in SkipInstr);
  327. end;
  { p1 now is the end of the range (nil if the list ended first): if the first
    marker removed was an alloc, allocate reg in front of the original start;
    if the last marker removed was a dealloc, release reg in front of p1 }
  328. if assigned(p1) then
  329. begin
  330. if firstRemovedWasAlloc then
  331. begin
  332. hp := tai_regalloc.Alloc(reg,nil);
  333. insertLLItem(start.previous,start,hp);
  334. end;
  335. if lastRemovedWasDealloc then
  336. begin
  337. hp := tai_regalloc.DeAlloc(reg,nil);
  338. insertLLItem(p1.previous,p1,hp);
  339. end;
  340. end;
  341. end;
  { Returns true when hp is an instruction that overwrites the whole of reg
    (see Reg1WriteOverwritesReg2Entirely) without reading reg as a source,
    neither directly nor through a memory reference. Returns false when hp is
    nil or not an instruction. }
  342. function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  343. var
  344. p: taicpu;
  345. begin
  346. if not assigned(hp) or
  347. (hp.typ <> ait_instruction) then
  348. begin
  349. Result := false;
  350. exit;
  351. end;
  352. p := taicpu(hp);
  353. Result :=
  { two-operand loads/moves: the destination (operand 1) has to be a register
    covering reg and the source (operand 0) must not involve reg }
  354. (((p.opcode = A_MOV) or
  355. (p.opcode = A_MOVZX) or
  356. (p.opcode = A_MOVSX) or
  357. (p.opcode = A_LEA) or
  358. (p.opcode = A_VMOVSS) or
  359. (p.opcode = A_VMOVSD) or
  360. (p.opcode = A_VMOVAPD) or
  361. (p.opcode = A_VMOVAPS) or
  362. (p.opcode = A_VMOVQ) or
  363. (p.opcode = A_MOVSS) or
  364. (p.opcode = A_MOVSD) or
  365. (p.opcode = A_MOVQ) or
  366. (p.opcode = A_MOVAPD) or
  367. (p.opcode = A_MOVAPS) or
  368. {$ifndef x86_64}
  369. (p.opcode = A_LDS) or
  370. (p.opcode = A_LES) or
  371. {$endif not x86_64}
  372. (p.opcode = A_LFS) or
  373. (p.opcode = A_LGS) or
  374. (p.opcode = A_LSS)) and
  375. (p.ops=2) and { A_MOVSD can have zero operands, so this check is needed }
  376. (p.oper[1]^.typ = top_reg) and
  377. (Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)) and
  378. ((p.oper[0]^.typ = top_const) or
  379. ((p.oper[0]^.typ = top_reg) and
  380. not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
  381. ((p.oper[0]^.typ = top_ref) and
  382. not RegInRef(reg,p.oper[0]^.ref^)))) or
  { pop into a register covering reg
    NOTE(review): operand 0 is read as a register without testing its typ -
    confirm pop with a memory operand cannot reach this point }
  383. ((p.opcode = A_POP) and
  384. (Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg))) or
  { three-operand imul: the destination is operand 2, operand 1 must not
    involve reg }
  385. ((p.opcode = A_IMUL) and
  386. (p.ops=3) and
  387. (Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg,reg)) and
  388. (((p.oper[1]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[1]^.reg,reg))) or
  389. ((p.oper[1]^.typ=top_ref) and not(RegInRef(reg,p.oper[1]^.ref^))))) or
  { one-operand imul/mul: the register tested against reg is chosen by the
    operand size (AX for S_B, DX/EDX/RDX for the larger sizes); for S_B reg
    additionally must not depend on AL }
  390. ((((p.opcode = A_IMUL) or
  391. (p.opcode = A_MUL)) and
  392. (p.ops=1)) and
  393. (((p.oper[0]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
  394. ((p.oper[0]^.typ=top_ref) and not(RegInRef(reg,p.oper[0]^.ref^)))) and
  395. (((p.opsize=S_B) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
  396. ((p.opsize=S_W) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
  397. ((p.opsize=S_L) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg))
  398. {$ifdef x86_64}
  399. or ((p.opsize=S_Q) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg))
  400. {$endif x86_64}
  401. )) or
  { sign extension instructions, tested against DX/EDX/RDX resp. AX }
  402. ((p.opcode = A_CWD) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
  403. ((p.opcode = A_CDQ) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg)) or
  404. {$ifdef x86_64}
  405. ((p.opcode = A_CQO) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg)) or
  406. {$endif x86_64}
  407. ((p.opcode = A_CBW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
  { segment loads: reg is the segment register named by the opcode and is not
    used in the memory reference (operand 0) }
  408. {$ifndef x86_64}
  409. ((p.opcode = A_LDS) and (reg=NR_DS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  410. ((p.opcode = A_LES) and (reg=NR_ES) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  411. {$endif not x86_64}
  412. ((p.opcode = A_LFS) and (reg=NR_FS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  413. ((p.opcode = A_LGS) and (reg=NR_GS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  414. ((p.opcode = A_LSS) and (reg=NR_SS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  { string loads, tested against AL/AX/EAX/RAX }
  415. ((p.opcode = A_LODSB) and Reg1WriteOverwritesReg2Entirely(NR_AL,reg)) or
  416. ((p.opcode = A_LODSW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg)) or
  417. ((p.opcode = A_LODSD) and Reg1WriteOverwritesReg2Entirely(NR_EAX,reg)) or
  418. {$ifdef x86_64}
  419. ((p.opcode = A_LODSQ) and Reg1WriteOverwritesReg2Entirely(NR_RAX,reg)) or
  420. {$endif x86_64}
  { setcc into a register covering reg }
  421. ((p.opcode = A_SETcc) and (p.oper[0]^.typ=top_reg) and Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg));
  422. end;
  { Returns true when p starts a procedure exit sequence:
      ret
      leave; ret
      mov ebp,esp (operand 0 = EBP, operand 1 = ESP); pop ebp; ret
    Anything else, including a non-instruction p, yields false. }
  class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
    var
      follow1,follow2 : tai;
    begin
      result:=false;
      if p.typ<>ait_instruction then
        exit;
      case taicpu(p).opcode of
        A_RET:
          result:=true;
        A_LEAVE:
          result:=GetNextInstruction(p,follow1) and
            (follow1.typ=ait_instruction) and
            (taicpu(follow1).opcode=A_RET);
        A_MOV:
          { the operands are tested in their original order, before the
            following instructions are fetched }
          result:=(taicpu(p).oper[0]^.typ=top_reg) and
            (taicpu(p).oper[0]^.reg=NR_EBP) and
            (taicpu(p).oper[1]^.typ=top_reg) and
            (taicpu(p).oper[1]^.reg=NR_ESP) and
            GetNextInstruction(p,follow1) and
            (follow1.typ=ait_instruction) and
            (taicpu(follow1).opcode=A_POP) and
            (taicpu(follow1).oper[0]^.typ=top_reg) and
            (taicpu(follow1).oper[0]^.reg=NR_EBP) and
            GetNextInstruction(follow1,follow2) and
            (follow2.typ=ait_instruction) and
            (taicpu(follow2).opcode=A_RET);
        else
          result:=false;
      end;
    end;
  { Returns true when hp1 is an arithmetic/logic instruction operating on reg:
      - add/sub/or/xor/and/shl/shr/sar whose destination (operand 1) is reg
        and whose source (operand 0) is a constant or a register other
        than reg;
      - inc/dec/neg/not whose single operand is reg.
    Every other opcode yields false. }
  class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
    var
      sourceok : boolean;
    begin
      isFoldableArithOp := False;
      case hp1.opcode of
        A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
          begin
            { operand 0 is examined first; operand 1 is only touched once
              operand 0 has been accepted }
            sourceok :=
              (hp1.oper[0]^.typ = top_const) or
              ((hp1.oper[0]^.typ = top_reg) and
               (hp1.oper[0]^.reg <> reg));
            if sourceok then
              isFoldableArithOp :=
                (hp1.oper[1]^.typ = top_reg) and
                (hp1.oper[1]^.reg = reg);
          end;
        A_INC,A_DEC,A_NEG,A_NOT:
          if hp1.oper[0]^.typ = top_reg then
            isFoldableArithOp := (hp1.oper[0]^.reg = reg);
      end;
    end;
  { Removes the closest preceding deallocation marker of the function result
    register(s) before p. Which registers are handled depends on the return
    type of the current procedure: EAX for the listed non-ordinal types, EAX
    (and EDX for 8 byte results) for ordinals of non-zero size. }
  procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);

    { Walks backwards from p and deletes the first dealloc marker of integer
      super register supreg. The walk stops at the start of the list or at the
      first item for which regInInstruction reports the whole register. }
    procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
      var
        cur: tai;
      begin
        cur := tai(p.previous);
        while assigned(cur) do
          begin
            if (cur.typ = ait_regalloc) and
               (tai_regalloc(cur).ratype=ra_dealloc) and
               (getregtype(tai_regalloc(cur).reg) = R_INTREGISTER) and
               (getsupreg(tai_regalloc(cur).reg) = supreg) then
              begin
                asml.remove(cur);
                cur.free;
                exit;
              end;
            if regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),cur) then
              exit;
            cur := tai(cur.previous);
          end;
      end;

    begin
      case current_procinfo.procdef.returndef.typ of
        arraydef,recorddef,pointerdef,
        stringdef,enumdef,procdef,objectdef,errordef,
        filedef,setdef,procvardef,
        classrefdef,forwarddef:
          DoRemoveLastDeallocForFuncRes(RS_EAX);
        orddef:
          begin
            if current_procinfo.procdef.returndef.size = 0 then
              exit;
            DoRemoveLastDeallocForFuncRes(RS_EAX);
            { for int64/qword }
            if current_procinfo.procdef.returndef.size = 8 then
              DoRemoveLastDeallocForFuncRes(RS_EDX);
          end;
      end;
    end;
  { Pass 1 peephole optimizations for vmovaps/vmovapd with two register
    operands. p is the move; on return it may point to a different
    instruction when the original one was removed.

    Handled patterns:
      vmova* reg1,reg1                      -> removed
      vmova* reg1,reg2; vmova* reg2,reg3    -> vmova* reg1,reg3  (reg2 dead)
      vmova* reg1,reg2; vmova* reg2,reg1    -> vmova* reg1,reg2
      vmova* reg1,reg2; vfma ...,reg2; vmova* reg2,reg1
                                            -> vfma ...,reg1     (reg2 dead)

    The result is true for the first three patterns and is left false for
    the FMA fold, as before.

    Changes compared to the previous version:
      - every CopyUsedRegs is now paired with ReleaseUsedRegs, as done in
        OptPass1MOV; the temporary register sets were never released;
      - the FMA fold additionally requires the trailing move to read reg2,
        otherwise an unrelated "vmova* regX,reg1" would have been deleted. }
  function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1,hp2 : tai;
    begin
      result:=false;
      if MatchOpType(taicpu(p),top_reg,top_reg) then
        begin
          { vmova* reg1,reg1
            =>
            <nop> }
          if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
            begin
              GetNextInstruction(p,hp1);
              asml.Remove(p);
              p.Free;
              p:=hp1;
              result:=true;
            end
          else if GetNextInstruction(p,hp1) then
            begin
              if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
                 MatchOpType(taicpu(hp1),top_reg,top_reg) and
                 MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
                begin
                  { vmova* reg1,reg2
                    vmova* reg2,reg3
                    dealloc reg2
                    =>
                    vmova* reg1,reg3 }
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                    begin
                      taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end
                  { special case:
                    vmova* reg1,reg2
                    vmova* reg2,reg1
                    =>
                    vmova* reg1,reg2 }
                  else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
                    begin
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end;
                  { release the copy made by CopyUsedRegs on every path }
                  ReleaseUsedRegs(TmpUsedRegs);
                end
              else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
                { we mix single and double operations here because we assume that the compiler
                  generates vmovapd only after double operations and vmovaps only after single operations }
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
                GetNextInstruction(hp1,hp2) and
                MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
                { the trailing move has to copy the FMA result (reg2) back
                  into reg1, not some other register }
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
                MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
                begin
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
                    begin
                      { let the FMA work on reg1 directly and drop both moves }
                      taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
                      asml.Remove(p);
                      p.Free;
                      asml.Remove(hp2);
                      hp2.Free;
                      p:=hp1;
                    end;
                  ReleaseUsedRegs(TmpUsedRegs);
                end;
            end;
        end;
    end;
  { Pass 1 peephole optimization for a three-operand vector instruction p
    followed by a vmovaps/vmovapd that copies p's destination (operand 2)
    into another register:

      vop    ...,reg1
      vmova* reg1,reg2
      =>
      vop    ...,reg2      (only if reg1 is not used afterwards)

    Returns true when the move was folded away. p itself is kept.
    The caller has to pass an instruction with at least three operands;
    operand 2 is accessed without further checks.

    Change compared to the previous version: the register set copied with
    CopyUsedRegs is now released with ReleaseUsedRegs, as done in
    OptPass1MOV; it was never released before. }
  function TX86AsmOptimizer.OptPass1VOP(const p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1 : tai;
    begin
      result:=false;
      if GetNextInstruction(p,hp1) and
        { we mix single and double operations here because we assume that the compiler
          generates vmovapd only after double operations and vmovaps only after single operations }
        MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
        MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
        (taicpu(hp1).oper[1]^.typ=top_reg) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)) then
            begin
              { write the result straight into the move's destination }
              taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
              asml.Remove(hp1);
              hp1.Free;
              result:=true;
            end;
          ReleaseUsedRegs(TmpUsedRegs);
        end;
    end;
  605. function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  606. var
  607. hp1, hp2: tai;
  608. TmpUsedRegs : TAllUsedRegs;
  609. GetNextIntruction_p : Boolean;
  610. begin
  611. Result:=false;
  612. GetNextIntruction_p:=GetNextInstruction(p, hp1);
  613. if GetNextIntruction_p and
  614. MatchInstruction(hp1,A_AND,[]) and
  615. (taicpu(p).oper[1]^.typ = top_reg) and
  616. MatchOpType(taicpu(hp1),top_const,top_reg) and
  617. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
  618. case taicpu(p).opsize Of
  619. S_L:
  620. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  621. begin
  622. DebugMsg('PeepHole Optimization,MovAnd2Mov',p);
  623. asml.remove(hp1);
  624. hp1.free;
  625. Result:=true;
  626. exit;
  627. end;
  628. end
  629. else if GetNextIntruction_p and
  630. MatchInstruction(hp1,A_MOV,[]) and
  631. (taicpu(p).oper[1]^.typ = top_reg) and
  632. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
  633. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  634. begin
  635. CopyUsedRegs(TmpUsedRegs);
  636. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  637. { we have
  638. mov x, %treg
  639. mov %treg, y
  640. }
  641. if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
  642. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  643. { we've got
  644. mov x, %treg
  645. mov %treg, y
  646. with %treg is not used after }
  647. case taicpu(p).oper[0]^.typ Of
  648. top_reg:
  649. begin
  650. { change
  651. mov %reg, %treg
  652. mov %treg, y
  653. to
  654. mov %reg, y
  655. }
  656. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  657. DebugMsg('PeepHole Optimization,MovMov2Mov 2',p);
  658. asml.remove(hp1);
  659. hp1.free;
  660. ReleaseUsedRegs(TmpUsedRegs);
  661. Exit;
  662. end;
  663. top_ref:
  664. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  665. begin
  666. { change
  667. mov mem, %treg
  668. mov %treg, %reg
  669. to
  670. mov mem, %reg"
  671. }
  672. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  673. DebugMsg('PeepHole Optimization,MovMov2Mov 3',p);
  674. asml.remove(hp1);
  675. hp1.free;
  676. ReleaseUsedRegs(TmpUsedRegs);
  677. Exit;
  678. end;
  679. end;
  680. ReleaseUsedRegs(TmpUsedRegs);
  681. end
  682. else
  683. { Change
  684. mov %reg1, %reg2
  685. xxx %reg2, ???
  686. to
  687. mov %reg1, %reg2
  688. xxx %reg1, ???
  689. to avoid a write/read penalty
  690. }
  691. if MatchOpType(taicpu(p),top_reg,top_reg) and
  692. GetNextInstruction(p,hp1) and
  693. (tai(hp1).typ = ait_instruction) and
  694. (taicpu(hp1).ops >= 1) and
  695. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  696. { we have
  697. mov %reg1, %reg2
  698. XXX %reg2, ???
  699. }
  700. begin
  701. if ((taicpu(hp1).opcode = A_OR) or
  702. (taicpu(hp1).opcode = A_TEST)) and
  703. (taicpu(hp1).oper[1]^.typ = top_reg) and
  704. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  705. { we have
  706. mov %reg1, %reg2
  707. test/or %reg2, %reg2
  708. }
  709. begin
  710. CopyUsedRegs(TmpUsedRegs);
  711. { reg1 will be used after the first instruction,
  712. so update the allocation info }
  713. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  714. if GetNextInstruction(hp1, hp2) and
  715. (hp2.typ = ait_instruction) and
  716. taicpu(hp2).is_jmp and
  717. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  718. { change
  719. mov %reg1, %reg2
  720. test/or %reg2, %reg2
  721. jxx
  722. to
  723. test %reg1, %reg1
  724. jxx
  725. }
  726. begin
  727. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  728. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  729. asml.remove(p);
  730. p.free;
  731. p := hp1;
  732. ReleaseUsedRegs(TmpUsedRegs);
  733. Exit;
  734. end
  735. else
  736. { change
  737. mov %reg1, %reg2
  738. test/or %reg2, %reg2
  739. to
  740. mov %reg1, %reg2
  741. test/or %reg1, %reg1
  742. }
  743. begin
  744. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  745. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  746. end;
  747. ReleaseUsedRegs(TmpUsedRegs);
  748. end
  749. end
  750. else
  751. { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  752. x >= RetOffset) as it doesn't do anything (it writes either to a
  753. parameter or to the temporary storage room for the function
  754. result)
  755. }
  756. if GetNextIntruction_p and
  757. (tai(hp1).typ = ait_instruction) then
  758. begin
  759. if IsExitCode(hp1) and
  760. MatchOpType(p,top_reg,top_ref) and
  761. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  762. not(assigned(current_procinfo.procdef.funcretsym) and
  763. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  764. (taicpu(p).oper[1]^.ref^.index = NR_NO) then
  765. begin
  766. asml.remove(p);
  767. p.free;
  768. p:=hp1;
  769. DebugMsg('Peephole removed deadstore before leave/ret',p);
  770. RemoveLastDeallocForFuncRes(p);
  771. exit;
  772. end
  773. { change
  774. mov reg1, mem1
  775. cmp x, mem1
  776. to
  777. mov reg1, mem1
  778. cmp x, reg1
  779. }
  780. else if MatchOpType(p,top_reg,top_ref) and
  781. MatchInstruction(hp1,A_CMP,[taicpu(p).opsize]) and
  782. (taicpu(hp1).oper[1]^.typ = top_ref) and
  783. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  784. begin
  785. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  786. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  787. end;
  788. end;
  789. { Next instruction is also a MOV ? }
  790. if GetNextIntruction_p and
  791. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  792. begin
  793. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  794. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  795. { mov reg1, mem1 or mov mem1, reg1
  796. mov mem2, reg2 mov reg2, mem2}
  797. begin
  798. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  799. { mov reg1, mem1 or mov mem1, reg1
  800. mov mem2, reg1 mov reg2, mem1}
  801. begin
  802. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  803. { Removes the second statement from
  804. mov reg1, mem1/reg2
  805. mov mem1/reg2, reg1 }
  806. begin
  807. if taicpu(p).oper[0]^.typ=top_reg then
  808. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  809. DebugMsg('PeepHole Optimization,MovMov2Mov 1',p);
  810. asml.remove(hp1);
  811. hp1.free;
  812. Result:=true;
  813. exit;
  814. end
  815. else
  816. begin
  817. CopyUsedRegs(TmpUsedRegs);
  818. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  819. if (taicpu(p).oper[1]^.typ = top_ref) and
  820. { mov reg1, mem1
  821. mov mem2, reg1 }
  822. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  823. GetNextInstruction(hp1, hp2) and
  824. MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
  825. OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  826. OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
  827. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  828. { change to
  829. mov reg1, mem1 mov reg1, mem1
  830. mov mem2, reg1 cmp reg1, mem2
  831. cmp mem1, reg1
  832. }
  833. begin
  834. asml.remove(hp2);
  835. hp2.free;
  836. taicpu(hp1).opcode := A_CMP;
  837. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  838. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  839. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  840. DebugMsg('Peephole MovMovCmp2MovCmp done',hp1);
  841. end;
  842. ReleaseUsedRegs(TmpUsedRegs);
  843. end;
  844. end
  845. else if (taicpu(p).oper[1]^.typ=top_ref) and
  846. OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  847. begin
  848. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  849. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  850. DebugMsg('PeepHole Optimization,MovMov2MovMov1',p);
  851. end
  852. else
  853. begin
  854. CopyUsedRegs(TmpUsedRegs);
  855. if GetNextInstruction(hp1, hp2) and
  856. MatchOpType(taicpu(p),top_ref,top_reg) and
  857. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  858. (taicpu(hp1).oper[1]^.typ = top_ref) and
  859. MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
  860. MatchOpType(taicpu(hp2),top_ref,top_reg) and
  861. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  862. if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
  863. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  864. { mov mem1, %reg1
  865. mov %reg1, mem2
  866. mov mem2, reg2
  867. to:
  868. mov mem1, reg2
  869. mov reg2, mem2}
  870. begin
  871. AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  872. DebugMsg('PeepHole Optimization,MovMovMov2MovMov 1',p);
  873. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  874. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  875. asml.remove(hp2);
  876. hp2.free;
  877. end
  878. {$ifdef i386}
  879. { this is enabled for i386 only, as the rules to create the reg sets below
  880. are too complicated for x86-64, so this makes this code too error prone
  881. on x86-64
  882. }
  883. else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  884. not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
  885. not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
  886. { mov mem1, reg1 mov mem1, reg1
  887. mov reg1, mem2 mov reg1, mem2
  888. mov mem2, reg2 mov mem2, reg1
  889. to: to:
  890. mov mem1, reg1 mov mem1, reg1
  891. mov mem1, reg2 mov reg1, mem2
  892. mov reg1, mem2
  893. or (if mem1 depends on reg1
  894. and/or if mem2 depends on reg2)
  895. to:
  896. mov mem1, reg1
  897. mov reg1, mem2
  898. mov reg1, reg2
  899. }
  900. begin
  901. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  902. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  903. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  904. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  905. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  906. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  907. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  908. AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  909. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  910. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  911. AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  912. end
  913. else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  914. begin
  915. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  916. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  917. end
  918. else
  919. begin
  920. asml.remove(hp2);
  921. hp2.free;
  922. end
  923. {$endif i386}
  924. ;
  925. ReleaseUsedRegs(TmpUsedRegs);
  926. end;
  927. end
  928. (* { movl [mem1],reg1
  929. movl [mem1],reg2
  930. to
  931. movl [mem1],reg1
  932. movl reg1,reg2
  933. }
  934. else if (taicpu(p).oper[0]^.typ = top_ref) and
  935. (taicpu(p).oper[1]^.typ = top_reg) and
  936. (taicpu(hp1).oper[0]^.typ = top_ref) and
  937. (taicpu(hp1).oper[1]^.typ = top_reg) and
  938. (taicpu(p).opsize = taicpu(hp1).opsize) and
  939. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  940. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  941. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  942. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  943. else*)
  944. { movl const1,[mem1]
  945. movl [mem1],reg1
  946. to
  947. movl const1,reg1
  948. movl reg1,[mem1]
  949. }
  950. else if MatchOpType(Taicpu(p),top_const,top_ref) and
  951. MatchOpType(Taicpu(hp1),top_ref,top_reg) and
  952. (taicpu(p).opsize = taicpu(hp1).opsize) and
  953. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  954. not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
  955. begin
  956. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  957. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  958. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  959. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  960. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  961. DebugMsg('PeepHole Optimization,MovMov2MovMov 1',p);
  962. end
  963. end
  964. else if (taicpu(p).oper[1]^.typ = top_reg) and
  965. GetNextIntruction_p and
  966. (hp1.typ = ait_instruction) and
  967. GetNextInstruction(hp1, hp2) and
  968. MatchInstruction(hp2,A_MOV,[]) and
  969. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  970. (taicpu(hp2).oper[0]^.typ=top_reg) and
  971. (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
  972. (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
  973. ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
  974. IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
  975. ) then
  976. { change movsX/movzX reg/ref, reg2
  977. add/sub/or/... reg3/$const, reg2
  978. mov reg2 reg/ref
  979. to add/sub/or/... reg3/$const, reg/ref }
  980. begin
  981. CopyUsedRegs(TmpUsedRegs);
  982. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  983. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  984. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  985. begin
  986. { by example:
  987. movswl %si,%eax movswl %si,%eax p
  988. decl %eax addl %edx,%eax hp1
  989. movw %ax,%si movw %ax,%si hp2
  990. ->
  991. movswl %si,%eax movswl %si,%eax p
  992. decw %eax addw %edx,%eax hp1
  993. movw %ax,%si movw %ax,%si hp2
  994. }
  995. DebugMsg('Peephole Optimization MovOpMov2Op ('+
  996. std_op2str[taicpu(p).opcode]+gas_opsize2str[taicpu(p).opsize]+' '+
  997. std_op2str[taicpu(hp1).opcode]+gas_opsize2str[taicpu(hp1).opsize]+' '+
  998. std_op2str[taicpu(hp2).opcode]+gas_opsize2str[taicpu(hp2).opsize],p);
  999. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1000. {
  1001. ->
  1002. movswl %si,%eax movswl %si,%eax p
  1003. decw %si addw %dx,%si hp1
  1004. movw %ax,%si movw %ax,%si hp2
  1005. }
  1006. case taicpu(hp1).ops of
  1007. 1:
  1008. begin
  1009. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  1010. if taicpu(hp1).oper[0]^.typ=top_reg then
  1011. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1012. end;
  1013. 2:
  1014. begin
  1015. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  1016. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  1017. (taicpu(hp1).opcode<>A_SHL) and
  1018. (taicpu(hp1).opcode<>A_SHR) and
  1019. (taicpu(hp1).opcode<>A_SAR) then
  1020. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1021. end;
  1022. else
  1023. internalerror(2008042701);
  1024. end;
  1025. {
  1026. ->
  1027. decw %si addw %dx,%si p
  1028. }
  1029. asml.remove(p);
  1030. asml.remove(hp2);
  1031. p.Free;
  1032. hp2.Free;
  1033. p := hp1;
  1034. end;
  1035. ReleaseUsedRegs(TmpUsedRegs);
  1036. end
  1037. else if GetNextIntruction_p and
  1038. MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  1039. GetNextInstruction(hp1, hp2) and
  1040. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  1041. MatchOperand(Taicpu(p).oper[0]^,0) and
  1042. (Taicpu(p).oper[1]^.typ = top_reg) and
  1043. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  1044. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  1045. { mov reg1,0
  1046. bts reg1,operand1 --> mov reg1,operand2
  1047. or reg1,operand2 bts reg1,operand1}
  1048. begin
  1049. Taicpu(hp2).opcode:=A_MOV;
  1050. asml.remove(hp1);
  1051. insertllitem(hp2,hp2.next,hp1);
  1052. asml.remove(p);
  1053. p.free;
  1054. p:=hp1;
  1055. end
  1056. else if GetNextIntruction_p and
  1057. MatchInstruction(hp1,A_LEA,[S_L]) and
  1058. MatchOpType(Taicpu(p),top_ref,top_reg) and
  1059. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  1060. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  1061. ) or
  1062. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  1063. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  1064. )
  1065. ) then
  1066. { mov reg1,ref
  1067. lea reg2,[reg1,reg2]
  1068. to
  1069. add reg2,ref}
  1070. begin
  1071. CopyUsedRegs(TmpUsedRegs);
  1072. { reg1 may not be used afterwards }
  1073. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1074. begin
  1075. Taicpu(hp1).opcode:=A_ADD;
  1076. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  1077. DebugMsg('Peephole MovLea2Add done',hp1);
  1078. asml.remove(p);
  1079. p.free;
  1080. p:=hp1;
  1081. end;
  1082. ReleaseUsedRegs(TmpUsedRegs);
  1083. end;
  1084. end;
  { Second-pass peephole optimizations that start at a MOV instruction.

    p is the instruction being examined.  When the MOV itself is removed,
    p is redirected to the instruction that followed it.

    Two rewrites are attempted:
      1. "mov reg1,reg2" followed by a mov/movzx/movsx that loads from an
         address built on reg2 into (a sub-register of) reg2: the address is
         rewritten to use reg1 and the first mov is dropped.
      2. "mov (ref),reg / arith-or-lea ...,reg / mov reg,(ref)": the
         arithmetic is applied to (ref) directly and both movs are dropped.

    Returns true only for rewrite 1; rewrite 2 leaves the result at false. }
  function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1,hp2: tai;
    begin
      Result:=false;
      if MatchOpType(taicpu(p),top_reg,top_reg) and
        GetNextInstruction(p, hp1) and
        MatchInstruction(hp1,A_MOV,A_MOVZX,A_MOVSX,[]) and
        MatchOpType(taicpu(hp1),top_ref,top_reg) and
        ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg)
         or
         (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)
        ) and
        (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
        { mov reg1, reg2
          mov/zx/sx (reg2, ..), reg2      to   mov/zx/sx (reg1, ..), reg2 }
        begin
          { base and index are patched independently: reg2 may appear in
            either position, or in both }
          if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
            taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
          if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
            taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
          asml.remove(p);
          p.free;
          p := hp1;
          Result:=true;
          exit;
        end
      else if (taicpu(p).oper[0]^.typ = top_ref) and
        GetNextInstruction(p,hp1) and
        (hp1.typ = ait_instruction) and
        { while the GetNextInstruction(hp1,hp2) call could be factored out,
          doing it separately in both branches allows to do the cheap checks
          with low probability earlier }
        ((IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
          GetNextInstruction(hp1,hp2) and
          MatchInstruction(hp2,A_MOV,[])
         ) or
         ((taicpu(hp1).opcode=A_LEA) and
          GetNextInstruction(hp1,hp2) and
          MatchInstruction(hp2,A_MOV,[]) and
          ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
            (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
           ) or
           (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,
              taicpu(p).oper[1]^.reg) and
            (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg)) or
           (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_NO)) or
           (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,NR_NO,taicpu(p).oper[1]^.reg))
          ) and
          ((MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^)) or not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)))
         )
        ) and
        { the last operand of hp1 (its destination) must be what hp2 stores }
        MatchOperand(taicpu(hp1).oper[taicpu(hp1).ops-1]^,taicpu(hp2).oper[0]^) and
        (taicpu(hp2).oper[1]^.typ = top_ref) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          { feed the allocation info following both p and hp1 into the
            temporary set before asking about hp2, exactly as the
            MovOpMov2Op fold earlier in this file does; previously the
            info following p was skipped here }
          UpdateUsedRegs(TmpUsedRegs,tai(p.next));
          UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
          if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
            not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2, TmpUsedRegs))) then
            { change   mov            (ref), reg
                       add/sub/or/... reg2/$const, reg
                       mov            reg, (ref)
                       # release reg
              to       add/sub/or/... reg2/$const, (ref) }
            begin
              case taicpu(hp1).opcode of
                { single-operand instructions: the memory reference becomes
                  the only operand }
                A_INC,A_DEC,A_NOT,A_NEG :
                  taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
                A_LEA :
                  begin
                    { the lea becomes an add whose source is whichever
                      component of the address is not the loaded register:
                      the index, else the base, else the constant offset }
                    taicpu(hp1).opcode:=A_ADD;
                    if (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.index<>NR_NO) then
                      taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
                    else if (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.base<>NR_NO) then
                      taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base)
                    else
                      taicpu(hp1).loadconst(0,taicpu(hp1).oper[0]^.ref^.offset);
                    taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                    DebugMsg('Peephole FoldLea done',hp1);
                  end
                else
                  { two-operand arithmetic: only the destination changes }
                  taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
              end;
              asml.remove(p);
              asml.remove(hp2);
              p.free;
              hp2.free;
              p := hp1
            end;
          ReleaseUsedRegs(TmpUsedRegs);
        end;
    end;
  { Second-pass peephole optimization for IMUL.

    Looks for
        mov  reg1,reg2
        imul y,reg2          (or the three-operand form imul y,reg2,reg2)
    where y is a constant or an addr_full reference, and rewrites it to
        imul y,reg1,reg2
    removing the mov.  The rewrite is only performed when
    RegUsedAfterInstruction reports the register in operand 1 of the imul
    as not used after p.

    Returns true when the mov has been folded away, false otherwise. }
  function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
    var
      LiveRegs : TAllUsedRegs;
      prev : tai;
      mulInstr, movInstr : taicpu;
    begin
      Result:=false;
      mulInstr:=taicpu(p);

      { shape of the imul itself }
      if mulInstr.ops < 2 then
        exit;
      if not((mulInstr.oper[0]^.typ = top_const) or
             ((mulInstr.oper[0]^.typ = top_ref) and (mulInstr.oper[0]^.ref^.refaddr=addr_full))) then
        exit;
      if mulInstr.oper[1]^.typ <> top_reg then
        exit;
      { a three-operand imul qualifies only if source and destination
        register are the same }
      if (mulInstr.ops <> 2) and
         not((mulInstr.oper[2]^.typ = top_reg) and
             (mulInstr.oper[2]^.reg = mulInstr.oper[1]^.reg)) then
        exit;

      { the preceding instruction must be a reg-to-reg mov writing the
        register the imul works on }
      if not GetLastInstruction(p,prev) then
        exit;
      if not MatchInstruction(prev,A_MOV,[]) then
        exit;
      if not MatchOpType(prev,top_reg,top_reg) then
        exit;
      movInstr:=taicpu(prev);
      if not((movInstr.oper[1]^.reg = mulInstr.oper[1]^.reg) or
             ((movInstr.opsize=S_L) and (mulInstr.opsize=S_Q) and
              SuperRegistersEqual(movInstr.oper[1]^.reg,mulInstr.oper[1]^.reg))) then
        exit;

      CopyUsedRegs(LiveRegs);
      if not RegUsedAfterInstruction(mulInstr.oper[1]^.reg,p,LiveRegs) then
        { change
            mov reg1,reg2
            imul y,reg2 to imul y,reg1,reg2 }
        begin
          mulInstr.ops := 3;
          mulInstr.loadreg(1,movInstr.oper[0]^.reg);
          mulInstr.loadreg(2,movInstr.oper[1]^.reg);
          DebugMsg('Peephole MovImul2Imul done',p);
          asml.remove(prev);
          prev.free;
          Result:=true;
        end;
      ReleaseUsedRegs(LiveRegs);
    end;
  { First-pass peephole optimizations that start at an AND instruction.

    p is the AND being examined; hp1 is the instruction following it.
    Nothing is done when there is no following instruction.

    Rewrites attempted, in order:
      - and const1,reg / and const2,reg   ->  and (const1 and const2),reg
      - and const,reg  / movzx sub,reg    ->  and const,reg   (movzx dropped)
      - and const,reg  / movsx sub,reg    ->  and const,reg   (movsx dropped)
      - and x,reg      / jcc              ->  test x,reg      (reg not in UsedRegs)

    Returns true only for the and/and merge, in which case p has been
    replaced by the second AND.  The other rewrites leave the result at
    false. }
  function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
    var
      hp1 : tai;
    begin
      Result:=false;

      if not(GetNextInstruction(p, hp1)) then
        exit;

      if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_AND,[]) and
        MatchOpType(hp1,top_const,top_reg) and
        (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
        { the second register must contain the first one, so compare their subreg types }
        (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
        (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
        { change
            and const1, reg
            and const2, reg
          to
            and (const1 and const2), reg
        }
        begin
          taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
          DebugMsg('Peephole AndAnd2And done',hp1);
          asml.remove(p);
          p.Free;
          p:=hp1;
          Result:=true;
          exit;
        end
      else if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_MOVZX,[]) and
        (taicpu(hp1).oper[0]^.typ = top_reg) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
        { source and destination of the movzx must be parts of the same
          register.  This has to compare the super registers (as the movsx
          case below does): the sub registers of a widening movzx can never
          be equal, so comparing them made this branch unreachable }
        (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
        { a high-byte source (e.g. %ah) is not the low part of the
          destination, so the extension is not redundant there }
        (getsubreg(taicpu(hp1).oper[0]^.reg)<>R_SUBH) and
         (((taicpu(p).opsize=S_W) and
           (taicpu(hp1).opsize=S_BW)) or
          ((taicpu(p).opsize=S_L) and
           (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
            or
           ((taicpu(p).opsize=S_Q) and
            (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
          ) then
        begin
          { the movzx is redundant when the and mask already clears every
            bit above the source width }
          if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
               ) or
             (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
              or
             (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $ffffffff)=taicpu(p).oper[0]^.val)
             )
{$endif x86_64}
            then
            begin
              DebugMsg('Peephole AndMovzToAnd done',p);
              asml.remove(hp1);
              hp1.free;
            end;
        end
      else if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
        (taicpu(hp1).oper[0]^.typ = top_reg) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
        (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
        { as above: a high-byte source is not the low part of the
          destination, so the extension must be kept }
        (getsubreg(taicpu(hp1).oper[0]^.reg)<>R_SUBH) and
         (((taicpu(p).opsize=S_W) and
           (taicpu(hp1).opsize=S_BW)) or
          ((taicpu(p).opsize=S_L) and
           (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
           or
           ((taicpu(p).opsize=S_Q) and
            (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
          ) then
        begin
          { the movsx is redundant when the and mask also clears the sign
            bit of the source width, hence the masks one bit narrower than
            in the movzx case }
          if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
               ) or
             (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
              or
             (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
             )
{$endif x86_64}
            then
            begin
              DebugMsg('PeepHole Optimization,AndMovsxToAnd',p);
              asml.remove(hp1);
              hp1.free;
            end;
        end
      else if (taicpu(p).oper[1]^.typ = top_reg) and
        (hp1.typ = ait_instruction) and
        (taicpu(hp1).is_jmp) and
        (taicpu(hp1).opcode<>A_JMP) and
        not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
        { change
            and x, reg
            jxx
          to
            test x, reg
            jxx
          if reg is deallocated before the
          jump, but only if it's a conditional jump (PFV)
        }
        taicpu(p).opcode := A_TEST;
    end;
  { Post-peephole rewrite of "mov $0,%reg" into "xor %reg,%reg".

    The instruction is changed in place.  Nothing happens unless the source
    operand matches the constant 0, the destination is a register, and
    NR_DEFAULTFLAGS is not currently in UsedRegs (xor, unlike mov, writes
    the flags). }
  procedure TX86AsmOptimizer.PostPeepholeOptMov(const p : tai);
    var
      instr : taicpu;
    begin
      instr:=taicpu(p);

      if not MatchOperand(instr.oper[0]^,0) then
        exit;
      if instr.oper[1]^.typ <> top_reg then
        exit;
      if RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs) then
        exit;

      { change "mov $0, %reg" into "xor %reg, %reg" }
      instr.opcode := A_XOR;
      instr.loadReg(0,instr.oper[1]^.reg);
    end;
  1338. end.