aoptx86.pas 56 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
  27. type
  { Peephole optimizer support for x86, built on top of TAsmOptimizer. }
  TX86AsmOptimizer = class(TAsmOptimizer)
    { True if hp is an instruction that gives reg a completely new value
      which does not depend on the previous contents of reg. }
    function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
  protected
    { True if a write to reg1 replaces every bit of reg2. }
    function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;

    { True if the value read from reg1 depends on the contents of reg2.
      Close to SuperRegistersEqual, except that R_SUBH and R_SUBL are
      treated as independent (e.g. reading AL does not depend on the value
      in AH). }
    function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;

    { Individual peephole passes; p is the instruction being examined. }
    procedure PostPeepholeOptMov(const p : tai);
    function OptPass1AND(var p : tai) : boolean;
    function OptPass1VMOVAP(var p : tai) : boolean;
    function OptPass1VOP(const p : tai) : boolean;
    function OptPass1MOV(var p : tai) : boolean;
    function OptPass2MOV(var p : tai) : boolean;
    function OptPass2Imul(var p : tai) : boolean;

    { Inserts s as an assembler comment before p when DEBUG_AOPTCPU is
      defined; does nothing otherwise. }
    procedure DebugMsg(const s : string; p : tai);inline;

    { Marks reg as allocated from p1 up to and including p2. }
    procedure AllocRegBetween(reg : tregister; p1,p2 : tai;var initialusedregs : TAllUsedRegs);

    { True if p starts a function exit sequence ending in "ret". }
    class function IsExitCode(p : tai) : boolean;

    { True if hp1 is an arithmetic/logic instruction whose destination is reg
      and whose source (if any) is a constant or a register other than reg. }
    class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean;

    { Removes the closest preceding deallocation of the function result
      register(s) before p. }
    procedure RemoveLastDeallocForFuncRes(p : tai);
  end;
  { MatchInstruction: true if instr is an instruction whose opcode equals one
    of the given opcodes and whose operand size is a member of opsize. An
    empty opsize set accepts every operand size. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;

  { MatchOperand: true if oper is the given register, the given constant, or
    (third form) an operand of the same type and value as oper2. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;

  { true if every compared field of the two references is equal }
  function RefsEqual(const r1, r2: treference): boolean;

  { returns true if ref is a reference using only the registers passed as base
    and index and having no offset; NR_INVALID for base or index means
    "do not check this register" }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;

  { returns true if ref is a reference using only the registers passed as base
    and index; unlike MatchReference the offset may have any value }
  function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;

  { true if instr has exactly two operands, of types ot0 and ot1 in that order }
  function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
  64. implementation
  65. uses
  66. cutils,
  67. verbose,
  68. procinfo,
  69. symconst,symsym,
  70. itcpugas;
  { Returns true if instr is an instruction with opcode op whose operand size
    is contained in opsize. An empty opsize set accepts any operand size. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
    begin
      { anything that is not an instruction can never match }
      if instr.typ <> ait_instruction then
        exit(false);
      if taicpu(instr).opcode <> op then
        exit(false);
      result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { Returns true if instr is an instruction whose opcode is op1 or op2 and
    whose operand size is contained in opsize. An empty opsize set accepts
    any operand size. }
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
    var
      opc: TAsmOp;
    begin
      result := false;
      if instr.typ <> ait_instruction then
        exit;
      opc := taicpu(instr).opcode;
      if (opc = op1) or (opc = op2) then
        result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { Returns true if instr is an instruction whose opcode is op1, op2 or op3
    and whose operand size is contained in opsize. An empty opsize set
    accepts any operand size. }
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
    var
      opc: TAsmOp;
    begin
      result := false;
      if instr.typ <> ait_instruction then
        exit;
      opc := taicpu(instr).opcode;
      if (opc <> op1) and (opc <> op2) and (opc <> op3) then
        exit;
      result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { Returns true if instr is an instruction whose opcode occurs in ops and
    whose operand size is contained in opsize. An empty opsize set accepts
    any operand size; an empty ops array never matches. }
  function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
    const opsize : topsizes) : boolean;
    var
      i : integer;
    begin
      result:=false;
      { these two conditions do not depend on the opcode being tried, so they
        are tested once up front }
      if instr.typ <> ait_instruction then
        exit;
      if (opsize <> []) and not(taicpu(instr).opsize in opsize) then
        exit;
      for i:=low(ops) to high(ops) do
        if taicpu(instr).opcode = ops[i] then
          exit(true);
    end;
  114. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  115. begin
  116. result := (oper.typ = top_reg) and (oper.reg = reg);
  117. end;
  { Returns true if oper is a constant operand with value a. }
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
    begin
      if oper.typ = top_const then
        result := oper.val = a
      else
        result := false;
    end;
  { Returns true if both operands have the same type and the same value.
    Only constant, register and reference operands are supported; two
    operands of any other (equal) type raise internal error 2013102801. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
    begin
      if oper1.typ <> oper2.typ then
        exit(false);
      result := true;
      case oper1.typ of
        top_reg:
          result := oper1.reg = oper2.reg;
        top_const:
          result := oper1.val = oper2.val;
        top_ref:
          result := RefsEqual(oper1.ref^, oper2.ref^);
        else
          internalerror(2013102801);
      end;
    end;
  { Returns true if r1 and r2 agree in offset, segment, base, index, scale
    factor, symbol, address type and relative symbol. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      result := false;
      if r1.offset <> r2.offset then
        exit;
      if (r1.segment <> r2.segment) or (r1.base <> r2.base) then
        exit;
      if (r1.index <> r2.index) or (r1.scalefactor <> r2.scalefactor) then
        exit;
      if (r1.symbol <> r2.symbol) or (r1.refaddr <> r2.refaddr) then
        exit;
      result := r1.relsymbol = r2.relsymbol;
    end;
  { Returns true if ref has a zero offset, a scale factor of 0 or 1, no
    segment, no symbol and no relative symbol, and uses the given base and
    index registers. Passing NR_INVALID for base or index skips the check of
    that register. }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
    begin
      { identical to MatchReferenceWithOffset plus the zero-offset requirement }
      Result:=(ref.offset=0) and
        MatchReferenceWithOffset(ref,base,index);
    end;
  { Returns true if ref has a scale factor of 0 or 1, no segment, no symbol
    and no relative symbol, and uses the given base and index registers. The
    offset is not examined. Passing NR_INVALID for base or index skips the
    check of that register. }
  function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
    begin
      Result:=false;
      if not(ref.scalefactor in [0,1]) then
        exit;
      if ref.segment<>NR_NO then
        exit;
      if (ref.symbol<>nil) or (ref.relsymbol<>nil) then
        exit;
      if (base<>NR_INVALID) and (ref.base<>base) then
        exit;
      Result:=(index=NR_INVALID) or (ref.index=index);
    end;
  { Returns true if instr has exactly two operands and their types are ot0
    and ot1 respectively. instr is cast to taicpu without checking its type,
    so the caller must pass an instruction. }
  function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
    var
      ins : taicpu;
    begin
      ins:=taicpu(instr);
      Result:=false;
      if ins.ops<>2 then
        exit;
      if ins.oper[0]^.typ<>ot0 then
        exit;
      Result:=ins.oper[1]^.typ=ot1;
    end;
  { When DEBUG_AOPTCPU is defined, inserts s as an assembler comment in front
    of p; otherwise the body is empty. }
  procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
    begin
{$ifdef DEBUG_AOPTCPU}
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
{$endif DEBUG_AOPTCPU}
    end;
  { Returns true if loading a new value into reg1 overwrites all of reg2.
    Registers with different super registers never overwrite each other;
    non-integer registers sharing a super register always do. For integer
    registers the answer depends on the sub register sizes; an unexpected sub
    register of reg1 raises internal error 2017042801. }
  function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
    begin
      result:=false;
      if not SuperRegistersEqual(reg1,reg2) then
        exit;
      { same super register and not an integer register: fully overwritten }
      if getregtype(reg1)<>R_INTREGISTER then
        exit(true);
      case getsubreg(reg1) of
        { Writing the low byte leaves the high byte and all upper bits alone,
          i.e. for a wider reg2 it acts like
            reg2 := (reg2 and $ffffff00) or byte(reg1);
          so only a low-byte reg2 is overwritten entirely. }
        R_SUBL:
          result:=getsubreg(reg2)=R_SUBL;
        { Writing the high byte leaves the low byte and all upper bits alone:
            reg2 := (reg2 and $ffff00ff) or (reg1 and $ff00); }
        R_SUBH:
          result:=getsubreg(reg2)=R_SUBH;
        { A 16 bit write keeps the upper 16 bits of a 32 bit or wider reg2:
            reg2 := (reg2 and $ffff0000) or word(reg1); }
        R_SUBW:
          result:=getsubreg(reg2) in [R_SUBL,R_SUBH,R_SUBW];
        { A 32 bit write overwrites every other sub register as well, because
          it clears the upper 32 bits of the 64 bit register on x86_64. }
        R_SUBD,
        R_SUBQ:
          result:=true;
        else
          internalerror(2017042801);
      end;
    end;
  { Returns true if the value read from reg1 depends on the contents of reg2.
    Registers with different super registers are independent; non-integer
    registers sharing a super register always depend on each other. For
    integer registers only the low-byte/high-byte pair is independent. An
    unexpected sub register of reg1 raises internal error 2017042802. }
  function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
    begin
      result:=false;
      if not SuperRegistersEqual(reg1,reg2) then
        exit;
      if getregtype(reg1)<>R_INTREGISTER then
        exit(true);
      case getsubreg(reg1) of
        { the low byte does not overlap the high byte ... }
        R_SUBL:
          result:=getsubreg(reg2)<>R_SUBH;
        { ... and vice versa }
        R_SUBH:
          result:=getsubreg(reg2)<>R_SUBL;
        { 16 bit and wider reads overlap every sub register }
        R_SUBW,
        R_SUBD,
        R_SUBQ:
          result:=true;
        else
          internalerror(2017042802);
      end;
    end;
  { Allocates register reg between (and including) instructions p1 and p2.
    The type of p1 and p2 must not be in SkipInstr.
    Note that this routine is called both from the peephole optimizer (where
    optinfo is not yet initialised) and from the cse (where it is).

    reg             - register to keep allocated; integer and MM registers are
                      widened to their whole super register first
    p1, p2          - first and last instruction of the range
    initialusedregs - register usage at p1; reg is added to it when an
                      allocation has to be inserted in front of p1

    Nothing is done for NR_ESP, for the current frame pointer, or when p1 is
    nil. Every alloc/dealloc marker for reg found inside the range is removed;
    if the first removed marker was an allocation a new allocation is placed
    before the start of the range, and if the last removed marker was a
    deallocation a new deallocation is placed after the range. }
  procedure TX86AsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
    var
      hp, start: tai;
      removedsomething,
      firstRemovedWasAlloc,
      lastRemovedWasDealloc: boolean;
    begin
      {$ifdef EXTDEBUG}
      { if assigned(p1.optinfo) and
          (ptaiprop(p1.optinfo)^.usedregs <> initialusedregs) then
          internalerror(2004101010); }
      {$endif EXTDEBUG}
      start := p1;
      if (reg = NR_ESP) or
         (reg = current_procinfo.framepointer) or
         not(assigned(p1)) then
        { this happens with registers which are loaded implicitly, outside the }
        { current block (e.g. esi with self) }
        exit;
      { make sure we allocate it for this instruction }
      getnextinstruction(p2,p2);
      lastRemovedWasDealloc := false;
      removedSomething := false;
      firstRemovedWasAlloc := false;
      {$ifdef allocregdebug}
      { The previous debug code referenced an undeclared "supreg" and a
        4-argument insertllitem; it now uses reg and the same 3-argument
        insertllItem as the code below. p2 may be nil here because it was
        advanced past the end of the range above. }
      hp := tai_comment.Create(strpnew('allocating '+std_regname(reg)+
        ' from here...'));
      insertllItem(p1.previous,p1,hp);
      if assigned(p2) then
        begin
          hp := tai_comment.Create(strpnew('allocated '+std_regname(reg)+
            ' till here...'));
          insertllItem(p2,p2.next,hp);
        end;
      {$endif allocregdebug}
      { do it the safe way: always allocate the full super register,
        as we do no register re-allocation in the peephole optimizer,
        this does not hurt
      }
      case getregtype(reg) of
        R_MMREGISTER:
          reg:=newreg(R_MMREGISTER,getsupreg(reg),R_SUBMMWHOLE);
        R_INTREGISTER:
          reg:=newreg(R_INTREGISTER,getsupreg(reg),R_SUBWHOLE);
      end;
      { not yet live at p1: insert an allocation in front of it }
      if not(RegInUsedRegs(reg,initialusedregs)) then
        begin
          hp := tai_regalloc.alloc(reg,nil);
          insertllItem(p1.previous,p1,hp);
          IncludeRegInUsedRegs(reg,initialusedregs);
        end;
      while assigned(p1) and
            (p1 <> p2) do
        begin
          if assigned(p1.optinfo) then
            internalerror(2014022301); // IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs);
          p1 := tai(p1.next);
          repeat
            { skip everything in SkipInstr except register (de)allocations }
            while assigned(p1) and
                  (p1.typ in (SkipInstr-[ait_regalloc])) Do
              p1 := tai(p1.next);
            { remove all allocation/deallocation info about the register in between }
            if assigned(p1) and
               (p1.typ = ait_regalloc) then
              begin
                { same super register, different sub register? }
                if SuperRegistersEqual(reg,tai_regalloc(p1).reg) and (tai_regalloc(p1).reg<>reg) then
                  begin
                    if (getsubreg(tai_regalloc(p1).reg)>getsubreg(reg)) or (getsubreg(reg)=R_SUBH) then
                      internalerror(2016101501);
                    tai_regalloc(p1).reg:=reg;
                  end;
                if tai_regalloc(p1).reg=reg then
                  begin
                    { remember the kind of the first and of the last marker
                      removed, they decide what is re-inserted below }
                    if not removedSomething then
                      begin
                        firstRemovedWasAlloc := tai_regalloc(p1).ratype=ra_alloc;
                        removedSomething := true;
                      end;
                    lastRemovedWasDealloc := (tai_regalloc(p1).ratype=ra_dealloc);
                    hp := tai(p1.Next);
                    asml.Remove(p1);
                    p1.free;
                    p1 := hp;
                  end
                else
                  p1 := tai(p1.next);
              end;
          until not(assigned(p1)) or
                not(p1.typ in SkipInstr);
        end;
      if assigned(p1) then
        begin
          if firstRemovedWasAlloc then
            begin
              hp := tai_regalloc.Alloc(reg,nil);
              insertLLItem(start.previous,start,hp);
            end;
          if lastRemovedWasDealloc then
            begin
              hp := tai_regalloc.DeAlloc(reg,nil);
              insertLLItem(p1.previous,p1,hp);
            end;
        end;
    end;
  { Returns true if hp is an instruction that loads reg with a value that
    does not depend on the previous contents of reg.

    Recognised forms:
      - a two-operand move/lea (see the opcode list below) whose destination
        register overwrites reg entirely and whose source is a constant, a
        register whose value does not depend on reg, or a memory reference
        that does not use reg;
      - a pop into a register that overwrites reg entirely;
      - a three-operand imul whose destination overwrites reg entirely and
        whose second operand neither is nor references reg.

    Returns false for nil and for anything that is not an instruction. }
  function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      Result := false;
      if not assigned(hp) or
         (hp.typ <> ait_instruction) then
        exit;
      p := taicpu(hp);
      case p.opcode of
        A_MOV, A_MOVZX, A_MOVSX, A_LEA,
        A_VMOVSS, A_VMOVSD, A_VMOVAPD, A_VMOVAPS, A_VMOVQ,
        A_MOVSS, A_MOVSD, A_MOVQ, A_MOVAPD, A_MOVAPS:
          Result :=
            (p.ops=2) and { A_MOVSD can have zero operands, so this check is needed }
            (p.oper[1]^.typ = top_reg) and
            (Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)) and
            ((p.oper[0]^.typ = top_const) or
             ((p.oper[0]^.typ = top_reg) and
              not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
             ((p.oper[0]^.typ = top_ref) and
              not RegInRef(reg,p.oper[0]^.ref^)));
        A_POP:
          { pop can also have a memory operand; only look at the reg field
            when the operand really is a register }
          Result :=
            (p.oper[0]^.typ = top_reg) and
            (Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg));
        A_IMUL:
          Result :=
            (p.ops=3) and
            (Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg,reg)) and
            (((p.oper[1]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[1]^.reg,reg))) or
             ((p.oper[1]^.typ=top_ref) and not(RegInRef(reg,p.oper[1]^.ref^))));
      end;
    end;
  { Returns true if p is the first instruction of one of these sequences:
      ret
      leave; ret
      mov %ebp,%esp; pop %ebp; ret
    Returns false if p is not an instruction. }
  class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
    var
      second,third : tai;
    begin
      result:=false;
      if p.typ<>ait_instruction then
        exit;
      case taicpu(p).opcode of
        A_RET:
          result:=true;
        A_LEAVE:
          result:=
            GetNextInstruction(p,second) and
            (second.typ=ait_instruction) and
            (taicpu(second).opcode=A_RET);
        A_MOV:
          { operand 0 is the source (ebp), operand 1 the destination (esp) }
          result:=
            (taicpu(p).oper[0]^.typ=top_reg) and
            (taicpu(p).oper[0]^.reg=NR_EBP) and
            (taicpu(p).oper[1]^.typ=top_reg) and
            (taicpu(p).oper[1]^.reg=NR_ESP) and
            GetNextInstruction(p,second) and
            (second.typ=ait_instruction) and
            (taicpu(second).opcode=A_POP) and
            (taicpu(second).oper[0]^.typ=top_reg) and
            (taicpu(second).oper[0]^.reg=NR_EBP) and
            GetNextInstruction(second,third) and
            (third.typ=ait_instruction) and
            (taicpu(third).opcode=A_RET);
      end;
    end;
  { Returns true if hp1 is
      - add/sub/or/xor/and/shl/shr/sar with reg as destination and a source
        that is a constant or a register different from reg, or
      - inc/dec/neg/not operating on reg.
    Every other opcode yields false. }
  class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
    var
      srcok: boolean;
    begin
      Result := false;
      case hp1.opcode of
        A_INC,A_DEC,A_NEG,A_NOT:
          Result :=
            (hp1.oper[0]^.typ = top_reg) and
            (hp1.oper[0]^.reg = reg);
        A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
          begin
            { the source is inspected first; the destination only if it passed }
            srcok :=
              (hp1.oper[0]^.typ = top_const) or
              ((hp1.oper[0]^.typ = top_reg) and
               (hp1.oper[0]^.reg <> reg));
            if srcok then
              Result :=
                (hp1.oper[1]^.typ = top_reg) and
                (hp1.oper[1]^.reg = reg);
          end;
      end;
    end;
  { Removes, for each register holding the function result, the nearest
    deallocation marker that precedes p. Which registers are handled depends
    on the kind of the current procedure's return type: EAX for the listed
    non-ordinal kinds and for ordinals with a non-zero size, plus EDX for
    ordinals of size 8. Other return types are left alone. }
  procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);

    { Walks backwards from p and removes the first deallocation of the integer
      super register supreg; gives up at the start of the list or at the first
      item for which regInInstruction reports the whole register. }
    procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
      var
        cur: tai;
      begin
        cur := tai(p.previous);
        while assigned(cur) do
          begin
            if (cur.typ = ait_regalloc) and
               (tai_regalloc(cur).ratype=ra_dealloc) and
               (getregtype(tai_regalloc(cur).reg) = R_INTREGISTER) and
               (getsupreg(tai_regalloc(cur).reg) = supreg) then
              begin
                asml.remove(cur);
                cur.free;
                break;
              end;
            if regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),cur) then
              break;
            cur := tai(cur.previous);
          end;
      end;

    begin
      case current_procinfo.procdef.returndef.typ of
        orddef:
          if current_procinfo.procdef.returndef.size <> 0 then
            begin
              DoRemoveLastDeallocForFuncRes(RS_EAX);
              { for int64/qword }
              if current_procinfo.procdef.returndef.size = 8 then
                DoRemoveLastDeallocForFuncRes(RS_EDX);
            end;
        arraydef,recorddef,pointerdef,
        stringdef,enumdef,procdef,objectdef,errordef,
        filedef,setdef,procvardef,
        classrefdef,forwarddef:
          DoRemoveLastDeallocForFuncRes(RS_EAX);
      end;
    end;
  { Pass 1 peephole optimizations for a register-to-register vmovaps/vmovapd
    at p.

    Handled patterns:
      1. vmova* reg1,reg1                  -> instruction removed
      2. vmova* reg1,reg2; vmova* reg2,reg3 with reg2 unused afterwards
                                           -> vmova* reg1,reg3
      3. vmova* reg1,reg2; vmova* reg2,reg1 -> second instruction removed
      4. vmova* reg1,reg2; vfmadd.. ..,reg2; vmova* reg2,reg1 with reg2
         unused afterwards                 -> vfmadd.. ..,reg1

    p may be replaced by the following instruction. The result is true for
    patterns 1 to 3; pattern 4 leaves it false, as before.

    Changes compared to the previous version:
      - the temporary register usage copy is now handed back with
        ReleaseUsedRegs, as OptPass1MOV in this unit does after CopyUsedRegs;
      - pattern 4 now verifies that the trailing vmova* really reads reg2;
        previously any vmova* writing reg1 was removed, whatever its source. }
  function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1,hp2 : tai;
    begin
      result:=false;
      if MatchOpType(taicpu(p),top_reg,top_reg) then
        begin
          { vmova* reg1,reg1
            =>
            <nop> }
          if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
            begin
              GetNextInstruction(p,hp1);
              asml.Remove(p);
              p.Free;
              p:=hp1;
              result:=true;
            end
          else if GetNextInstruction(p,hp1) then
            begin
              if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
                MatchOpType(taicpu(hp1),top_reg,top_reg) and
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
                begin
                  { vmova* reg1,reg2
                    vmova* reg2,reg3
                    dealloc reg2
                    =>
                    vmova* reg1,reg3 }
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                    begin
                      taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end
                  { special case:
                    vmova* reg1,reg2
                    vmova* reg2,reg1
                    =>
                    vmova* reg1,reg2 }
                  else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
                    begin
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end;
                  ReleaseUsedRegs(TmpUsedRegs);
                end
              else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
                { we mix single and double operations here because we assume that the compiler
                  generates vmovapd only after double operations and vmovaps only after single operations }
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
                GetNextInstruction(hp1,hp2) and
                MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
                { the trailing move must copy reg2 back into reg1 }
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
                MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
                begin
                  { vmova* reg1,reg2
                    vfmadd.. ..,reg2
                    vmova* reg2,reg1
                    dealloc reg2
                    =>
                    vfmadd.. ..,reg1 }
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
                    begin
                      taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
                      asml.Remove(p);
                      p.Free;
                      asml.Remove(hp2);
                      hp2.Free;
                      p:=hp1;
                    end;
                  ReleaseUsedRegs(TmpUsedRegs);
                end;
            end;
        end;
    end;
  { Pass 1 peephole optimization for an instruction p whose operand 2 is the
    destination:
      vop    ..., reg1
      vmova* reg1, reg2
      dealloc reg1
      =>
      vop    ..., reg2

    Returns true if the following vmova* was folded into p and removed.

    The temporary register usage copy created by CopyUsedRegs is now handed
    back with ReleaseUsedRegs, as OptPass1MOV in this unit does; previously
    it was never released. }
  function TX86AsmOptimizer.OptPass1VOP(const p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1 : tai;
    begin
      result:=false;
      if GetNextInstruction(p,hp1) and
        { we mix single and double operations here because we assume that the compiler
          generates vmovapd only after double operations and vmovaps only after single operations }
        MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
        MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
        (taicpu(hp1).oper[1]^.typ=top_reg) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          { only fold when the intermediate register is dead after the move }
          if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)) then
            begin
              taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
              asml.Remove(hp1);
              hp1.Free;
              result:=true;
            end;
          ReleaseUsedRegs(TmpUsedRegs);
        end;
    end;
  566. function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  567. var
  568. hp1, hp2: tai;
  569. TmpUsedRegs : TAllUsedRegs;
  570. GetNextIntruction_p : Boolean;
  571. begin
  572. Result:=false;
  573. GetNextIntruction_p:=GetNextInstruction(p, hp1);
  574. if GetNextIntruction_p and
  575. MatchInstruction(hp1,A_AND,[]) and
  576. (taicpu(p).oper[1]^.typ = top_reg) and
  577. MatchOpType(taicpu(hp1),top_const,top_reg) and
  578. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
  579. case taicpu(p).opsize Of
  580. S_L:
  581. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  582. begin
  583. DebugMsg('PeepHole Optimization,MovAnd2Mov',p);
  584. asml.remove(hp1);
  585. hp1.free;
  586. Result:=true;
  587. exit;
  588. end;
  589. end
  590. else if GetNextIntruction_p and
  591. MatchInstruction(hp1,A_MOV,[]) and
  592. (taicpu(p).oper[1]^.typ = top_reg) and
  593. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
  594. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  595. begin
  596. CopyUsedRegs(TmpUsedRegs);
  597. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  598. { we have
  599. mov x, %treg
  600. mov %treg, y
  601. }
  602. if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
  603. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  604. { we've got
  605. mov x, %treg
  606. mov %treg, y
  607. with %treg is not used after }
  608. case taicpu(p).oper[0]^.typ Of
  609. top_reg:
  610. begin
  611. { change
  612. mov %reg, %treg
  613. mov %treg, y
  614. to
  615. mov %reg, y
  616. }
  617. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  618. DebugMsg('PeepHole Optimization,MovMov2Mov 2',p);
  619. asml.remove(hp1);
  620. hp1.free;
  621. ReleaseUsedRegs(TmpUsedRegs);
  622. Exit;
  623. end;
  624. top_ref:
  625. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  626. begin
  627. { change
  628. mov mem, %treg
  629. mov %treg, %reg
  630. to
  631. mov mem, %reg"
  632. }
  633. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  634. DebugMsg('PeepHole Optimization,MovMov2Mov 3',p);
  635. asml.remove(hp1);
  636. hp1.free;
  637. ReleaseUsedRegs(TmpUsedRegs);
  638. Exit;
  639. end;
  640. end;
  641. ReleaseUsedRegs(TmpUsedRegs);
  642. end
  643. else
  644. { Change
  645. mov %reg1, %reg2
  646. xxx %reg2, ???
  647. to
  648. mov %reg1, %reg2
  649. xxx %reg1, ???
  650. to avoid a write/read penalty
  651. }
  652. if MatchOpType(taicpu(p),top_reg,top_reg) and
  653. GetNextInstruction(p,hp1) and
  654. (tai(hp1).typ = ait_instruction) and
  655. (taicpu(hp1).ops >= 1) and
  656. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  657. { we have
  658. mov %reg1, %reg2
  659. XXX %reg2, ???
  660. }
  661. begin
  662. if ((taicpu(hp1).opcode = A_OR) or
  663. (taicpu(hp1).opcode = A_TEST)) and
  664. (taicpu(hp1).oper[1]^.typ = top_reg) and
  665. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  666. { we have
  667. mov %reg1, %reg2
  668. test/or %reg2, %reg2
  669. }
  670. begin
  671. CopyUsedRegs(TmpUsedRegs);
  672. { reg1 will be used after the first instruction,
  673. so update the allocation info }
  674. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  675. if GetNextInstruction(hp1, hp2) and
  676. (hp2.typ = ait_instruction) and
  677. taicpu(hp2).is_jmp and
  678. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  679. { change
  680. mov %reg1, %reg2
  681. test/or %reg2, %reg2
  682. jxx
  683. to
  684. test %reg1, %reg1
  685. jxx
  686. }
  687. begin
  688. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  689. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  690. asml.remove(p);
  691. p.free;
  692. p := hp1;
  693. ReleaseUsedRegs(TmpUsedRegs);
  694. Exit;
  695. end
  696. else
  697. { change
  698. mov %reg1, %reg2
  699. test/or %reg2, %reg2
  700. to
  701. mov %reg1, %reg2
  702. test/or %reg1, %reg1
  703. }
  704. begin
  705. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  706. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  707. end;
  708. ReleaseUsedRegs(TmpUsedRegs);
  709. end
  710. end
  711. else
  712. { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  713. x >= RetOffset) as it doesn't do anything (it writes either to a
  714. parameter or to the temporary storage room for the function
  715. result)
  716. }
  717. if GetNextIntruction_p and
  718. (tai(hp1).typ = ait_instruction) then
  719. begin
  720. if IsExitCode(hp1) and
  721. MatchOpType(p,top_reg,top_ref) and
  722. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  723. not(assigned(current_procinfo.procdef.funcretsym) and
  724. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  725. (taicpu(p).oper[1]^.ref^.index = NR_NO) then
  726. begin
  727. asml.remove(p);
  728. p.free;
  729. p:=hp1;
  730. DebugMsg('Peephole removed deadstore before leave/ret',p);
  731. RemoveLastDeallocForFuncRes(p);
  732. exit;
  733. end
  734. { change
  735. mov reg1, mem1
  736. cmp x, mem1
  737. to
  738. mov reg1, mem1
  739. cmp x, reg1
  740. }
  741. else if MatchOpType(p,top_reg,top_ref) and
  742. MatchInstruction(hp1,A_CMP,[taicpu(p).opsize]) and
  743. (taicpu(hp1).oper[1]^.typ = top_ref) and
  744. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  745. begin
  746. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  747. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  748. end;
  749. end;
  750. { Next instruction is also a MOV ? }
  751. if GetNextIntruction_p and
  752. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  753. begin
  754. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  755. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  756. { mov reg1, mem1 or mov mem1, reg1
  757. mov mem2, reg2 mov reg2, mem2}
  758. begin
  759. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  760. { mov reg1, mem1 or mov mem1, reg1
  761. mov mem2, reg1 mov reg2, mem1}
  762. begin
  763. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  764. { Removes the second statement from
  765. mov reg1, mem1/reg2
  766. mov mem1/reg2, reg1 }
  767. begin
  768. if taicpu(p).oper[0]^.typ=top_reg then
  769. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  770. DebugMsg('PeepHole Optimization,MovMov2Mov 1',p);
  771. asml.remove(hp1);
  772. hp1.free;
  773. Result:=true;
  774. exit;
  775. end
  776. else
  777. begin
  778. CopyUsedRegs(TmpUsedRegs);
  779. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  780. if (taicpu(p).oper[1]^.typ = top_ref) and
  781. { mov reg1, mem1
  782. mov mem2, reg1 }
  783. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  784. GetNextInstruction(hp1, hp2) and
  785. MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
  786. OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  787. OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
  788. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  789. { change to
  790. mov reg1, mem1 mov reg1, mem1
  791. mov mem2, reg1 cmp reg1, mem2
  792. cmp mem1, reg1
  793. }
  794. begin
  795. asml.remove(hp2);
  796. hp2.free;
  797. taicpu(hp1).opcode := A_CMP;
  798. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  799. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  800. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  801. DebugMsg('Peephole MovMovCmp2MovCmp done',hp1);
  802. end;
  803. ReleaseUsedRegs(TmpUsedRegs);
  804. end;
  805. end
  806. else if (taicpu(p).oper[1]^.typ=top_ref) and
  807. OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  808. begin
  809. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  810. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  811. DebugMsg('PeepHole Optimization,MovMov2MovMov1',p);
  812. end
  813. else
  814. begin
  815. CopyUsedRegs(TmpUsedRegs);
  816. if GetNextInstruction(hp1, hp2) and
  817. MatchOpType(taicpu(p),top_ref,top_reg) and
  818. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  819. (taicpu(hp1).oper[1]^.typ = top_ref) and
  820. MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
  821. MatchOpType(taicpu(hp2),top_ref,top_reg) and
  822. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  823. if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
  824. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  825. { mov mem1, %reg1
  826. mov %reg1, mem2
  827. mov mem2, reg2
  828. to:
  829. mov mem1, reg2
  830. mov reg2, mem2}
  831. begin
  832. AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  833. DebugMsg('PeepHole Optimization,MovMovMov2MovMov 1',p);
  834. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  835. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  836. asml.remove(hp2);
  837. hp2.free;
  838. end
  839. {$ifdef i386}
  840. { this is enabled for i386 only, as the rules to create the reg sets below
  841. are too complicated for x86-64, so this makes this code too error prone
  842. on x86-64
  843. }
  844. else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  845. not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
  846. not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
  847. { mov mem1, reg1 mov mem1, reg1
  848. mov reg1, mem2 mov reg1, mem2
  849. mov mem2, reg2 mov mem2, reg1
  850. to: to:
  851. mov mem1, reg1 mov mem1, reg1
  852. mov mem1, reg2 mov reg1, mem2
  853. mov reg1, mem2
  854. or (if mem1 depends on reg1
  855. and/or if mem2 depends on reg2)
  856. to:
  857. mov mem1, reg1
  858. mov reg1, mem2
  859. mov reg1, reg2
  860. }
  861. begin
  862. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  863. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  864. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  865. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  866. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  867. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  868. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  869. AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  870. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  871. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  872. AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  873. end
  874. else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  875. begin
  876. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  877. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  878. end
  879. else
  880. begin
  881. asml.remove(hp2);
  882. hp2.free;
  883. end
  884. {$endif i386}
  885. ;
  886. ReleaseUsedRegs(TmpUsedRegs);
  887. end;
  888. end
  889. (* { movl [mem1],reg1
  890. movl [mem1],reg2
  891. to
  892. movl [mem1],reg1
  893. movl reg1,reg2
  894. }
  895. else if (taicpu(p).oper[0]^.typ = top_ref) and
  896. (taicpu(p).oper[1]^.typ = top_reg) and
  897. (taicpu(hp1).oper[0]^.typ = top_ref) and
  898. (taicpu(hp1).oper[1]^.typ = top_reg) and
  899. (taicpu(p).opsize = taicpu(hp1).opsize) and
  900. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  901. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  902. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  903. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  904. else*)
  905. { movl const1,[mem1]
  906. movl [mem1],reg1
  907. to
  908. movl const1,reg1
  909. movl reg1,[mem1]
  910. }
  911. else if MatchOpType(Taicpu(p),top_const,top_ref) and
  912. MatchOpType(Taicpu(hp1),top_ref,top_reg) and
  913. (taicpu(p).opsize = taicpu(hp1).opsize) and
  914. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  915. not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
  916. begin
  917. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  918. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  919. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  920. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  921. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  922. DebugMsg('PeepHole Optimization,MovMov2MovMov 1',p);
  923. end
  924. end
  925. else if (taicpu(p).oper[1]^.typ = top_reg) and
  926. GetNextIntruction_p and
  927. (hp1.typ = ait_instruction) and
  928. GetNextInstruction(hp1, hp2) and
  929. MatchInstruction(hp2,A_MOV,[]) and
  930. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  931. (taicpu(hp2).oper[0]^.typ=top_reg) and
  932. (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
  933. (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
  934. ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
  935. IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
  936. ) then
  937. { change movsX/movzX reg/ref, reg2
  938. add/sub/or/... reg3/$const, reg2
  939. mov reg2 reg/ref
  940. to add/sub/or/... reg3/$const, reg/ref }
  941. begin
  942. CopyUsedRegs(TmpUsedRegs);
  943. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  944. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  945. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  946. begin
  947. { by example:
  948. movswl %si,%eax movswl %si,%eax p
  949. decl %eax addl %edx,%eax hp1
  950. movw %ax,%si movw %ax,%si hp2
  951. ->
  952. movswl %si,%eax movswl %si,%eax p
  953. decw %eax addw %edx,%eax hp1
  954. movw %ax,%si movw %ax,%si hp2
  955. }
  956. DebugMsg('Peephole Optimization MovOpMov2Op ('+
  957. std_op2str[taicpu(p).opcode]+gas_opsize2str[taicpu(p).opsize]+' '+
  958. std_op2str[taicpu(hp1).opcode]+gas_opsize2str[taicpu(hp1).opsize]+' '+
  959. std_op2str[taicpu(hp2).opcode]+gas_opsize2str[taicpu(hp2).opsize],p);
  960. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  961. {
  962. ->
  963. movswl %si,%eax movswl %si,%eax p
  964. decw %si addw %dx,%si hp1
  965. movw %ax,%si movw %ax,%si hp2
  966. }
  967. case taicpu(hp1).ops of
  968. 1:
  969. begin
  970. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  971. if taicpu(hp1).oper[0]^.typ=top_reg then
  972. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  973. end;
  974. 2:
  975. begin
  976. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  977. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  978. (taicpu(hp1).opcode<>A_SHL) and
  979. (taicpu(hp1).opcode<>A_SHR) and
  980. (taicpu(hp1).opcode<>A_SAR) then
  981. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  982. end;
  983. else
  984. internalerror(2008042701);
  985. end;
  986. {
  987. ->
  988. decw %si addw %dx,%si p
  989. }
  990. asml.remove(p);
  991. asml.remove(hp2);
  992. p.Free;
  993. hp2.Free;
  994. p := hp1;
  995. end;
  996. ReleaseUsedRegs(TmpUsedRegs);
  997. end
  998. else if GetNextIntruction_p and
  999. MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  1000. GetNextInstruction(hp1, hp2) and
  1001. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  1002. MatchOperand(Taicpu(p).oper[0]^,0) and
  1003. (Taicpu(p).oper[1]^.typ = top_reg) and
  1004. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  1005. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  1006. { mov reg1,0
  1007. bts reg1,operand1 --> mov reg1,operand2
  1008. or reg1,operand2 bts reg1,operand1}
  1009. begin
  1010. Taicpu(hp2).opcode:=A_MOV;
  1011. asml.remove(hp1);
  1012. insertllitem(hp2,hp2.next,hp1);
  1013. asml.remove(p);
  1014. p.free;
  1015. p:=hp1;
  1016. end
  1017. else if GetNextIntruction_p and
  1018. MatchInstruction(hp1,A_LEA,[S_L]) and
  1019. MatchOpType(Taicpu(p),top_ref,top_reg) and
  1020. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  1021. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  1022. ) or
  1023. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  1024. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  1025. )
  1026. ) then
  1027. { mov reg1,ref
  1028. lea reg2,[reg1,reg2]
  1029. to
  1030. add reg2,ref}
  1031. begin
  1032. CopyUsedRegs(TmpUsedRegs);
  1033. { reg1 may not be used afterwards }
  1034. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1035. begin
  1036. Taicpu(hp1).opcode:=A_ADD;
  1037. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  1038. DebugMsg('Peephole MovLea2Add done',hp1);
  1039. asml.remove(p);
  1040. p.free;
  1041. p:=hp1;
  1042. end;
  1043. ReleaseUsedRegs(TmpUsedRegs);
  1044. end;
  1045. end;
  1046. function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
  { Peephole rewrites for the instruction p (accessed as a taicpu throughout).

    Two patterns are tried, in this order:
      1. p is a reg->reg move and the next instruction is a mov/movzx/movsx
         that loads through the copied register back into the same super
         register: the load is re-based onto p's source register, p is
         removed and Result is set to true.
      2. p loads from a reference, the next instruction is an arithmetic
         instruction accepted by IsFoldableArithOp (or a lea) and the one
         after stores the result back to the same reference: the three
         instructions are folded into one instruction operating on the
         reference.
         NOTE(review): Result is left false on this path although the
         instruction list was changed - confirm this is intended.

    p is a var parameter: whenever the original p is freed it is re-pointed
    at the surviving instruction (hp1). }
  1047. var
  1048. TmpUsedRegs : TAllUsedRegs;
  1049. hp1,hp2: tai;
  1050. begin
  1051. Result:=false;
  { Pattern 1. hp1 is assigned by GetNextInstruction inside the condition, so
    the order of the terms matters: everything after that call may use hp1. }
  1052. if MatchOpType(taicpu(p),top_reg,top_reg) and
  1053. GetNextInstruction(p, hp1) and
  1054. MatchInstruction(hp1,A_MOV,A_MOVZX,A_MOVSX,[]) and
  1055. MatchOpType(taicpu(hp1),top_ref,top_reg) and
  1056. ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg)
  1057. or
  1058. (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)
  1059. ) and
  1060. (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
  1061. { mov reg1, reg2
  1062. mov/zx/sx (reg2, ..), reg2 to mov/zx/sx (reg1, ..), reg2}
  1063. begin
  { replace every use of the copy (p's destination) inside hp1's reference
    by p's source register; base and index are patched independently }
  1064. if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
  1065. taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
  1066. if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
  1067. taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
  { the reg->reg move is no longer needed; continue with the load }
  1068. asml.remove(p);
  1069. p.free;
  1070. p := hp1;
  1071. Result:=true;
  1072. exit;
  1073. end
  { Pattern 2. hp1 and hp2 are again assigned inside the condition. Either
    hp1 passes IsFoldableArithOp for p's destination register, or hp1 is a lea
    whose reference passes one of the MatchReference/MatchReferenceWithOffset
    checks below. In both cases hp2 must be a mov.
    NOTE(review): NR_INVALID presumably acts as a wildcard in MatchReference -
    confirm against that helper. }
  1074. else if (taicpu(p).oper[0]^.typ = top_ref) and
  1075. GetNextInstruction(p,hp1) and
  1076. (hp1.typ = ait_instruction) and
  1077. { while the GetNextInstruction(hp1,hp2) call could be factored out,
  1078. doing it separately in both branches allows to do the cheap checks
  1079. with low probability earlier }
  1080. ((IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  1081. GetNextInstruction(hp1,hp2) and
  1082. MatchInstruction(hp2,A_MOV,[])
  1083. ) or
  1084. ((taicpu(hp1).opcode=A_LEA) and
  1085. GetNextInstruction(hp1,hp2) and
  1086. MatchInstruction(hp2,A_MOV,[]) and
  1087. ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
  1088. (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
  1089. ) or
  1090. (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,
  1091. taicpu(p).oper[1]^.reg) and
  1092. (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg)) or
  1093. (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_NO)) or
  1094. (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,NR_NO,taicpu(p).oper[1]^.reg))
  1095. ) and
  1096. ((MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^)) or not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)))
  1097. )
  1098. ) and
  { the last operand of hp1 must be what hp2 stores, and hp2 must store to memory }
  1099. MatchOperand(taicpu(hp1).oper[taicpu(hp1).ops-1]^,taicpu(hp2).oper[0]^) and
  1100. (taicpu(hp2).oper[1]^.typ = top_ref) then
  1101. begin
  { query register usage on a private copy (TmpUsedRegs), updated with
    hp1.next, instead of on UsedRegs itself }
  1102. CopyUsedRegs(TmpUsedRegs);
  1103. UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
  { fold only if the store goes back to the reference p loaded from and the
    stored register is not used after hp2 }
  1104. if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
  1105. not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2, TmpUsedRegs))) then
  1106. { change mov (ref), reg
  1107. add/sub/or/... reg2/$const, reg
  1108. mov reg, (ref)
  1109. # release reg
  1110. to add/sub/or/... reg2/$const, (ref) }
  1111. begin
  1112. case taicpu(hp1).opcode of
  { single-operand instructions: the reference becomes operand 0 }
  1113. A_INC,A_DEC,A_NOT,A_NEG :
  1114. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  1115. A_LEA :
  1116. begin
  { the lea becomes an add to the reference. Its source is the part of the
    lea reference that is not p's destination register: the index if it is
    set and different, otherwise the base if it is set and different,
    otherwise the constant offset }
  1117. taicpu(hp1).opcode:=A_ADD;
  1118. if (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.index<>NR_NO) then
  1119. taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
  1120. else if (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.base<>NR_NO) then
  1121. taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base)
  1122. else
  1123. taicpu(hp1).loadconst(0,taicpu(hp1).oper[0]^.ref^.offset);
  1124. taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
  1125. DebugMsg('Peephole FoldLea done',hp1);
  1126. end
  { every other accepted instruction: the reference replaces operand 1 }
  1127. else
  1128. taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
  1129. end;
  { the load (p) and the store (hp2) are dropped; hp1 is the survivor }
  1130. asml.remove(p);
  1131. asml.remove(hp2);
  1132. p.free;
  1133. hp2.free;
  1134. p := hp1
  1135. end;
  { paired with CopyUsedRegs above; executed whether or not the fold happened }
  1136. ReleaseUsedRegs(TmpUsedRegs);
  1137. end;
  1138. end;
  function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
    { Tries to merge a preceding register-to-register mov into the imul p:

          mov  reg1,reg2
          imul y,reg2          ->    imul y,reg1,reg2

      Returns true when the mov was removed and p was turned into the
      three-operand form, false otherwise. p itself is never replaced. }
    var
      TmpUsedRegs : TAllUsedRegs;
      PrevIns : tai;
    begin
      Result:=false;

      { at least a multiplier and a destination operand are required }
      if taicpu(p).ops < 2 then
        exit;

      { operand 0 has to be a constant or a reference with refaddr=addr_full }
      if not((taicpu(p).oper[0]^.typ = top_const) or
             ((taicpu(p).oper[0]^.typ = top_ref) and
              (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) then
        exit;

      if taicpu(p).oper[1]^.typ <> top_reg then
        exit;

      { with more than two operands, operand 2 must be the same register as operand 1 }
      if (taicpu(p).ops <> 2) and
         not((taicpu(p).oper[2]^.typ = top_reg) and
             (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg)) then
        exit;

      { the instruction in front of p has to be "mov reg,reg" ... }
      if not GetLastInstruction(p,PrevIns) then
        exit;
      if not MatchInstruction(PrevIns,A_MOV,[]) then
        exit;
      if not MatchOpType(PrevIns,top_reg,top_reg) then
        exit;

      { ... writing the register the imul works on: either exactly the same
        register, or an S_L mov feeding an S_Q imul on the same super register }
      if (taicpu(PrevIns).oper[1]^.reg <> taicpu(p).oper[1]^.reg) and
         not((taicpu(PrevIns).opsize=S_L) and
             (taicpu(p).opsize=S_Q) and
             SuperRegistersEqual(taicpu(PrevIns).oper[1]^.reg,taicpu(p).oper[1]^.reg)) then
        exit;

      CopyUsedRegs(TmpUsedRegs);
      if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,p,TmpUsedRegs)) then
        begin
          { switch to the three-operand form: the mov source becomes operand 1,
            the mov destination becomes operand 2 }
          taicpu(p).ops := 3;
          taicpu(p).loadreg(1,taicpu(PrevIns).oper[0]^.reg);
          taicpu(p).loadreg(2,taicpu(PrevIns).oper[1]^.reg);
          DebugMsg('Peephole MovImul2Imul done',p);
          asml.remove(PrevIns);
          PrevIns.free;
          Result:=true;
        end;
      ReleaseUsedRegs(TmpUsedRegs);
    end;
  function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
    { First-pass peephole optimizations for the AND instruction p.

      Looks at the instruction following p (hp1) and applies the first
      matching rewrite:
        - and const1,reg / and const2,reg  -> and (const1 and const2),reg
          (p is replaced by hp1, Result=true)
        - and const,reg / movzx subreg,reg -> and const,reg
          when the constant already clears every bit above the source width
        - and const,reg / movsx subreg,reg -> and const,reg
          when the constant additionally clears the sign bit of the source
        - and x,reg / conditional jump     -> test x,reg
          when reg is not in UsedRegs

      Only the first rewrite sets Result to true; the others modify the list
      but leave Result false, exactly as before. }
    var
      hp1 : tai;
    begin
      Result:=false;

      if not(GetNextInstruction(p, hp1)) then
        exit;

      if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_AND,[]) and
        MatchOpType(hp1,top_const,top_reg) and
        (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
        { the second register must contain the first one, so compare their subreg types }
        (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
        (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
        { change
            and const1, reg
            and const2, reg
          to
            and (const1 and const2), reg
        }
        begin
          taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
          DebugMsg('Peephole AndAnd2And done',hp1);
          asml.remove(p);
          p.Free;
          p:=hp1;
          Result:=true;
          exit;
        end
      else if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_MOVZX,[]) and
        (taicpu(hp1).oper[0]^.typ = top_reg) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
        { source and destination of a movzx always differ in size (see the
          opsize checks below), so their sub register kinds can never be
          equal; the operands have to be compared by super register, as the
          movsx case below does }
        (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
        { a high byte register (e.g. %ah) shares the super register but is
          not the low part the and mask was applied to }
        (getsubreg(taicpu(hp1).oper[0]^.reg)<>R_SUBH) and
        (((taicpu(p).opsize=S_W) and
          (taicpu(hp1).opsize=S_BW)) or
         ((taicpu(p).opsize=S_L) and
          (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
          or
         ((taicpu(p).opsize=S_Q) and
          (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
        ) then
        { change
            and const, reg
            movzx subreg, reg
          to
            and const, reg
          if const has no bits set above the width of subreg
        }
        begin
          if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
               ) or
             (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
              or
             (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $ffffffff)=taicpu(p).oper[0]^.val)
             )
{$endif x86_64}
            then
            begin
              DebugMsg('Peephole AndMovzToAnd done',p);
              asml.remove(hp1);
              hp1.free;
            end;
        end
      else if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
        (taicpu(hp1).oper[0]^.typ = top_reg) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
        (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
        { same restriction as for movzx: a high byte source is not covered
          by the and mask }
        (getsubreg(taicpu(hp1).oper[0]^.reg)<>R_SUBH) and
        (((taicpu(p).opsize=S_W) and
          (taicpu(hp1).opsize=S_BW)) or
         ((taicpu(p).opsize=S_L) and
          (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
          or
         ((taicpu(p).opsize=S_Q) and
          (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
        ) then
        { change
            and const, reg
            movsx subreg, reg
          to
            and const, reg
          if const also clears the sign bit of subreg, so that sign and zero
          extension give the same result
        }
        begin
          if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
               ) or
             (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
              or
             (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
             )
{$endif x86_64}
            then
            begin
              DebugMsg('PeepHole Optimization,AndMovsxToAnd',p);
              asml.remove(hp1);
              hp1.free;
            end;
        end
      else if (taicpu(p).oper[1]^.typ = top_reg) and
        (hp1.typ = ait_instruction) and
        (taicpu(hp1).is_jmp) and
        (taicpu(hp1).opcode<>A_JMP) and
        not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
        { change
            and x, reg
            jxx
          to
            test x, reg
            jxx
          if reg is deallocated before the
          jump, but only if it's a conditional jump (PFV)
        }
        taicpu(p).opcode := A_TEST;
    end;
  procedure TX86AsmOptimizer.PostPeepholeOptMov(const p : tai);
    { Post-peephole rewrite of "mov $0, %reg" into "xor %reg, %reg".
      The rewrite is skipped when NR_DEFAULTFLAGS is reported as in use by
      UsedRegs. p is modified in place. }
    var
      ins : taicpu;
    begin
      ins:=taicpu(p);

      { the source has to be the constant 0 ... }
      if not MatchOperand(ins.oper[0]^,0) then
        exit;
      { ... the destination a register ... }
      if ins.oper[1]^.typ <> Top_Reg then
        exit;
      { ... and the flags must not be in use }
      if RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs) then
        exit;

      ins.opcode := A_XOR;
      ins.loadReg(0,ins.oper[1]^.reg);
    end;
  1299. end.