aoptx86.pas 51 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
  { Peephole optimizer helpers shared by the x86 targets; extends TAsmOptimizer. }
  27. type
  28. TX86AsmOptimizer = class(TAsmOptimizer)
  { True when hp is a mov-style/lea instruction (or a pop) that writes a register
    sharing reg's super register from a source that does not involve reg. }
  29. function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
  30. protected
  { NOTE(review): implementation not visible in this part of the file. }
  31. procedure PostPeepholeOptMov(const p : tai);
  { NOTE(review): implementation not visible in this part of the file. }
  32. function OptPass1AND(var p : tai) : boolean;
  { Pass-1 patterns starting at a register-to-register vmovaps/vmovapd. }
  33. function OptPass1VMOVAP(var p : tai) : boolean;
  { Folds a following vmovaps/vmovapd of the result register into the
    three-operand instruction p. }
  34. function OptPass1VOP(const p : tai) : boolean;
  { Pass-1 patterns starting at a MOV instruction. }
  35. function OptPass1MOV(var p : tai) : boolean;
  { NOTE(review): implementation not visible in this part of the file. }
  36. function OptPass2MOV(var p : tai) : boolean;
  { Inserts s as an assembler comment before p when DEBUG_AOPTCPU is defined;
    otherwise the body is empty. }
  37. procedure DebugMsg(const s : string; p : tai);inline;
  { Makes reg allocated from p1 up to and including p2, removing the
    alloc/dealloc markers for it that lie in between. }
  38. procedure AllocRegBetween(reg : tregister; p1,p2 : tai;var initialusedregs : TAllUsedRegs);
  { True when p is ret, leave+ret, or "mov ebp,esp; pop ebp; ret". }
  39. class function IsExitCode(p : tai) : boolean;
  { True when hp1 is one of a fixed set of arithmetic/logic/shift instructions
    whose destination register is reg (and whose source, if any, is a constant
    or a different register). }
  40. class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean;
  { Removes the closest preceding dealloc marker of the function result
    register(s) before p. }
  41. procedure RemoveLastDeallocForFuncRes(p : tai);
  42. end;
  { MatchInstruction: true when instr is an instruction whose opcode is one of
    the given opcodes and whose operand size is in opsize; an empty opsize set
    accepts every size. }
  43. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  44. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  45. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  46. function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;
  { MatchOperand: true when oper is a register operand equal to reg, a constant
    operand equal to a, or (third form) when both operands have the same type
    and the same register/constant/reference value. }
  47. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  48. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  49. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  { True when offset, segment, base, index, scale factor, symbol, relsymbol and
    refaddr of both references are equal. }
  50. function RefsEqual(const r1, r2: treference): boolean;
  { True when ref has offset 0, scale factor 0 or 1, no segment and no symbols,
    and its base/index equal the given registers; passing NR_INVALID for base or
    index skips that comparison. }
  51. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  52. { returns true, if ref is a reference using only the registers passed as base and index
  53. and having an offset }
  54. function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  { True when instr has exactly two operands of types ot0 and ot1; instr is cast
    to taicpu without a type check. }
  55. function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
  56. implementation
  56. implementation
  57. uses
  58. cutils,
  59. verbose,
  60. procinfo,
  61. symconst,symsym,
  62. itcpugas;
  { Returns true when instr is an instruction with opcode op whose operand
    size is a member of opsize. An empty opsize set accepts any size. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
    begin
      result := false;
      { only real instructions may be cast to taicpu }
      if instr.typ <> ait_instruction then
        exit;
      if taicpu(instr).opcode <> op then
        exit;
      result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { Returns true when instr is an instruction whose opcode is op1 or op2 and
    whose operand size is a member of opsize. An empty opsize set accepts any
    size. }
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
    var
      opc : TAsmOp;
    begin
      result := false;
      { only real instructions may be cast to taicpu }
      if instr.typ <> ait_instruction then
        exit;
      opc := taicpu(instr).opcode;
      if (opc <> op1) and (opc <> op2) then
        exit;
      result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { Returns true when instr is an instruction whose opcode is op1, op2 or op3
    and whose operand size is a member of opsize. An empty opsize set accepts
    any size. }
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
    var
      opc : TAsmOp;
    begin
      result := false;
      { only real instructions may be cast to taicpu }
      if instr.typ <> ait_instruction then
        exit;
      opc := taicpu(instr).opcode;
      if (opc <> op1) and (opc <> op2) and (opc <> op3) then
        exit;
      result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { Returns true when instr is an instruction whose opcode occurs in ops and
    whose operand size is a member of opsize. An empty opsize set accepts any
    size; an empty ops array never matches. }
  function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
    const opsize : topsizes) : boolean;
    var
      i : longint;
    begin
      result:=false;
      { the instruction-type and size tests do not depend on the opcode being
        tried, so they are decided once up front }
      if instr.typ <> ait_instruction then
        exit;
      if (opsize <> []) and not(taicpu(instr).opsize in opsize) then
        exit;
      for i:=low(ops) to high(ops) do
        if taicpu(instr).opcode = ops[i] then
          begin
            result:=true;
            break;
          end;
    end;
  { Returns true when oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ = top_reg then
        result := oper.reg = reg
      else
        result := false;
    end;
  { Returns true when oper is a constant operand with value a. }
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
    begin
      if oper.typ = top_const then
        result := oper.val = a
      else
        result := false;
    end;
  { Returns true when both operands have the same operand type and the same
    payload: equal register, equal constant, or references that RefsEqual
    considers equal. Operand types other than register, constant and reference
    raise internal error 2013102801 when the two types are the same. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
    begin
      result := oper1.typ = oper2.typ;
      { different operand kinds can never match }
      if not result then
        exit;
      case oper1.typ of
        top_reg:
          result := oper1.reg = oper2.reg;
        top_ref:
          result := RefsEqual(oper1.ref^, oper2.ref^);
        top_const:
          result := oper1.val = oper2.val;
        else
          internalerror(2013102801);
      end;
    end;
  { Returns true when the two references agree in offset, segment, base,
    index, scale factor, symbol, relative symbol and address kind. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      RefsEqual := false;
      { registers and scaling }
      if (r1.base <> r2.base) or
         (r1.index <> r2.index) or
         (r1.scalefactor <> r2.scalefactor) or
         (r1.segment <> r2.segment) then
        exit;
      { displacement }
      if r1.offset <> r2.offset then
        exit;
      { symbolic parts }
      RefsEqual :=
        (r1.symbol = r2.symbol) and
        (r1.relsymbol = r2.relsymbol) and
        (r1.refaddr = r2.refaddr);
    end;
  { Returns true when ref has no offset, no segment, no symbols, a scale
    factor of 0 or 1, and uses the given base and index registers. Passing
    NR_INVALID for base or index skips the comparison of that register. }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
    begin
      { identical to the offset-tolerant variant plus a zero-offset requirement }
      if ref.offset<>0 then
        Result:=false
      else
        Result:=MatchReferenceWithOffset(ref,base,index);
    end;
  { Returns true when ref has no segment, no symbols, a scale factor of 0 or 1,
    and uses the given base and index registers; the offset is not examined.
    Passing NR_INVALID for base or index skips the comparison of that
    register. }
  function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
    begin
      Result:=false;
      if not(ref.scalefactor in [0,1]) then
        exit;
      if (ref.segment<>NR_NO) or
         (ref.symbol<>nil) or
         (ref.relsymbol<>nil) then
        exit;
      if (base<>NR_INVALID) and (ref.base<>base) then
        exit;
      if (index<>NR_INVALID) and (ref.index<>index) then
        exit;
      Result:=true;
    end;
  { Returns true when instr has exactly two operands whose types are ot0 and
    ot1 (in that order). instr is cast to taicpu without checking its type. }
  function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
    var
      ins : taicpu;
    begin
      ins:=taicpu(instr);
      if ins.ops<>2 then
        Result:=false
      else
        Result:=(ins.oper[0]^.typ=ot0) and
          (ins.oper[1]^.typ=ot1);
    end;
  { Inserts s as an assembler comment in front of p when DEBUG_AOPTCPU is
    defined; without that define the routine has an empty body. }
  procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
    begin
  {$ifdef DEBUG_AOPTCPU}
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  {$endif DEBUG_AOPTCPU}
    end;
  { Allocates register reg between (and including) instructions p1 and p2.
    The type of p1 and p2 must not be in SkipInstr.
    Note that this routine is called both from the peephole optimizer
    (where optinfo is not yet initialised) and from the cse (where it is).

    reg             : register to keep allocated; it is widened to its whole
                      super register for integer and MM registers
    p1, p2          : first and last instruction of the range
    initialusedregs : register state at p1; reg is added to it when an
                      allocation marker has to be inserted in front of p1

    All alloc/dealloc markers for reg found inside the range are removed. If
    the first removed marker was an alloc, a new alloc is inserted before the
    start; if the last removed marker was a dealloc, a new dealloc is inserted
    behind the range. Nothing is done for ESP, the frame pointer, or when p1
    is nil. }
  procedure TX86AsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
    var
      hp, start: tai;
      removedsomething,
      firstRemovedWasAlloc,
      lastRemovedWasDealloc: boolean;
    begin
  {$ifdef EXTDEBUG}
  {    if assigned(p1.optinfo) and
          (ptaiprop(p1.optinfo)^.usedregs <> initialusedregs) then
        internalerror(2004101010); }
  {$endif EXTDEBUG}
      start := p1;
      { NOTE(review): only the 32 bit stack pointer name is tested here;
        confirm this is sufficient on x86-64 }
      if (reg = NR_ESP) or
         (reg = current_procinfo.framepointer) or
         not(assigned(p1)) then
        { this happens with registers which are loaded implicitly, outside the }
        { current block (e.g. esi with self)                                   }
        exit;
      { make sure we allocate it for this instruction }
      getnextinstruction(p2,p2);
      lastRemovedWasDealloc := false;
      removedSomething := false;
      firstRemovedWasAlloc := false;
  {$ifdef allocregdebug}
      { diagnostics only: mark the range in the assembler output; uses reg
        directly and the same insertllitem form as the rest of this routine }
      hp := tai_comment.Create(strpnew('allocating '+std_regname(reg)+
        ' from here...'));
      insertllitem(p1.previous,p1,hp);
      { p2 is nil when there is no instruction behind the range }
      if assigned(p2) then
        begin
          hp := tai_comment.Create(strpnew('allocated '+std_regname(reg)+
            ' till here...'));
          insertllitem(p2,p2.next,hp);
        end;
  {$endif allocregdebug}
      { do it the safe way: always allocate the full super register,
        as we do no register re-allocation in the peephole optimizer,
        this does not hurt
      }
      case getregtype(reg) of
        R_MMREGISTER:
          reg:=newreg(R_MMREGISTER,getsupreg(reg),R_SUBMMWHOLE);
        R_INTREGISTER:
          reg:=newreg(R_INTREGISTER,getsupreg(reg),R_SUBWHOLE);
      end;
      if not(RegInUsedRegs(reg,initialusedregs)) then
        begin
          hp := tai_regalloc.alloc(reg,nil);
          insertllItem(p1.previous,p1,hp);
          IncludeRegInUsedRegs(reg,initialusedregs);
        end;
      while assigned(p1) and
            (p1 <> p2) do
        begin
          if assigned(p1.optinfo) then
            internalerror(2014022301); // IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs);
          p1 := tai(p1.next);
          repeat
            { skip everything that is neither an instruction nor a regalloc marker }
            while assigned(p1) and
                  (p1.typ in (SkipInstr-[ait_regalloc])) Do
              p1 := tai(p1.next);
            { remove all allocation/deallocation info about the register in between }
            if assigned(p1) and
               (p1.typ = ait_regalloc) then
              begin
                { same super register, different sub register? }
                if SuperRegistersEqual(reg,tai_regalloc(p1).reg) and (tai_regalloc(p1).reg<>reg) then
                  begin
                    if (getsubreg(tai_regalloc(p1).reg)>getsubreg(reg)) or (getsubreg(reg)=R_SUBH) then
                      internalerror(2016101501);
                    tai_regalloc(p1).reg:=reg;
                  end;
                if tai_regalloc(p1).reg=reg then
                  begin
                    { remember the kind of the first and of the last marker
                      removed so they can be re-created outside the range }
                    if not removedSomething then
                      begin
                        firstRemovedWasAlloc := tai_regalloc(p1).ratype=ra_alloc;
                        removedSomething := true;
                      end;
                    lastRemovedWasDealloc := (tai_regalloc(p1).ratype=ra_dealloc);
                    hp := tai(p1.Next);
                    asml.Remove(p1);
                    p1.free;
                    p1 := hp;
                  end
                else
                  p1 := tai(p1.next);
              end;
          until not(assigned(p1)) or
                not(p1.typ in SkipInstr);
        end;
      if assigned(p1) then
        begin
          if firstRemovedWasAlloc then
            begin
              hp := tai_regalloc.Alloc(reg,nil);
              insertLLItem(start.previous,start,hp);
            end;
          if lastRemovedWasDealloc then
            begin
              hp := tai_regalloc.DeAlloc(reg,nil);
              insertLLItem(p1.previous,p1,hp);
            end;
        end;
    end;
  { Returns true when hp is an instruction that loads reg with a new value,
    i.e. one that does not depend on the previous contents of reg.

    Recognised are:
      - two-operand mov/movzx/movsx/lea and the listed SSE/AVX moves whose
        destination (operand 1) is a register sharing reg's super register and
        whose source is a constant, a register with a different super register,
        or a memory reference that does not use reg;
      - pop into a register sharing reg's super register.

    Returns false when hp is nil or is not an instruction. }
  function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      Result := false;
      if not assigned(hp) or
         (hp.typ <> ait_instruction) then
        exit;
      p := taicpu(hp);
      case p.opcode of
        A_MOV,A_MOVZX,A_MOVSX,A_LEA,
        A_VMOVSS,A_VMOVSD,A_VMOVAPD,A_VMOVAPS,A_VMOVQ,
        A_MOVSS,A_MOVSD,A_MOVQ,A_MOVAPD,A_MOVAPS:
          Result :=
            { the operand count must be checked before touching oper[1]: the
              string form of movsd has no operands, and the three-operand
              forms of vmovss/vmovsd do not have their destination in oper[1] }
            (p.ops = 2) and
            (p.oper[1]^.typ = top_reg) and
            (SuperRegistersEqual(p.oper[1]^.reg,reg)) and
            ((p.oper[0]^.typ = top_const) or
             ((p.oper[0]^.typ = top_reg) and
              not(SuperRegistersEqual(p.oper[0]^.reg,reg))) or
             ((p.oper[0]^.typ = top_ref) and
              not RegInRef(reg,p.oper[0]^.ref^)));
        A_POP:
          Result :=
            { pop may also target memory; only then-valid .reg may be read }
            (p.ops = 1) and
            (p.oper[0]^.typ = top_reg) and
            (SuperRegistersEqual(p.oper[0]^.reg,reg));
      end;
    end;
  { Returns true when p starts a procedure exit sequence, i.e. when it is one of
      ret
      leave; ret
      mov %ebp,%esp; pop %ebp; ret
    p itself must not be nil. }
  class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
    var
      second,third : tai;
    begin
      result:=false;
      if p.typ<>ait_instruction then
        exit;
      case taicpu(p).opcode of
        A_RET:
          result:=true;
        A_LEAVE:
          { leave must be directly followed by ret }
          result:=
            GetNextInstruction(p,second) and
            (second.typ=ait_instruction) and
            (taicpu(second).opcode=A_RET);
        A_MOV:
          { hand-written frame teardown: mov %ebp,%esp; pop %ebp; ret }
          result:=
            (taicpu(p).oper[0]^.typ=top_reg) and
            (taicpu(p).oper[0]^.reg=NR_EBP) and
            (taicpu(p).oper[1]^.typ=top_reg) and
            (taicpu(p).oper[1]^.reg=NR_ESP) and
            GetNextInstruction(p,second) and
            (second.typ=ait_instruction) and
            (taicpu(second).opcode=A_POP) and
            (taicpu(second).oper[0]^.typ=top_reg) and
            (taicpu(second).oper[0]^.reg=NR_EBP) and
            GetNextInstruction(second,third) and
            (third.typ=ait_instruction) and
            (taicpu(third).opcode=A_RET);
      end;
    end;
  { Returns true when hp1 is an arithmetic/logic/shift instruction that
    modifies reg in place:
      - add/sub/or/xor/and/shl/shr/sar with reg as destination (operand 1) and
        a constant or a register other than reg as source (operand 0);
      - inc/dec/neg/not with reg as its only operand.
    Every other opcode yields false. }
  class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
    begin
      Result := False;
      case hp1.opcode of
        A_INC,A_DEC,A_NEG,A_NOT:
          Result :=
            (hp1.oper[0]^.typ = top_reg) and
            (hp1.oper[0]^.reg = reg);
        A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
          { the destination has to be reg itself ... }
          if (hp1.oper[1]^.typ = top_reg) and
             (hp1.oper[1]^.reg = reg) then
            { ... and the source must not be reg }
            Result :=
              (hp1.oper[0]^.typ = top_const) or
              ((hp1.oper[0]^.typ = top_reg) and
               (hp1.oper[0]^.reg <> reg));
      end;
    end;
  { Removes the last dealloc marker in front of p for the register(s) that
    carry the function result of the current procedure: EAX for the listed
    non-ordinal result types, EAX (plus EDX for 8 byte results) for ordinal
    results of non-zero size. Other result types are left alone. }
  procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);

    { Walks backwards from p and deletes the first dealloc marker of integer
      super register supreg; the walk stops at the start of the list or at
      the first item for which regInInstruction reports the whole register. }
    procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
      var
        cur: tai;
      begin
        cur := tai(p.previous);
        while assigned(cur) do
          begin
            if (cur.typ = ait_regalloc) and
               (tai_regalloc(cur).ratype=ra_dealloc) and
               (getregtype(tai_regalloc(cur).reg) = R_INTREGISTER) and
               (getsupreg(tai_regalloc(cur).reg) = supreg) then
              begin
                asml.remove(cur);
                cur.free;
                exit;
              end;
            if regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),cur) then
              exit;
            cur := tai(cur.previous);
          end;
      end;

    begin
      case current_procinfo.procdef.returndef.typ of
        orddef:
          begin
            { a zero-sized ordinal result occupies no register }
            if current_procinfo.procdef.returndef.size = 0 then
              exit;
            DoRemoveLastDeallocForFuncRes(RS_EAX);
            { for int64/qword }
            if current_procinfo.procdef.returndef.size = 8 then
              DoRemoveLastDeallocForFuncRes(RS_EDX);
          end;
        arraydef,recorddef,pointerdef,
        stringdef,enumdef,procdef,objectdef,errordef,
        filedef,setdef,procvardef,
        classrefdef,forwarddef:
          DoRemoveLastDeallocForFuncRes(RS_EAX);
      end;
    end;
  { Pass-1 peephole patterns for a vmovaps/vmovapd instruction at p. Only the
    register-to-register form is handled:

      vmova* reg1,reg1
        => removed; p is set to the following instruction, result is true
      vmova* reg1,reg2 / vmova* reg2,reg3   (reg2 not used afterwards)
        => vmova* reg1,reg3, result is true
      vmova* reg1,reg2 / vmova* reg2,reg1
        => vmova* reg1,reg2, result is true
      vmova* reg1,reg2 / vfmadd132pd|vfnmadd231sd|vfmadd231sd ...,reg2 /
      vmova* reg2,reg1                      (reg2 not used afterwards)
        => the fma instruction writes reg1 directly; p is set to it and
           result is left false

    Every CopyUsedRegs is paired with a ReleaseUsedRegs so the temporary
    register state is not leaked. }
  function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1,hp2 : tai;
    begin
      result:=false;
      if MatchOpType(taicpu(p),top_reg,top_reg) then
        begin
          { vmova* reg1,reg1
            =>
            <nop> }
          if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
            begin
              GetNextInstruction(p,hp1);
              asml.Remove(p);
              p.Free;
              p:=hp1;
              result:=true;
            end
          else if GetNextInstruction(p,hp1) then
            begin
              if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
                MatchOpType(taicpu(hp1),top_reg,top_reg) and
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
                begin
                  { vmova* reg1,reg2
                    vmova* reg2,reg3
                    dealloc reg2
                    =>
                    vmova* reg1,reg3 }
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                    begin
                      taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end
                  { special case:
                    vmova* reg1,reg2
                    vmova* reg2,reg1
                    =>
                    vmova* reg1,reg2 }
                  else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
                    begin
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end;
                  { free the copy made above on every path }
                  ReleaseUsedRegs(TmpUsedRegs);
                end
              else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
                { we mix single and double operations here because we assume that the compiler
                  generates vmovapd only after double operations and vmovaps only after single operations }
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
                GetNextInstruction(hp1,hp2) and
                MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
                MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
                begin
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
                    begin
                      { let the fma instruction work on reg1 directly and drop
                        both copies around it }
                      taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
                      asml.Remove(p);
                      p.Free;
                      asml.Remove(hp2);
                      hp2.Free;
                      p:=hp1;
                    end;
                  { free the copy made above on every path }
                  ReleaseUsedRegs(TmpUsedRegs);
                end;
            end;
        end;
    end;
  { Pass-1 peephole pattern for a three-operand instruction at p:

      vop ...,reg1 / vmova* reg1,reg2   (reg1 not used afterwards)
        => vop ...,reg2

    p is assumed to have a third operand (operand index 2), which is compared
    with the source of the following vmovaps/vmovapd. Returns true when the
    move was folded into p. The temporary register state obtained with
    CopyUsedRegs is released again before returning. }
  function TX86AsmOptimizer.OptPass1VOP(const p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1 : tai;
    begin
      result:=false;
      if GetNextInstruction(p,hp1) and
        { we mix single and double operations here because we assume that the compiler
          generates vmovapd only after double operations and vmovaps only after single operations }
        MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
        MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
        (taicpu(hp1).oper[1]^.typ=top_reg) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)) then
            begin
              { write the result straight into the move's destination }
              taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
              asml.Remove(hp1);
              hp1.Free;
              result:=true;
            end;
          { free the copy made above whether or not the pattern applied }
          ReleaseUsedRegs(TmpUsedRegs);
        end;
    end;
  501. function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  502. var
  503. hp1, hp2: tai;
  504. TmpUsedRegs : TAllUsedRegs;
  505. GetNextIntruction_p : Boolean;
  506. begin
  507. Result:=false;
  508. GetNextIntruction_p:=GetNextInstruction(p, hp1);
  509. if GetNextIntruction_p and
  510. MatchInstruction(hp1,A_AND,[]) and
  511. (taicpu(p).oper[1]^.typ = top_reg) and
  512. MatchOpType(taicpu(hp1),top_const,top_reg) and
  513. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
  514. case taicpu(p).opsize Of
  515. S_L:
  516. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  517. begin
  518. DebugMsg('PeepHole Optimization,MovAnd2Mov',p);
  519. asml.remove(hp1);
  520. hp1.free;
  521. Result:=true;
  522. exit;
  523. end;
  524. end
  525. else if GetNextIntruction_p and
  526. MatchInstruction(hp1,A_MOV,[]) and
  527. (taicpu(p).oper[1]^.typ = top_reg) and
  528. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
  529. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  530. begin
  531. CopyUsedRegs(TmpUsedRegs);
  532. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  533. { we have
  534. mov x, %treg
  535. mov %treg, y
  536. }
  537. if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
  538. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  539. { we've got
  540. mov x, %treg
  541. mov %treg, y
  542. with %treg is not used after }
  543. case taicpu(p).oper[0]^.typ Of
  544. top_reg:
  545. begin
  546. { change
  547. mov %reg, %treg
  548. mov %treg, y
  549. to
  550. mov %reg, y
  551. }
  552. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  553. DebugMsg('PeepHole Optimization,MovMov2Mov 2',p);
  554. asml.remove(hp1);
  555. hp1.free;
  556. ReleaseUsedRegs(TmpUsedRegs);
  557. Exit;
  558. end;
  559. top_ref:
  560. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  561. begin
  562. { change
  563. mov mem, %treg
  564. mov %treg, %reg
  565. to
  566. mov mem, %reg
  567. }
  568. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  569. DebugMsg('PeepHole Optimization,MovMov2Mov 3',p);
  570. asml.remove(hp1);
  571. hp1.free;
  572. ReleaseUsedRegs(TmpUsedRegs);
  573. Exit;
  574. end;
  575. end;
  576. ReleaseUsedRegs(TmpUsedRegs);
  577. end
  578. else
  579. { Change
  580. mov %reg1, %reg2
  581. xxx %reg2, ???
  582. to
  583. mov %reg1, %reg2
  584. xxx %reg1, ???
  585. to avoid a write/read penalty
  586. }
  587. if MatchOpType(taicpu(p),top_reg,top_reg) and
  588. GetNextInstruction(p,hp1) and
  589. (tai(hp1).typ = ait_instruction) and
  590. (taicpu(hp1).ops >= 1) and
  591. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  592. { we have
  593. mov %reg1, %reg2
  594. XXX %reg2, ???
  595. }
  596. begin
  597. if ((taicpu(hp1).opcode = A_OR) or
  598. (taicpu(hp1).opcode = A_TEST)) and
  599. (taicpu(hp1).oper[1]^.typ = top_reg) and
  600. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  601. { we have
  602. mov %reg1, %reg2
  603. test/or %reg2, %reg2
  604. }
  605. begin
  606. CopyUsedRegs(TmpUsedRegs);
  607. { reg1 will be used after the first instruction,
  608. so update the allocation info }
  609. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  610. if GetNextInstruction(hp1, hp2) and
  611. (hp2.typ = ait_instruction) and
  612. taicpu(hp2).is_jmp and
  613. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  614. { change
  615. mov %reg1, %reg2
  616. test/or %reg2, %reg2
  617. jxx
  618. to
  619. test %reg1, %reg1
  620. jxx
  621. }
  622. begin
  623. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  624. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  625. asml.remove(p);
  626. p.free;
  627. p := hp1;
  628. ReleaseUsedRegs(TmpUsedRegs);
  629. Exit;
  630. end
  631. else
  632. { change
  633. mov %reg1, %reg2
  634. test/or %reg2, %reg2
  635. to
  636. mov %reg1, %reg2
  637. test/or %reg1, %reg1
  638. }
  639. begin
  640. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  641. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  642. end;
  643. ReleaseUsedRegs(TmpUsedRegs);
  644. end
  645. end
  646. else
  647. { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  648. x >= RetOffset) as it doesn't do anything (it writes either to a
  649. parameter or to the temporary storage room for the function
  650. result)
  651. }
  652. if GetNextIntruction_p and
  653. (tai(hp1).typ = ait_instruction) then
  654. begin
  655. if IsExitCode(hp1) and
  656. MatchOpType(p,top_reg,top_ref) and
  657. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  658. not(assigned(current_procinfo.procdef.funcretsym) and
  659. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  660. (taicpu(p).oper[1]^.ref^.index = NR_NO) then
  661. begin
  662. asml.remove(p);
  663. p.free;
  664. p:=hp1;
  665. DebugMsg('Peephole removed deadstore before leave/ret',p);
  666. RemoveLastDeallocForFuncRes(p);
  667. exit;
  668. end
  669. { change
  670. mov reg1, mem1
  671. cmp x, mem1
  672. to
  673. mov reg1, mem1
  674. cmp x, reg1
  675. }
  676. else if MatchOpType(p,top_reg,top_ref) and
  677. MatchInstruction(hp1,A_CMP,[taicpu(p).opsize]) and
  678. (taicpu(hp1).oper[1]^.typ = top_ref) and
  679. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  680. begin
  681. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  682. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  683. end;
  684. end;
  685. { Next instruction is also a MOV ? }
  686. if GetNextIntruction_p and
  687. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  688. begin
  689. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  690. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  691. { mov reg1, mem1 or mov mem1, reg1
  692. mov mem2, reg2 mov reg2, mem2}
  693. begin
  694. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  695. { mov reg1, mem1 or mov mem1, reg1
  696. mov mem2, reg1 mov reg2, mem1}
  697. begin
  698. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  699. { Removes the second statement from
  700. mov reg1, mem1/reg2
  701. mov mem1/reg2, reg1 }
  702. begin
  703. if taicpu(p).oper[0]^.typ=top_reg then
  704. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  705. DebugMsg('PeepHole Optimization,MovMov2Mov 1',p);
  706. asml.remove(hp1);
  707. hp1.free;
  708. Result:=true;
  709. exit;
  710. end
  711. else
  712. begin
  713. CopyUsedRegs(TmpUsedRegs);
  714. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  715. if (taicpu(p).oper[1]^.typ = top_ref) and
  716. { mov reg1, mem1
  717. mov mem2, reg1 }
  718. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  719. GetNextInstruction(hp1, hp2) and
  720. MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
  721. OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  722. OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
  723. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  724. { change to
  725. mov reg1, mem1 mov reg1, mem1
  726. mov mem2, reg1 cmp reg1, mem2
  727. cmp mem1, reg1
  728. }
  729. begin
  730. asml.remove(hp2);
  731. hp2.free;
  732. taicpu(hp1).opcode := A_CMP;
  733. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  734. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  735. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  736. DebugMsg('Peephole MovMovCmp2MovCmp done',hp1);
  737. end;
  738. ReleaseUsedRegs(TmpUsedRegs);
  739. end;
  740. end
  741. else if (taicpu(p).oper[1]^.typ=top_ref) and
  742. OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  743. begin
  744. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  745. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  746. DebugMsg('PeepHole Optimization,MovMov2MovMov1',p);
  747. end
  748. else
  749. begin
  750. CopyUsedRegs(TmpUsedRegs);
  751. if GetNextInstruction(hp1, hp2) and
  752. MatchOpType(taicpu(p),top_ref,top_reg) and
  753. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  754. (taicpu(hp1).oper[1]^.typ = top_ref) and
  755. MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
  756. MatchOpType(taicpu(hp2),top_ref,top_reg) and
  757. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  758. if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
  759. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  760. { mov mem1, %reg1
  761. mov %reg1, mem2
  762. mov mem2, reg2
  763. to:
  764. mov mem1, reg2
  765. mov reg2, mem2}
  766. begin
  767. AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  768. DebugMsg('PeepHole Optimization,MovMovMov2MovMov 1',p);
  769. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  770. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  771. asml.remove(hp2);
  772. hp2.free;
  773. end
  774. {$ifdef i386}
  775. { this is enabled for i386 only, as the rules to create the reg sets below
  776. are too complicated for x86-64, so this makes this code too error prone
  777. on x86-64
  778. }
  779. else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  780. not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
  781. not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
  782. { mov mem1, reg1 mov mem1, reg1
  783. mov reg1, mem2 mov reg1, mem2
  784. mov mem2, reg2 mov mem2, reg1
  785. to: to:
  786. mov mem1, reg1 mov mem1, reg1
  787. mov mem1, reg2 mov reg1, mem2
  788. mov reg1, mem2
  789. or (if mem1 depends on reg1
  790. and/or if mem2 depends on reg2)
  791. to:
  792. mov mem1, reg1
  793. mov reg1, mem2
  794. mov reg1, reg2
  795. }
  796. begin
  797. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  798. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  799. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  800. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  801. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  802. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  803. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  804. AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  805. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  806. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  807. AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  808. end
  809. else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  810. begin
  811. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  812. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  813. end
  814. else
  815. begin
  816. asml.remove(hp2);
  817. hp2.free;
  818. end
  819. {$endif i386}
  820. ;
  821. ReleaseUsedRegs(TmpUsedRegs);
  822. end;
  823. end
  824. (* { movl [mem1],reg1
  825. movl [mem1],reg2
  826. to
  827. movl [mem1],reg1
  828. movl reg1,reg2
  829. }
  830. else if (taicpu(p).oper[0]^.typ = top_ref) and
  831. (taicpu(p).oper[1]^.typ = top_reg) and
  832. (taicpu(hp1).oper[0]^.typ = top_ref) and
  833. (taicpu(hp1).oper[1]^.typ = top_reg) and
  834. (taicpu(p).opsize = taicpu(hp1).opsize) and
  835. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  836. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  837. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  838. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  839. else*)
  840. { movl const1,[mem1]
  841. movl [mem1],reg1
  842. to
  843. movl const1,reg1
  844. movl reg1,[mem1]
  845. }
  846. else if MatchOpType(Taicpu(p),top_const,top_ref) and
  847. MatchOpType(Taicpu(hp1),top_ref,top_reg) and
  848. (taicpu(p).opsize = taicpu(hp1).opsize) and
  849. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  850. not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
  851. begin
  852. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  853. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  854. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  855. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  856. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  857. DebugMsg('PeepHole Optimization,MovMov2MovMov 1',p);
  858. end
  859. end
  860. else if (taicpu(p).oper[1]^.typ = top_reg) and
  861. GetNextIntruction_p and
  862. (hp1.typ = ait_instruction) and
  863. GetNextInstruction(hp1, hp2) and
  864. MatchInstruction(hp2,A_MOV,[]) and
  865. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  866. (taicpu(hp2).oper[0]^.typ=top_reg) and
  867. (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
  868. (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
  869. ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
  870. IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
  871. ) then
  872. { change movsX/movzX reg/ref, reg2
  873. add/sub/or/... reg3/$const, reg2
  874. mov reg2 reg/ref
  875. to add/sub/or/... reg3/$const, reg/ref }
  876. begin
  877. CopyUsedRegs(TmpUsedRegs);
  878. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  879. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  880. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  881. begin
  882. { by example:
  883. movswl %si,%eax movswl %si,%eax p
  884. decl %eax addl %edx,%eax hp1
  885. movw %ax,%si movw %ax,%si hp2
  886. ->
  887. movswl %si,%eax movswl %si,%eax p
  888. decw %eax addw %edx,%eax hp1
  889. movw %ax,%si movw %ax,%si hp2
  890. }
  891. DebugMsg('Peephole Optimization MovOpMov2Op ('+
  892. std_op2str[taicpu(p).opcode]+gas_opsize2str[taicpu(p).opsize]+' '+
  893. std_op2str[taicpu(hp1).opcode]+gas_opsize2str[taicpu(hp1).opsize]+' '+
  894. std_op2str[taicpu(hp2).opcode]+gas_opsize2str[taicpu(hp2).opsize],p);
  895. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  896. {
  897. ->
  898. movswl %si,%eax movswl %si,%eax p
  899. decw %si addw %dx,%si hp1
  900. movw %ax,%si movw %ax,%si hp2
  901. }
  902. case taicpu(hp1).ops of
  903. 1:
  904. begin
  905. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  906. if taicpu(hp1).oper[0]^.typ=top_reg then
  907. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  908. end;
  909. 2:
  910. begin
  911. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  912. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  913. (taicpu(hp1).opcode<>A_SHL) and
  914. (taicpu(hp1).opcode<>A_SHR) and
  915. (taicpu(hp1).opcode<>A_SAR) then
  916. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  917. end;
  918. else
  919. internalerror(2008042701);
  920. end;
  921. {
  922. ->
  923. decw %si addw %dx,%si p
  924. }
  925. asml.remove(p);
  926. asml.remove(hp2);
  927. p.Free;
  928. hp2.Free;
  929. p := hp1;
  930. end;
  931. ReleaseUsedRegs(TmpUsedRegs);
  932. end
  933. else if GetNextIntruction_p and
  934. MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  935. GetNextInstruction(hp1, hp2) and
  936. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  937. MatchOperand(Taicpu(p).oper[0]^,0) and
  938. (Taicpu(p).oper[1]^.typ = top_reg) and
  939. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  940. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  941. { mov reg1,0
  942. bts reg1,operand1 --> mov reg1,operand2
  943. or reg1,operand2 bts reg1,operand1}
  944. begin
  945. Taicpu(hp2).opcode:=A_MOV;
  946. asml.remove(hp1);
  947. insertllitem(hp2,hp2.next,hp1);
  948. asml.remove(p);
  949. p.free;
  950. p:=hp1;
  951. end
  952. else if GetNextIntruction_p and
  953. MatchInstruction(hp1,A_LEA,[S_L]) and
  954. MatchOpType(Taicpu(p),top_ref,top_reg) and
  955. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  956. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  957. ) or
  958. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  959. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  960. )
  961. ) then
  962. { mov reg1,ref
  963. lea reg2,[reg1,reg2]
  964. to
  965. add reg2,ref}
  966. begin
  967. CopyUsedRegs(TmpUsedRegs);
  968. { reg1 may not be used afterwards }
  969. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  970. begin
  971. Taicpu(hp1).opcode:=A_ADD;
  972. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  973. DebugMsg('Peephole MovLea2Add done',hp1);
  974. asml.remove(p);
  975. p.free;
  976. p:=hp1;
  977. end;
  978. ReleaseUsedRegs(TmpUsedRegs);
  979. end;
  980. end;
  { Second-pass peephole optimizations for the MOV instruction at p.

    Two patterns are recognised:

      1. mov reg1, reg2
         mov/movzx/movsx (reg2, ..), reg2
           ->  mov/movzx/movsx (reg1, ..), reg2
         Returns true; p is advanced to the surviving load.

      2. mov (ref), reg
         add/sub/or/.../lea  reg2/$const, reg
         mov reg, (ref)            # reg not used afterwards
           ->  add/sub/or/... reg2/$const, (ref)
         p is advanced to the folded instruction; the function result
         stays false in this case.

    In every other situation p is left untouched and false is returned. }
  function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
    var
      ScratchRegs : TAllUsedRegs;
      nxt, nxt2 : tai;
      Foldable : boolean;
    begin
      Result:=false;

      { Pattern 1:
          mov reg1, reg2
          mov/zx/sx (reg2, ..), reg2   to   mov/zx/sx (reg1, ..), reg2 }
      if MatchOpType(taicpu(p),top_reg,top_reg) and
         GetNextInstruction(p, nxt) and
         MatchInstruction(nxt,A_MOV,A_MOVZX,A_MOVSX,[]) and
         MatchOpType(taicpu(nxt),top_ref,top_reg) and
         ((taicpu(nxt).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) or
          (taicpu(nxt).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)) and
         (getsupreg(taicpu(nxt).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
        begin
          { redirect every use of reg2 inside the reference to reg1 }
          if (taicpu(nxt).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
            taicpu(nxt).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
          if (taicpu(nxt).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
            taicpu(nxt).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
          asml.remove(p);
          p.free;
          p := nxt;
          Result:=true;
          exit;
        end;

      { Pattern 2 needs "mov (ref), reg" followed by a real instruction }
      if taicpu(p).oper[0]^.typ <> top_ref then
        exit;
      if not GetNextInstruction(p,nxt) then
        exit;
      if nxt.typ <> ait_instruction then
        exit;

      { The lookup of the third instruction is done separately for the
        arithmetic and the LEA variant so that the cheap opcode checks are
        evaluated first. }
      Foldable :=
        IsFoldableArithOp(taicpu(nxt),taicpu(p).oper[1]^.reg) and
        GetNextInstruction(nxt,nxt2) and
        MatchInstruction(nxt2,A_MOV,[]);

      if not Foldable then
        Foldable :=
          (taicpu(nxt).opcode=A_LEA) and
          GetNextInstruction(nxt,nxt2) and
          MatchInstruction(nxt2,A_MOV,[]) and
          ((MatchReference(taicpu(nxt).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
            (taicpu(nxt).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
           ) or
           (MatchReference(taicpu(nxt).oper[0]^.ref^,NR_INVALID,
              taicpu(p).oper[1]^.reg) and
            (taicpu(nxt).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg)) or
           (MatchReferenceWithOffset(taicpu(nxt).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_NO)) or
           (MatchReferenceWithOffset(taicpu(nxt).oper[0]^.ref^,NR_NO,taicpu(p).oper[1]^.reg))
          ) and
          ((MatchOperand(taicpu(p).oper[1]^,taicpu(nxt2).oper[0]^)) or
           not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,nxt,UsedRegs)));

      if not Foldable then
        exit;

      { the value produced by the middle instruction must be what the
        trailing mov stores, and it must store it to memory }
      if not MatchOperand(taicpu(nxt).oper[taicpu(nxt).ops-1]^,taicpu(nxt2).oper[0]^) then
        exit;
      if taicpu(nxt2).oper[1]^.typ <> top_ref then
        exit;

      CopyUsedRegs(ScratchRegs);
      UpdateUsedRegs(ScratchRegs,tai(nxt.next));
      if (RefsEqual(taicpu(nxt2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
          not(RegUsedAfterInstruction(taicpu(nxt2).oper[0]^.reg,nxt2, ScratchRegs))) then
        { change   mov (ref), reg
                   add/sub/or/... reg2/$const, reg
                   mov reg, (ref)
                   # release reg
          to       add/sub/or/... reg2/$const, (ref) }
        begin
          case taicpu(nxt).opcode of
            A_INC,A_DEC,A_NOT,A_NEG :
              { single-operand instructions work on the memory location directly }
              taicpu(nxt).loadRef(0,taicpu(p).oper[0]^.ref^);
            A_LEA :
              begin
                { the LEA becomes an ADD of its "other" component to (ref) }
                taicpu(nxt).opcode:=A_ADD;
                if (taicpu(nxt).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg) and
                   (taicpu(nxt).oper[0]^.ref^.index<>NR_NO) then
                  taicpu(nxt).loadreg(0,taicpu(nxt).oper[0]^.ref^.index)
                else if (taicpu(nxt).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg) and
                        (taicpu(nxt).oper[0]^.ref^.base<>NR_NO) then
                  taicpu(nxt).loadreg(0,taicpu(nxt).oper[0]^.ref^.base)
                else
                  taicpu(nxt).loadconst(0,taicpu(nxt).oper[0]^.ref^.offset);
                taicpu(nxt).loadRef(1,taicpu(p).oper[0]^.ref^);
                DebugMsg('Peephole FoldLea done',nxt);
              end
            else
              { two-operand arithmetic: the destination becomes the memory location }
              taicpu(nxt).loadRef(1,taicpu(p).oper[0]^.ref^);
          end;
          { the load and the store are no longer needed }
          asml.remove(p);
          asml.remove(nxt2);
          p.free;
          nxt2.free;
          p := nxt;
        end;
      ReleaseUsedRegs(ScratchRegs);
    end;
  { First-pass peephole optimizations for the AND instruction at p.

    Looks at p together with the instruction that follows it and applies the
    first matching rewrite:

      * and const1, reg ; and const2, reg   ->  and (const1 and const2), reg
        (p is replaced by the merged instruction; returns true)
      * and const, reg ; movzx subreg, reg  ->  and const, reg
        when const already clears every bit the zero-extension would clear
      * and const, reg ; movsx subreg, reg  ->  and const, reg
        when const additionally clears the sign bit of the sub-register
      * and x, reg ; jcc                    ->  test x, reg ; jcc
        when reg is not in use any more

    Only the and/and merge sets the result to true; every other case
    (including "nothing matched" and "no following instruction") returns
    false. }
  function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
    var
      hp1 : tai;
    begin
      Result:=false;

      if not(GetNextInstruction(p, hp1)) then
        exit;

      if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_AND,[]) and
        MatchOpType(hp1,top_const,top_reg) and
        (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
        { the second register must contain the first one, so compare their subreg types }
        (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
        { the merged constant must stay below $80000000 in magnitude }
        (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
        { change
            and const1, reg
            and const2, reg
          to
            and (const1 and const2), reg
        }
        begin
          taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
          DebugMsg('Peephole AndAnd2And done',hp1);
          asml.remove(p);
          p.Free;
          p:=hp1;
          Result:=true;
          exit;
        end
      else if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_MOVZX,[]) and
        (taicpu(hp1).oper[0]^.typ = top_reg) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
        { The movzx must extend a part of the very register that was just
          masked, i.e. source and destination share the super register.
          Comparing the sub-register sizes here (as was done before) can never
          succeed because a zero-extension always has a narrower source. }
        (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
        { a high-byte source (%ah etc.) reads bits 8..15, not the masked low
          bits, so the extension is not redundant in that case }
        (getsubreg(taicpu(hp1).oper[0]^.reg)<>R_SUBH) and
        (((taicpu(p).opsize=S_W) and
          (taicpu(hp1).opsize=S_BW)) or
         ((taicpu(p).opsize=S_L) and
          (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
          or
         ((taicpu(p).opsize=S_Q) and
          (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
        ) then
        { change
            and const, reg
            movzx subreg, reg
          to
            and const, reg
          if const has no bits set outside of subreg
        }
        begin
          if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
               ) or
             (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
              or
             (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $ffffffff)=taicpu(p).oper[0]^.val)
             )
{$endif x86_64}
            then
            begin
              DebugMsg('Peephole AndMovzToAnd done',p);
              asml.remove(hp1);
              hp1.free;
            end;
        end
      else if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
        (taicpu(hp1).oper[0]^.typ = top_reg) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
        (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
        { as above: a high-byte source is not covered by the mask test below }
        (getsubreg(taicpu(hp1).oper[0]^.reg)<>R_SUBH) and
        (((taicpu(p).opsize=S_W) and
          (taicpu(hp1).opsize=S_BW)) or
         ((taicpu(p).opsize=S_L) and
          (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
          or
         ((taicpu(p).opsize=S_Q) and
          (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
        ) then
        { change
            and const, reg
            movsx subreg, reg
          to
            and const, reg
          if const has no bits set outside of subreg and also clears the
          sign bit of subreg, so the sign-extension only produces zeros
        }
        begin
          if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
               ) or
             (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
              or
             (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
             )
{$endif x86_64}
            then
            begin
              DebugMsg('PeepHole Optimization,AndMovsxToAnd',p);
              asml.remove(hp1);
              hp1.free;
            end;
        end
      else if (taicpu(p).oper[1]^.typ = top_reg) and
        (hp1.typ = ait_instruction) and
        (taicpu(hp1).is_jmp) and
        (taicpu(hp1).opcode<>A_JMP) and
        not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
        { change
            and x, reg
            jxx
          to
            test x, reg
            jxx
          if reg is deallocated before the
          jump, but only if it's a conditional jump (PFV)
        }
        taicpu(p).opcode := A_TEST;
    end;
  { Post-peephole rewrite for the MOV instruction at p:
    turns "mov $0, %reg" into "xor %reg, %reg".

    The rewrite is only done when the destination is a register and the
    default flags register is not currently in use; otherwise p is left
    unchanged. }
  procedure TX86AsmOptimizer.PostPeepholeOptMov(const p : tai);
    var
      ins : taicpu;
    begin
      ins:=taicpu(p);

      { source must be the constant 0 }
      if not MatchOperand(ins.oper[0]^,0) then
        exit;
      { destination must be a register }
      if ins.oper[1]^.typ <> Top_Reg then
        exit;
      { the flags must be free before an xor may be substituted }
      if RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs) then
        exit;

      { change "mov $0, %reg" into "xor %reg, %reg" }
      ins.opcode := A_XOR;
      ins.loadReg(0,ins.oper[1]^.reg);
    end;
  1198. end.