aoptx86.pas 50 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276
{
    Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe

    This unit contains the peephole optimizer.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

 ****************************************************************************
}
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. {$define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
  27. type
  { x86 assembler optimizer, derived from TAsmOptimizer; it declares the
    peephole passes and the helper routines they rely on }
  TX86AsmOptimizer = class(TAsmOptimizer)
    { true if instruction hp loads reg with a value that does not depend on
      the previous contents of reg }
    function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
  protected
    { peephole passes for individual instruction kinds }
    procedure PostPeepholeOptMov(const p : tai);

    function OptPass1AND(var p : tai) : boolean;
    function OptPass1VMOVAP(var p : tai) : boolean;
    function OptPass1VOP(const p : tai) : boolean;
    function OptPass1MOV(var p : tai) : boolean;

    function OptPass2MOV(var p : tai) : boolean;

    { inserts s as an assembler comment before p when DEBUG_AOPTCPU is
      defined, does nothing otherwise }
    procedure DebugMsg(const s : string; p : tai);inline;

    { marks reg as allocated between (and including) p1 and p2 }
    procedure AllocRegBetween(reg : tregister; p1,p2 : tai;var initialusedregs : TAllUsedRegs);

    { true if p starts a function exit sequence (ret, leave/ret or
      mov %ebp,%esp / pop %ebp / ret) }
    class function IsExitCode(p : tai) : boolean;
    { true if hp1 is an arithmetic/logical instruction with reg as its
      destination that does not use reg as its source }
    class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean;
    { removes the last deallocation of the function result register(s)
      found before p }
    procedure RemoveLastDeallocForFuncRes(p : tai);
  end;
  { Instruction matchers: true if instr is an instruction whose opcode is
    (one of) the given opcode(s) and whose operand size is in opsize.
    An empty opsize set accepts every operand size. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;

  { Operand matchers: compare an operand with a register, with a constant
    or with another operand. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;

  { true if all compared fields of both references are equal }
  function RefsEqual(const r1, r2: treference): boolean;

  { true if ref is a plain (base,index) reference using the given registers;
    NR_INVALID accepts any register in that position }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;

  { true if instr has exactly two operands of the types ot0 and ot1 }
  function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
  53. implementation
  54. uses
  55. cutils,
  56. verbose,
  57. procinfo,
  58. symconst,symsym,
  59. itcpugas;
  { Returns true if instr is an instruction with opcode op whose operand
    size is a member of opsize; an empty opsize set matches every size. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
    begin
      result:=false;
      { only real instructions may be cast to taicpu }
      if instr.typ<>ait_instruction then
        exit;
      if taicpu(instr).opcode<>op then
        exit;
      result:=(opsize=[]) or (taicpu(instr).opsize in opsize);
    end;
  { Returns true if instr is an instruction whose opcode is op1 or op2 and
    whose operand size is a member of opsize; an empty opsize set matches
    every size. }
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
    var
      opc : TAsmOp;
    begin
      result:=false;
      if instr.typ=ait_instruction then
        begin
          opc:=taicpu(instr).opcode;
          if (opc=op1) or (opc=op2) then
            result:=(opsize=[]) or (taicpu(instr).opsize in opsize);
        end;
    end;
  { Returns true if instr is an instruction whose opcode is op1, op2 or op3
    and whose operand size is a member of opsize; an empty opsize set
    matches every size. }
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
    var
      opc : TAsmOp;
    begin
      result:=false;
      if instr.typ=ait_instruction then
        begin
          opc:=taicpu(instr).opcode;
          if (opc=op1) or (opc=op2) or (opc=op3) then
            result:=(opsize=[]) or (taicpu(instr).opsize in opsize);
        end;
    end;
  { Returns true if instr is an instruction whose opcode occurs in ops and
    whose operand size is a member of opsize; an empty opsize set matches
    every size. An empty ops array never matches. }
  function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
    const opsize : topsizes) : boolean;
    var
      op : TAsmOp;
    begin
      result:=false;
      { neither the item kind nor the operand size depends on the candidate
        opcode, so test them once instead of once per array element }
      if (instr.typ<>ait_instruction) or
         not((opsize=[]) or (taicpu(instr).opsize in opsize)) then
        exit;
      for op in ops do
        if taicpu(instr).opcode=op then
          begin
            result:=true;
            exit;
          end;
    end;
  { Returns true if oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ=top_reg then
        result:=oper.reg=reg
      else
        result:=false;
    end;
  { Returns true if oper is a constant operand with the value a. }
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
    begin
      if oper.typ=top_const then
        result:=oper.val=a
      else
        result:=false;
    end;
  { Returns true if both operands have the same type and the same contents.
    Only constant, register and reference operands can be compared; two
    operands of any other (equal) type trigger internalerror 2013102801. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
    begin
      if oper1.typ<>oper2.typ then
        begin
          result:=false;
          exit;
        end;
      case oper1.typ of
        top_const:
          result:=oper1.val=oper2.val;
        top_reg:
          result:=oper1.reg=oper2.reg;
        top_ref:
          result:=RefsEqual(oper1.ref^,oper2.ref^);
        else
          begin
            { the types are equal at this point, keep that as the result
              value while reporting the unsupported operand type }
            result:=true;
            internalerror(2013102801);
          end;
      end;
    end;
  { Returns true if r1 and r2 agree in segment, base, index, scale factor,
    offset, symbol, relative symbol and address type. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      result:=
        { registers and scaling }
        (r1.segment=r2.segment) and
        (r1.base=r2.base) and
        (r1.index=r2.index) and
        (r1.scalefactor=r2.scalefactor) and
        { displacement }
        (r1.offset=r2.offset) and
        { symbolic parts }
        (r1.symbol=r2.symbol) and
        (r1.relsymbol=r2.relsymbol) and
        (r1.refaddr=r2.refaddr);
    end;
  { Returns true if ref consists of registers only (no offset, no segment,
    no symbols and a scale factor of 0 or 1) and its base/index registers
    equal base/index. Passing NR_INVALID for base or index accepts any
    register in that position. }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
    begin
      Result:=false;
      { reject anything besides a plain register combination }
      if (ref.offset<>0) or
         not(ref.scalefactor in [0,1]) or
         (ref.segment<>NR_NO) or
         (ref.symbol<>nil) or
         (ref.relsymbol<>nil) then
        exit;
      { NR_INVALID acts as a wildcard }
      if (base<>NR_INVALID) and (ref.base<>base) then
        exit;
      Result:=(index=NR_INVALID) or (ref.index=index);
    end;
  { Returns true if instr has exactly two operands and their types are ot0
    and ot1 (in that order). instr is cast to taicpu without a check, so the
    caller has to pass an instruction. }
  function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
    var
      ins : taicpu;
    begin
      ins:=taicpu(instr);
      Result:=false;
      if ins.ops=2 then
        Result:=(ins.oper[0]^.typ=ot0) and (ins.oper[1]^.typ=ot1);
    end;
  { Inserts s as an assembler comment in front of p when DEBUG_AOPTCPU is
    defined; compiles to an empty procedure otherwise. }
  procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
    begin
{$ifdef DEBUG_AOPTCPU}
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
{$endif DEBUG_AOPTCPU}
    end;
  { Allocates register reg between (and including) instructions p1 and p2;
    the type of p1 and p2 must not be in SkipInstr.

    Note that this routine is called both from the peephole optimizer
    (where optinfo is not yet initialised) and from the cse (where it is).

    All allocation/deallocation markers for reg found in between are
    removed; if the first removed marker was an allocation, an allocation
    is re-inserted before p1, and if the last removed marker was a
    deallocation, a deallocation is re-inserted where the scan stopped. }
  procedure TX86AsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
    var
      item, first: tai;
      ra: tai_regalloc;
      anyremoved,
      firstwasalloc,
      lastwasdealloc: boolean;
    begin
{$ifdef EXTDEBUG}
{     if assigned(p1.optinfo) and
         (ptaiprop(p1.optinfo)^.usedregs <> initialusedregs) then
        internalerror(2004101010); }
{$endif EXTDEBUG}
      first:=p1;
      { this happens with registers which are loaded implicitly, outside the
        current block (e.g. esi with self) }
      if not(assigned(p1)) or
         (reg=NR_ESP) or
         (reg=current_procinfo.framepointer) then
        exit;
      { make sure we allocate it for this instruction }
      getnextinstruction(p2,p2);
      anyremoved:=false;
      firstwasalloc:=false;
      lastwasdealloc:=false;
{$ifdef allocregdebug}
      item:=tai_comment.Create(strpnew('allocating '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
        ' from here...'));
      insertllitem(asml,p1.previous,p1,item);
      item:=tai_comment.Create(strpnew('allocated '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
        ' till here...'));
      insertllitem(asml,p2,p2.next,item);
{$endif allocregdebug}
      { do it the safe way: always allocate the full super register,
        as we do no register re-allocation in the peephole optimizer,
        this does not hurt }
      case getregtype(reg) of
        R_MMREGISTER:
          reg:=newreg(R_MMREGISTER,getsupreg(reg),R_SUBMMWHOLE);
        R_INTREGISTER:
          reg:=newreg(R_INTREGISTER,getsupreg(reg),R_SUBWHOLE);
      end;
      { not live yet at p1: allocate it right in front of p1 }
      if not(RegInUsedRegs(reg,initialusedregs)) then
        begin
          item:=tai_regalloc.alloc(reg,nil);
          insertllItem(p1.previous,p1,item);
          IncludeRegInUsedRegs(reg,initialusedregs);
        end;
      while assigned(p1) and (p1<>p2) do
        begin
          if assigned(p1.optinfo) then
            internalerror(2014022301); // IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs);
          p1:=tai(p1.next);
          repeat
            { skip the items in SkipInstr other than register
              (de)allocation markers }
            while assigned(p1) and
                  (p1.typ in (SkipInstr-[ait_regalloc])) do
              p1:=tai(p1.next);
            { remove all allocation/deallocation info about the register
              in between }
            if assigned(p1) and (p1.typ=ait_regalloc) then
              begin
                ra:=tai_regalloc(p1);
                { a marker for another sub-register of the same super
                  register is rewritten to the full register first }
                if (getregtype(reg)=getregtype(ra.reg)) and
                   (getsupreg(ra.reg)=getsupreg(reg)) and
                   (ra.reg<>reg) then
                  begin
                    if (getsubreg(ra.reg)>getsubreg(reg)) or
                       (getsubreg(reg)=R_SUBH) then
                      internalerror(2016101501);
                    ra.reg:=reg;
                  end;
                if ra.reg=reg then
                  begin
                    if not anyremoved then
                      begin
                        firstwasalloc:=ra.ratype=ra_alloc;
                        anyremoved:=true;
                      end;
                    lastwasdealloc:=(ra.ratype=ra_dealloc);
                    { unlink and destroy the marker, continue behind it }
                    item:=tai(p1.Next);
                    asml.Remove(p1);
                    p1.free;
                    p1:=item;
                  end
                else
                  p1:=tai(p1.next);
              end;
          until not(assigned(p1)) or
                not(p1.typ in SkipInstr);
        end;
      if assigned(p1) then
        begin
          if firstwasalloc then
            begin
              item:=tai_regalloc.Alloc(reg,nil);
              insertLLItem(first.previous,first,item);
            end;
          if lastwasdealloc then
            begin
              item:=tai_regalloc.DeAlloc(reg,nil);
              insertLLItem(p1.previous,p1,item);
            end;
        end;
    end;
  { Returns true if instruction hp loads the register reg with a value that
    does not depend on the old contents of reg.

    Two cases are recognised:
      - a two-operand move-like instruction (mov, movzx, movsx, lea and the
        scalar/aligned SSE/AVX moves) whose destination is reg and whose
        source is a constant, a register with another super register number
        or a memory reference that does not use reg;
      - a pop into reg.

    Registers are compared by register type and super register number.
    Returns false for nil and for anything that is not an instruction. }
  function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      Result:=false;
      if not assigned(hp) or
         (hp.typ<>ait_instruction) then
        exit;
      p:=taicpu(hp);
      case p.opcode of
        A_MOV,A_MOVZX,A_MOVSX,A_LEA,
        A_VMOVSS,A_VMOVSD,A_VMOVAPD,A_VMOVAPS,A_VMOVQ,
        A_MOVSS,A_MOVSD,A_MOVQ,A_MOVAPD,A_MOVAPS:
          Result:=
            { A_MOVSD is also the operand-less string instruction, and the
              VEX scalar moves have a three-operand form whose destination
              is not oper[1]: only the two-operand form may be inspected }
            (p.ops=2) and
            (p.oper[1]^.typ=top_reg) and
            { super register numbers are only comparable within one
              register type }
            (getregtype(p.oper[1]^.reg)=getregtype(reg)) and
            (getsupreg(p.oper[1]^.reg)=getsupreg(reg)) and
            ((p.oper[0]^.typ=top_const) or
             ((p.oper[0]^.typ=top_reg) and
              (getsupreg(p.oper[0]^.reg)<>getsupreg(reg))) or
             ((p.oper[0]^.typ=top_ref) and
              not RegInRef(reg,p.oper[0]^.ref^)));
        A_POP:
          Result:=
            { pop can also target memory, in which case the reg field of
              the operand is not valid }
            (p.ops=1) and
            (p.oper[0]^.typ=top_reg) and
            (getregtype(p.oper[0]^.reg)=getregtype(reg)) and
            (getsupreg(p.oper[0]^.reg)=getsupreg(reg));
      end;
    end;
  { Returns true if p is the first instruction of one of these function
    exit sequences:
      ret
      leave; ret
      mov %ebp,%esp; pop %ebp; ret }
  class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
    var
      hp2,hp3 : tai;
    begin
      result:=false;
      if p.typ<>ait_instruction then
        exit;
      case taicpu(p).opcode of
        A_RET:
          result:=true;
        A_LEAVE:
          result:=
            GetNextInstruction(p,hp2) and
            (hp2.typ=ait_instruction) and
            (taicpu(hp2).opcode=A_RET);
        A_MOV:
          result:=
            { mov %ebp,%esp }
            (taicpu(p).oper[0]^.typ=top_reg) and
            (taicpu(p).oper[0]^.reg=NR_EBP) and
            (taicpu(p).oper[1]^.typ=top_reg) and
            (taicpu(p).oper[1]^.reg=NR_ESP) and
            { pop %ebp }
            GetNextInstruction(p,hp2) and
            (hp2.typ=ait_instruction) and
            (taicpu(hp2).opcode=A_POP) and
            (taicpu(hp2).oper[0]^.typ=top_reg) and
            (taicpu(hp2).oper[0]^.reg=NR_EBP) and
            { ret }
            GetNextInstruction(hp2,hp3) and
            (hp3.typ=ait_instruction) and
            (taicpu(hp3).opcode=A_RET);
      end;
    end;
  { Returns true if hp1 is
      - add/sub/or/xor/and/shl/shr/sar with reg as destination and a
        constant or a register other than reg as source, or
      - inc/dec/neg/not applied to reg.
    Any other opcode yields false. }
  class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
    begin
      result:=false;
      case hp1.opcode of
        A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
          result:=
            ((hp1.oper[0]^.typ=top_const) or
             ((hp1.oper[0]^.typ=top_reg) and
              (hp1.oper[0]^.reg<>reg))) and
            (hp1.oper[1]^.typ=top_reg) and
            (hp1.oper[1]^.reg=reg);
        A_INC,A_DEC,A_NEG,A_NOT:
          result:=
            (hp1.oper[0]^.typ=top_reg) and
            (hp1.oper[0]^.reg=reg);
      end;
    end;
  { Removes the last deallocation marker of the function result register(s)
    that precedes p. Which registers are handled depends on the type of the
    current procedure's result: EAX for the listed non-ordinal types, EAX
    (plus EDX for 8 byte results) for ordinals with a non-zero size. }
  procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);

    { walks backwards from p and removes the first deallocation of integer
      super register supreg it meets; the walk ends at the start of the
      list or at the first item that uses the register }
    procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
      var
        cur: tai;
        fullreg: tregister;
      begin
        fullreg:=newreg(R_INTREGISTER,supreg,R_SUBWHOLE);
        cur:=tai(p.previous);
        while assigned(cur) do
          begin
            if (cur.typ=ait_regalloc) and
               (tai_regalloc(cur).ratype=ra_dealloc) and
               (getregtype(tai_regalloc(cur).reg)=R_INTREGISTER) and
               (getsupreg(tai_regalloc(cur).reg)=supreg) then
              begin
                asml.remove(cur);
                cur.free;
                exit;
              end;
            if regInInstruction(fullreg,cur) then
              exit;
            cur:=tai(cur.previous);
          end;
      end;

    begin
      case current_procinfo.procdef.returndef.typ of
        arraydef,recorddef,pointerdef,
        stringdef,enumdef,procdef,objectdef,errordef,
        filedef,setdef,procvardef,
        classrefdef,forwarddef:
          DoRemoveLastDeallocForFuncRes(RS_EAX);
        orddef:
          case current_procinfo.procdef.returndef.size of
            0:
              { no result register in use };
            8:
              begin
                DoRemoveLastDeallocForFuncRes(RS_EAX);
                { for int64/qword }
                DoRemoveLastDeallocForFuncRes(RS_EDX);
              end;
            else
              DoRemoveLastDeallocForFuncRes(RS_EAX);
          end;
      end;
    end;
  { Pass 1 peephole optimizations for vmovaps/vmovapd with two register
    operands. p is the move; it is replaced by its successor when the move
    itself gets removed. Returns true for the first two patterns below
    when something was changed (the fused multiply-add pattern leaves the
    result at false, as before).

    Every CopyUsedRegs is paired with a ReleaseUsedRegs, as is done in
    OptPass1MOV, so the temporary register state is not leaked. }
  function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1,hp2 : tai;
    begin
      result:=false;
      if MatchOpType(taicpu(p),top_reg,top_reg) then
        begin
          { vmova* reg1,reg1
            =>
            <nop> }
          if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
            begin
              GetNextInstruction(p,hp1);
              asml.Remove(p);
              p.Free;
              p:=hp1;
              result:=true;
            end
          else if GetNextInstruction(p,hp1) then
            begin
              if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
                MatchOpType(taicpu(hp1),top_reg,top_reg) and
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
                begin
                  { vmova* reg1,reg2
                    vmova* reg2,reg3
                    dealloc reg2
                    =>
                    vmova* reg1,reg3 }
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                    begin
                      taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end
                  { special case:
                    vmova* reg1,reg2
                    vmova* reg2,reg1
                    =>
                    vmova* reg1,reg2 }
                  else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
                    begin
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end;
                  ReleaseUsedRegs(TmpUsedRegs);
                end
              else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
                { we mix single and double operations here because we assume that the compiler
                  generates vmovapd only after double operations and vmovaps only after single operations }
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
                GetNextInstruction(hp1,hp2) and
                MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
                { the trailing move has to copy the fma result (reg2) back,
                  otherwise removing it would drop an unrelated load of reg1 }
                MatchOpType(taicpu(hp2),top_reg,top_reg) and
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
                MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
                begin
                  { vmova*  reg1,reg2
                    vfmadd* x,y,reg2
                    vmova*  reg2,reg1
                    dealloc reg2
                    =>
                    vfmadd* x,y,reg1

                    NOTE(review): the first two operands of the fma are not
                    checked against reg2 - confirm the code generator never
                    emits reg2 there }
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
                    begin
                      taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
                      asml.Remove(p);
                      p.Free;
                      asml.Remove(hp2);
                      hp2.Free;
                      p:=hp1;
                    end;
                  ReleaseUsedRegs(TmpUsedRegs);
                end;
            end;
        end;
    end;
  { Pass 1 peephole optimization for three-operand vector instructions:

      vop     x,y,reg1
      vmova*  reg1,reg2
      dealloc reg1
      =>
      vop     x,y,reg2

    p is the vector instruction; its third operand is the one that is
    matched and replaced. Returns true if the move was folded into p.

    The CopyUsedRegs is paired with a ReleaseUsedRegs, as is done in
    OptPass1MOV, so the temporary register state is not leaked. }
  function TX86AsmOptimizer.OptPass1VOP(const p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1 : tai;
    begin
      result:=false;
      if GetNextInstruction(p,hp1) and
        { we mix single and double operations here because we assume that the compiler
          generates vmovapd only after double operations and vmovaps only after single operations }
        MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
        MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
        (taicpu(hp1).oper[1]^.typ=top_reg) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)) then
            begin
              taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
              asml.Remove(hp1);
              hp1.Free;
              result:=true;
            end;
          ReleaseUsedRegs(TmpUsedRegs);
        end;
    end;
  487. function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  488. var
  489. hp1, hp2: tai;
  490. TmpUsedRegs : TAllUsedRegs;
  491. GetNextIntruction_p : Boolean;
  492. begin
  493. Result:=false;
  494. GetNextIntruction_p:=GetNextInstruction(p, hp1);
  495. if GetNextIntruction_p and
  496. MatchInstruction(hp1,A_AND,[]) and
  497. (taicpu(p).oper[1]^.typ = top_reg) and
  498. MatchOpType(taicpu(hp1),top_const,top_reg) and
  499. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
  500. case taicpu(p).opsize Of
  501. S_L:
  502. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  503. begin
  504. DebugMsg('PeepHole Optimization,MovAnd2Mov',p);
  505. asml.remove(hp1);
  506. hp1.free;
  507. Result:=true;
  508. exit;
  509. end;
  510. end
  511. else if GetNextIntruction_p and
  512. MatchInstruction(hp1,A_MOV,[]) and
  513. (taicpu(p).oper[1]^.typ = top_reg) and
  514. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
  515. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  516. begin
  517. CopyUsedRegs(TmpUsedRegs);
  518. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  519. { we have
  520. mov x, %treg
  521. mov %treg, y
  522. }
  523. if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
  524. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  525. { we've got
  526. mov x, %treg
  527. mov %treg, y
  528. with %treg is not used after }
  529. case taicpu(p).oper[0]^.typ Of
  530. top_reg:
  531. begin
  532. { change
  533. mov %reg, %treg
  534. mov %treg, y
  535. to
  536. mov %reg, y
  537. }
  538. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  539. DebugMsg('PeepHole Optimization,MovMov2Mov 2',p);
  540. asml.remove(hp1);
  541. hp1.free;
  542. ReleaseUsedRegs(TmpUsedRegs);
  543. Exit;
  544. end;
  545. top_ref:
  546. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  547. begin
  548. { change
  549. mov mem, %treg
  550. mov %treg, %reg
  551. to
  552. mov mem, %reg"
  553. }
  554. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  555. DebugMsg('PeepHole Optimization,MovMov2Mov 3',p);
  556. asml.remove(hp1);
  557. hp1.free;
  558. ReleaseUsedRegs(TmpUsedRegs);
  559. Exit;
  560. end;
  561. end;
  562. ReleaseUsedRegs(TmpUsedRegs);
  563. end
  564. else
  565. { Change
  566. mov %reg1, %reg2
  567. xxx %reg2, ???
  568. to
  569. mov %reg1, %reg2
  570. xxx %reg1, ???
  571. to avoid a write/read penalty
  572. }
  573. if MatchOpType(taicpu(p),top_reg,top_reg) and
  574. GetNextInstruction(p,hp1) and
  575. (tai(hp1).typ = ait_instruction) and
  576. (taicpu(hp1).ops >= 1) and
  577. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  578. { we have
  579. mov %reg1, %reg2
  580. XXX %reg2, ???
  581. }
  582. begin
  583. if ((taicpu(hp1).opcode = A_OR) or
  584. (taicpu(hp1).opcode = A_TEST)) and
  585. (taicpu(hp1).oper[1]^.typ = top_reg) and
  586. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  587. { we have
  588. mov %reg1, %reg2
  589. test/or %reg2, %reg2
  590. }
  591. begin
  592. CopyUsedRegs(TmpUsedRegs);
  593. { reg1 will be used after the first instruction,
  594. so update the allocation info }
  595. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  596. if GetNextInstruction(hp1, hp2) and
  597. (hp2.typ = ait_instruction) and
  598. taicpu(hp2).is_jmp and
  599. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  600. { change
  601. mov %reg1, %reg2
  602. test/or %reg2, %reg2
  603. jxx
  604. to
  605. test %reg1, %reg1
  606. jxx
  607. }
  608. begin
  609. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  610. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  611. asml.remove(p);
  612. p.free;
  613. p := hp1;
  614. ReleaseUsedRegs(TmpUsedRegs);
  615. Exit;
  616. end
  617. else
  618. { change
  619. mov %reg1, %reg2
  620. test/or %reg2, %reg2
  621. to
  622. mov %reg1, %reg2
  623. test/or %reg1, %reg1
  624. }
  625. begin
  626. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  627. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  628. end;
  629. ReleaseUsedRegs(TmpUsedRegs);
  630. end
  631. end
  632. else
  633. { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  634. x >= RetOffset) as it doesn't do anything (it writes either to a
  635. parameter or to the temporary storage room for the function
  636. result)
  637. }
  638. if GetNextIntruction_p and
  639. (tai(hp1).typ = ait_instruction) then
  640. begin
  641. if IsExitCode(hp1) and
  642. MatchOpType(p,top_reg,top_ref) and
  643. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  644. not(assigned(current_procinfo.procdef.funcretsym) and
  645. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  646. (taicpu(p).oper[1]^.ref^.index = NR_NO) then
  647. begin
  648. asml.remove(p);
  649. p.free;
  650. p := hp1;
  651. DebugMsg('Peephole removed deadstore before leave/ret',p);
  652. RemoveLastDeallocForFuncRes(p);
  653. end
  654. { change
  655. mov reg1, mem1
  656. cmp x, mem1
  657. to
  658. mov reg1, mem1
  659. cmp x, reg1
  660. }
  661. else if MatchOpType(p,top_reg,top_ref) and
  662. MatchInstruction(hp1,A_CMP,[taicpu(p).opsize]) and
  663. (taicpu(hp1).oper[1]^.typ = top_ref) and
  664. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  665. begin
  666. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  667. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  668. end;
  669. end;
  670. { Next instruction is also a MOV ? }
  671. if GetNextIntruction_p and
  672. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  673. begin
  674. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  675. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  676. { mov reg1, mem1 or mov mem1, reg1
  677. mov mem2, reg2 mov reg2, mem2}
  678. begin
  679. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  680. { mov reg1, mem1 or mov mem1, reg1
  681. mov mem2, reg1 mov reg2, mem1}
  682. begin
  683. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  684. { Removes the second statement from
  685. mov reg1, mem1/reg2
  686. mov mem1/reg2, reg1 }
  687. begin
  688. if taicpu(p).oper[0]^.typ=top_reg then
  689. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  690. DebugMsg('PeepHole Optimization,MovMov2Mov 1',p);
  691. asml.remove(hp1);
  692. hp1.free;
  693. Result:=true;
  694. exit;
  695. end
  696. else
  697. begin
  698. CopyUsedRegs(TmpUsedRegs);
  699. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  700. if (taicpu(p).oper[1]^.typ = top_ref) and
  701. { mov reg1, mem1
  702. mov mem2, reg1 }
  703. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  704. GetNextInstruction(hp1, hp2) and
  705. MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
  706. OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  707. OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
  708. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  709. { change to
  710. mov reg1, mem1 mov reg1, mem1
  711. mov mem2, reg1 cmp reg1, mem2
  712. cmp mem1, reg1
  713. }
  714. begin
  715. asml.remove(hp2);
  716. hp2.free;
  717. taicpu(hp1).opcode := A_CMP;
  718. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  719. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  720. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  721. DebugMsg('Peephole MovMovCmp2MovCmp done',hp1);
  722. end;
  723. ReleaseUsedRegs(TmpUsedRegs);
  724. end;
  725. end
  726. else if (taicpu(p).oper[1]^.typ=top_ref) and
  727. OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  728. begin
  729. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  730. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  731. DebugMsg('PeepHole Optimization,MovMov2MovMov1',p);
  732. end
  733. else
  734. begin
  735. CopyUsedRegs(TmpUsedRegs);
  736. if GetNextInstruction(hp1, hp2) and
  737. MatchOpType(taicpu(p),top_ref,top_reg) and
  738. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  739. (taicpu(hp1).oper[1]^.typ = top_ref) and
  740. MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
  741. MatchOpType(taicpu(hp2),top_ref,top_reg) and
  742. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  743. if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
  744. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  745. { mov mem1, %reg1
  746. mov %reg1, mem2
  747. mov mem2, reg2
  748. to:
  749. mov mem1, reg2
  750. mov reg2, mem2}
  751. begin
  752. AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  753. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  754. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  755. asml.remove(hp2);
  756. hp2.free;
  757. end
  758. {$ifdef i386}
  759. { this is enabled for i386 only, as the rules to create the reg sets below
  760. are too complicated for x86-64, so this makes this code too error prone
  761. on x86-64
  762. }
  763. else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  764. not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
  765. not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
  766. { mov mem1, reg1 mov mem1, reg1
  767. mov reg1, mem2 mov reg1, mem2
  768. mov mem2, reg2 mov mem2, reg1
  769. to: to:
  770. mov mem1, reg1 mov mem1, reg1
  771. mov mem1, reg2 mov reg1, mem2
  772. mov reg1, mem2
  773. or (if mem1 depends on reg1
  774. and/or if mem2 depends on reg2)
  775. to:
  776. mov mem1, reg1
  777. mov reg1, mem2
  778. mov reg1, reg2
  779. }
  780. begin
  781. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  782. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  783. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  784. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  785. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  786. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  787. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  788. AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  789. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  790. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  791. AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  792. end
  793. else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  794. begin
  795. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  796. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  797. end
  798. else
  799. begin
  800. asml.remove(hp2);
  801. hp2.free;
  802. end
  803. {$endif i386}
  804. ;
  805. ReleaseUsedRegs(TmpUsedRegs);
  806. end;
  807. end
  808. (* { movl [mem1],reg1
  809. movl [mem1],reg2
  810. to
  811. movl [mem1],reg1
  812. movl reg1,reg2
  813. }
  814. else if (taicpu(p).oper[0]^.typ = top_ref) and
  815. (taicpu(p).oper[1]^.typ = top_reg) and
  816. (taicpu(hp1).oper[0]^.typ = top_ref) and
  817. (taicpu(hp1).oper[1]^.typ = top_reg) and
  818. (taicpu(p).opsize = taicpu(hp1).opsize) and
  819. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  820. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  821. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  822. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  823. else*)
  824. { movl const1,[mem1]
  825. movl [mem1],reg1
  826. to
  827. movl const1,reg1
  828. movl reg1,[mem1]
  829. }
  830. else if MatchOpType(Taicpu(p),top_const,top_ref) and
  831. MatchOpType(Taicpu(hp1),top_ref,top_reg) and
  832. (taicpu(p).opsize = taicpu(hp1).opsize) and
  833. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  834. not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
  835. begin
  836. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  837. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  838. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  839. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  840. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  841. end
  842. end
  843. else if (taicpu(p).oper[1]^.typ = top_reg) and
  844. GetNextIntruction_p and
  845. (hp1.typ = ait_instruction) and
  846. GetNextInstruction(hp1, hp2) and
  847. MatchInstruction(hp2,A_MOV,[]) and
  848. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  849. (taicpu(hp2).oper[0]^.typ=top_reg) and
  850. (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
  851. (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
  852. ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
  853. IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
  854. ) then
  855. { change movsX/movzX reg/ref, reg2
  856. add/sub/or/... reg3/$const, reg2
  857. mov reg2 reg/ref
  858. to add/sub/or/... reg3/$const, reg/ref }
  859. begin
  860. CopyUsedRegs(TmpUsedRegs);
  861. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  862. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  863. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  864. begin
  865. { by example:
  866. movswl %si,%eax movswl %si,%eax p
  867. decl %eax addl %edx,%eax hp1
  868. movw %ax,%si movw %ax,%si hp2
  869. ->
  870. movswl %si,%eax movswl %si,%eax p
  871. decw %eax addw %edx,%eax hp1
  872. movw %ax,%si movw %ax,%si hp2
  873. }
  874. DebugMsg('Peephole Optimization MovOpMov2Op ('+
  875. std_op2str[taicpu(p).opcode]+gas_opsize2str[taicpu(p).opsize]+' '+
  876. std_op2str[taicpu(hp1).opcode]+gas_opsize2str[taicpu(hp1).opsize]+' '+
  877. std_op2str[taicpu(hp2).opcode]+gas_opsize2str[taicpu(hp2).opsize],p);
  878. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  879. {
  880. ->
  881. movswl %si,%eax movswl %si,%eax p
  882. decw %si addw %dx,%si hp1
  883. movw %ax,%si movw %ax,%si hp2
  884. }
  885. case taicpu(hp1).ops of
  886. 1:
  887. begin
  888. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  889. if taicpu(hp1).oper[0]^.typ=top_reg then
  890. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  891. end;
  892. 2:
  893. begin
  894. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  895. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  896. (taicpu(hp1).opcode<>A_SHL) and
  897. (taicpu(hp1).opcode<>A_SHR) and
  898. (taicpu(hp1).opcode<>A_SAR) then
  899. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  900. end;
  901. else
  902. internalerror(2008042701);
  903. end;
  904. {
  905. ->
  906. decw %si addw %dx,%si p
  907. }
  908. asml.remove(p);
  909. asml.remove(hp2);
  910. p.Free;
  911. hp2.Free;
  912. p := hp1;
  913. end;
  914. ReleaseUsedRegs(TmpUsedRegs);
  915. end;
  916. if GetNextIntruction_p and
  917. MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  918. GetNextInstruction(hp1, hp2) and
  919. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  920. MatchOperand(Taicpu(p).oper[0]^,0) and
  921. (Taicpu(p).oper[1]^.typ = top_reg) and
  922. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  923. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  924. { mov reg1,0
  925. bts reg1,operand1 --> mov reg1,operand2
  926. or reg1,operand2 bts reg1,operand1}
  927. begin
  928. Taicpu(hp2).opcode:=A_MOV;
  929. asml.remove(hp1);
  930. insertllitem(hp2,hp2.next,hp1);
  931. asml.remove(p);
  932. p.free;
  933. p:=hp1;
  934. end;
  935. if GetNextIntruction_p and
  936. MatchInstruction(hp1,A_LEA,[S_L]) and
  937. MatchOpType(Taicpu(p),top_ref,top_reg) and
  938. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  939. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  940. ) or
  941. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  942. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  943. )
  944. ) then
  945. { mov reg1,ref
  946. lea reg2,[reg1,reg2]
  947. to
  948. add reg2,ref}
  949. begin
  950. CopyUsedRegs(TmpUsedRegs);
  951. { reg1 may not be used afterwards }
  952. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  953. begin
  954. Taicpu(hp1).opcode:=A_ADD;
  955. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  956. DebugMsg('Peephole MovLea2Add done',hp1);
  957. asml.remove(p);
  958. p.free;
  959. p:=hp1;
  960. end;
  961. ReleaseUsedRegs(TmpUsedRegs);
  962. end;
  963. end;
  { Second-pass peephole optimizations for the MOV instruction at p.

    Two patterns are recognised:
      1. a register copy followed by a load that addresses memory through the
         copy and overwrites the copy;
      2. a load / arithmetic / store-back triple working on one memory
         location.

    p       The MOV to inspect. Whenever p itself is removed it is redirected
            to the (rewritten) instruction that followed it.
    Result  True only when pattern 1 was applied. Pattern 2 rewrites the
            instruction list as well but leaves Result at False. }
  function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
    var
      LiveRegs : TAllUsedRegs;
      nextIns,storeIns : tai;
      copySrc,loadedReg,addend : TRegister;
      canFold : boolean;
    begin
      Result:=false;

      { Pattern 1:
          mov reg1, reg2
          mov/zx/sx (reg2, ..), reg2
        becomes
          mov/zx/sx (reg1, ..), reg2 }
      if MatchOpType(taicpu(p),top_reg,top_reg) and
         GetNextInstruction(p, nextIns) and
         MatchInstruction(nextIns,A_MOV,A_MOVZX,A_MOVSX,[]) and
         MatchOpType(taicpu(nextIns),top_ref,top_reg) and
         ((taicpu(nextIns).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) or
          (taicpu(nextIns).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)) and
         (getsupreg(taicpu(nextIns).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
        begin
          copySrc:=taicpu(p).oper[0]^.reg;
          loadedReg:=taicpu(p).oper[1]^.reg;
          { address through the original register instead of the copy }
          if taicpu(nextIns).oper[0]^.ref^.base = loadedReg then
            taicpu(nextIns).oper[0]^.ref^.base := copySrc;
          if taicpu(nextIns).oper[0]^.ref^.index = loadedReg then
            taicpu(nextIns).oper[0]^.ref^.index := copySrc;
          asml.remove(p);
          p.free;
          p := nextIns;
          Result:=true;
          exit;
        end;

      { Pattern 2 needs a load from memory followed by a real instruction. }
      if (taicpu(p).oper[0]^.typ <> top_ref) or
         not(GetNextInstruction(p,nextIns)) or
         (nextIns.typ <> ait_instruction) then
        exit;

      loadedReg:=taicpu(p).oper[1]^.reg;

      { The middle instruction must either be a foldable arithmetic operation
        on the loaded register, or an LEA that merely adds another register
        to it. }
      canFold:=IsFoldableArithOp(taicpu(nextIns),loadedReg);
      if not canFold then
        canFold:=(taicpu(nextIns).opcode=A_LEA) and
          (taicpu(nextIns).oper[1]^.reg = loadedReg) and
          ((MatchReference(taicpu(nextIns).oper[0]^.ref^,loadedReg,NR_INVALID) and
            (taicpu(nextIns).oper[0]^.ref^.index<>loadedReg)) or
           (MatchReference(taicpu(nextIns).oper[0]^.ref^,NR_INVALID,loadedReg) and
            (taicpu(nextIns).oper[0]^.ref^.base<>loadedReg)));

      { ... and it must be followed by a MOV storing that register to memory. }
      if not canFold or
         not(GetNextInstruction(nextIns,storeIns)) or
         not(MatchInstruction(storeIns,A_MOV,[])) or
         not(MatchOperand(taicpu(p).oper[1]^,taicpu(storeIns).oper[0]^)) or
         (taicpu(storeIns).oper[1]^.typ <> top_ref) then
        exit;

      CopyUsedRegs(LiveRegs);
      UpdateUsedRegs(LiveRegs,tai(nextIns.next));
      { change   mov (ref), reg
                 add/sub/or/... reg2/$const, reg
                 mov reg, (ref)
                 # release reg
        to       add/sub/or/... reg2/$const, (ref)
        provided the store goes back to the location that was loaded and the
        register is not used after the store }
      if RefsEqual(taicpu(storeIns).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
         not(RegUsedAfterInstruction(loadedReg,storeIns,LiveRegs)) then
        begin
          case taicpu(nextIns).opcode of
            A_INC,A_DEC,A_NOT,A_NEG :
              { single-operand forms: the memory location is operand 0 }
              taicpu(nextIns).loadRef(0,taicpu(p).oper[0]^.ref^);
            A_LEA :
              begin
                taicpu(nextIns).opcode:=A_ADD;
                { the addend is whichever of base/index is not the loaded register }
                if taicpu(nextIns).oper[0]^.ref^.index<>loadedReg then
                  addend:=taicpu(nextIns).oper[0]^.ref^.index
                else
                  addend:=taicpu(nextIns).oper[0]^.ref^.base;
                taicpu(nextIns).loadreg(0,addend);
                taicpu(nextIns).loadRef(1,taicpu(p).oper[0]^.ref^);
                DebugMsg('Peephole FoldLea done',nextIns);
              end
            else
              { two-operand forms: the memory location is the destination }
              taicpu(nextIns).loadRef(1,taicpu(p).oper[0]^.ref^);
          end;
          { the load and the store-back are now redundant }
          asml.remove(p);
          asml.remove(storeIns);
          p.free;
          storeIns.free;
          p := nextIns
        end;
      ReleaseUsedRegs(LiveRegs);
    end;
  { First-pass peephole optimizations for the AND instruction at p.

    Looks at p together with the instruction following it and applies one of:
      - AndAnd2And    : two ANDs with constants on the same register are merged;
      - AndMovzToAnd  : a zero extension made redundant by the mask is removed;
      - AndMovsxToAnd : a sign extension made redundant by the mask is removed;
      - and -> test   : before a conditional jump when the register is not in
                        UsedRegs.

    p       The AND to inspect; replaced by the merged AND when p is removed.
    Result  True only when AndAnd2And was applied. }
  function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
    var
      hp1 : tai;
    begin
      Result:=false;

      { every pattern below needs a following instruction }
      if not(GetNextInstruction(p, hp1)) then
        exit;

      if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_AND,[]) and
        MatchOpType(hp1,top_const,top_reg) and
        (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
        { the second register must contain the first one, so compare their subreg types }
        { NOTE(review): when p's register is narrower than hp1's, p leaves the
          bits above its own width untouched while the merged constant may
          clear them - confirm this combination cannot occur here }
        (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
        (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
        { change
            and const1, reg
            and const2, reg
          to
            and (const1 and const2), reg
        }
        begin
          taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
          DebugMsg('Peephole AndAnd2And done',hp1);
          asml.remove(p);
          p.Free;
          p:=hp1;
          Result:=true;
          exit;
        end
      else if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_MOVZX,[]) and
        (taicpu(hp1).oper[0]^.typ = top_reg) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
        { source and destination of the extension must be parts of the same
          super register. Their sub registers always differ for a movzx, so
          comparing sub registers here could never match. }
        (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
        { a high byte source (like %ah) is not the low part of the masked
          register, so the mask test below says nothing about it }
        (getsubreg(taicpu(hp1).oper[0]^.reg)<>R_SUBH) and
        (((taicpu(p).opsize=S_W) and
          (taicpu(hp1).opsize=S_BW)) or
         ((taicpu(p).opsize=S_L) and
          (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
          or
         ((taicpu(p).opsize=S_Q) and
          (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
        ) then
        { change
            and const, reg
            movzx reg.low, reg
          to
            and const, reg
          if const already clears every bit above the source width
        }
        begin
          if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
               ) or
             (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
              or
             (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $ffffffff)=taicpu(p).oper[0]^.val)
             )
{$endif x86_64}
            then
            begin
              DebugMsg('Peephole AndMovzToAnd done',p);
              asml.remove(hp1);
              hp1.free;
            end;
        end
      else if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
        (taicpu(hp1).oper[0]^.typ = top_reg) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
        (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
        { as for movzx: a high byte source is not covered by the mask test }
        (getsubreg(taicpu(hp1).oper[0]^.reg)<>R_SUBH) and
        (((taicpu(p).opsize=S_W) and
          (taicpu(hp1).opsize=S_BW)) or
         ((taicpu(p).opsize=S_L) and
          (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
          or
         ((taicpu(p).opsize=S_Q) and
          (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
        ) then
        { change
            and const, reg
            movsx reg.low, reg
          to
            and const, reg
          if const also clears the sign bit of the source width, so the sign
          extension can only produce zero bits
        }
        begin
          if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
               ) or
             (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
              or
             (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
             )
{$endif x86_64}
            then
            begin
              DebugMsg('PeepHole Optimization,AndMovsxToAnd',p);
              asml.remove(hp1);
              hp1.free;
            end;
        end
      else if (taicpu(p).oper[1]^.typ = top_reg) and
        (hp1.typ = ait_instruction) and
        (taicpu(hp1).is_jmp) and
        (taicpu(hp1).opcode<>A_JMP) and
        not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
        { change
            and x, reg
            jxx
          to
            test x, reg
            jxx
          if reg is deallocated before the
          jump, but only if it's a conditional jump (PFV)
        }
        taicpu(p).opcode := A_TEST;
    end;
  { Post-peephole optimization for the MOV instruction at p:
    change "mov $0, %reg" into "xor %reg, %reg".

    XOR, unlike MOV, writes the flags, so the rewrite is only done while the
    flags register is not recorded in UsedRegs.

    p  The MOV to inspect; it is modified in place and never removed. }
  procedure TX86AsmOptimizer.PostPeepholeOptMov(const p : tai);
    var
      mov : taicpu;
    begin
      mov:=taicpu(p);
      { only a constant zero source qualifies }
      if not(MatchOperand(mov.oper[0]^,0)) then
        exit;
      { the destination has to be a register }
      if mov.oper[1]^.typ <> Top_Reg then
        exit;
      { the flags must be free to clobber }
      if RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs) then
        exit;
      mov.opcode := A_XOR;
      mov.loadReg(0,mov.oper[1]^.reg);
    end;
  1171. end.