aoptx86.pas 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
  27. type
  { Peephole optimizer shared by the x86 targets; extends TAsmOptimizer with
    x86 instruction pattern helpers. }
  TX86AsmOptimizer = class(TAsmOptimizer)
    { true when instruction hp overwrites reg with a value that does not
      depend on the previous contents of reg }
    function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
  protected
    { per-opcode optimization steps }
    procedure PostPeepholeOptMov(const p : tai);

    function OptPass1AND(var p : tai) : boolean;
    function OptPass1VMOVAP(var p : tai) : boolean;
    function OptPass1VOP(const p : tai) : boolean;
    function OptPass1MOV(var p : tai) : boolean;

    function OptPass2MOV(var p : tai) : boolean;

    { inserts s as an assembler comment in front of p when DEBUG_AOPTCPU is
      defined, otherwise does nothing }
    procedure DebugMsg(const s : string; p : tai);inline;

    { keeps reg allocated from p1 up to and including p2 }
    procedure AllocRegBetween(reg : tregister; p1,p2 : tai;var initialusedregs : TAllUsedRegs);

    { true when p starts a function exit sequence (ret, leave/ret or
      mov ebp,esp / pop ebp / ret) }
    class function IsExitCode(p : tai) : boolean;
    { true when hp1 is an arithmetic/logic instruction whose destination is reg }
    class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean;
    { removes the last deallocation of the function result register(s)
      found before p }
    procedure RemoveLastDeallocForFuncRes(p : tai);
  end;
  { Instruction matchers: instr must be an instruction with one of the given
    opcodes; an empty opsize set accepts every operand size. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;

  { Operand matchers: against a register, a constant or another operand. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;

  { Field-wise comparison of two references. }
  function RefsEqual(const r1, r2: treference): boolean;

  { Checks for a plain base/index reference; NR_INVALID acts as a wildcard
    for base and index. }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;

  { Checks that instr has exactly two operands of the given types. }
  function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
  53. implementation
  54. uses
  55. cutils,
  56. verbose,
  57. procinfo,
  58. symconst,symsym,
  59. itcpugas;
  { Returns true when instr is an instruction with opcode op and, unless
    opsize is the empty set, an operand size contained in opsize. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
    begin
      result := false;
      if instr.typ <> ait_instruction then
        exit;
      if taicpu(instr).opcode <> op then
        exit;
      { an empty size set means "any size" }
      result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { Returns true when instr is an instruction whose opcode is op1 or op2 and,
    unless opsize is the empty set, whose operand size is contained in opsize. }
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
    begin
      result := false;
      if instr.typ <> ait_instruction then
        exit;
      if (taicpu(instr).opcode <> op1) and
         (taicpu(instr).opcode <> op2) then
        exit;
      { an empty size set means "any size" }
      result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { Returns true when instr is an instruction whose opcode is op1, op2 or op3
    and, unless opsize is the empty set, whose operand size is contained in
    opsize. }
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
    begin
      result := false;
      if instr.typ <> ait_instruction then
        exit;
      if (taicpu(instr).opcode <> op1) and
         (taicpu(instr).opcode <> op2) and
         (taicpu(instr).opcode <> op3) then
        exit;
      { an empty size set means "any size" }
      result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { Returns true when instr is an instruction whose opcode occurs in ops and,
    unless opsize is the empty set, whose operand size is contained in opsize.
    An empty ops array never matches. }
  function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
    const opsize : topsizes) : boolean;
    var
      idx : longint;
    begin
      result:=false;
      { the kind and size tests do not depend on the opcode being tried, so
        they are decided once up front }
      if instr.typ <> ait_instruction then
        exit;
      if (opsize <> []) and not(taicpu(instr).opsize in opsize) then
        exit;
      for idx:=low(ops) to high(ops) do
        if taicpu(instr).opcode = ops[idx] then
          begin
            result:=true;
            exit;
          end;
    end;
  { Returns true when oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ = top_reg then
        result := oper.reg = reg
      else
        result := false;
    end;
  { Returns true when oper is a constant operand with value a. }
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
    begin
      if oper.typ = top_const then
        result := oper.val = a
      else
        result := false;
    end;
  { Returns true when both operands have the same type and the same payload
    (constant value, register, or reference as compared by RefsEqual).
    Operands of equal type other than constant/register/reference raise
    internalerror 2013102801. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
    begin
      if oper1.typ <> oper2.typ then
        begin
          result := false;
          exit;
        end;
      result := true;
      case oper1.typ of
        top_const:
          result := (oper1.val = oper2.val);
        top_reg:
          result := (oper1.reg = oper2.reg);
        top_ref:
          result := RefsEqual(oper1.ref^, oper2.ref^);
        else
          internalerror(2013102801);
      end;
    end;
  { Returns true when the two references agree in every compared field:
    segment, base, index, scale factor, offset, symbol, relative symbol and
    addressing kind. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      result :=
        { displacement and registers }
        (r1.offset = r2.offset) and
        (r1.segment = r2.segment) and
        (r1.base = r2.base) and
        (r1.index = r2.index) and
        (r1.scalefactor = r2.scalefactor) and
        { symbolic parts }
        (r1.symbol = r2.symbol) and
        (r1.refaddr = r2.refaddr) and
        (r1.relsymbol = r2.relsymbol);
    end;
  { Returns true when ref is a plain register reference: zero offset, scale
    factor 0 or 1, no segment, no symbol and no relative symbol. In addition
    ref.base must equal base and ref.index must equal index; passing
    NR_INVALID for either one disables that particular comparison. }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
    begin
      Result:=false;
      if (ref.offset<>0) or
         not(ref.scalefactor in [0,1]) or
         (ref.segment<>NR_NO) or
         (ref.symbol<>nil) or
         (ref.relsymbol<>nil) then
        exit;
      { NR_INVALID is the wildcard for base and index }
      if (base<>NR_INVALID) and (ref.base<>base) then
        exit;
      if (index<>NR_INVALID) and (ref.index<>index) then
        exit;
      Result:=true;
    end;
  { Returns true when instr has exactly two operands, the first of type ot0
    and the second of type ot1. instr is accessed as a taicpu without a check
    of its kind, so callers have to pass an instruction. }
  function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
    begin
      Result:=false;
      if taicpu(instr).ops<>2 then
        exit;
      if taicpu(instr).oper[0]^.typ<>ot0 then
        exit;
      Result:=(taicpu(instr).oper[1]^.typ=ot1);
    end;
  { Emits s as an assembler comment in front of p when DEBUG_AOPTCPU is
    defined; compiles to an empty routine otherwise. }
  procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
  {$ifdef DEBUG_AOPTCPU}
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  {$endif DEBUG_AOPTCPU}
    end;
  { Allocates register reg between (and including) instructions p1 and p2.
    The type of p1 and p2 must not be in SkipInstr.

    Note that this routine is called both from the peephole optimizer (where
    optinfo is not yet initialised) and from the cse (where it is).

    What it does:
      - nothing at all for NR_ESP, the current frame pointer or a nil p1;
      - inserts an allocation marker in front of p1 when reg is not yet
        contained in initialusedregs, and adds reg to initialusedregs;
      - removes every alloc/dealloc marker for reg found while walking from
        p1 to the instruction following p2;
      - if the first removed marker was an allocation, re-inserts an
        allocation in front of the original p1; if the last removed marker
        was a deallocation, re-inserts a deallocation in front of the
        position where the walk stopped.

    The allocregdebug tracing code used to refer to a no longer existing
    local "supreg" and to an insertllitem variant taking the list as an extra
    argument, and it dereferenced p2 even when no instruction follows the
    range. It now uses reg directly, the same insertllitem form as the
    regular code, and guards against a nil p2. }
  procedure TX86AsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
    var
      hp, start: tai;
      removedsomething,
      firstRemovedWasAlloc,
      lastRemovedWasDealloc: boolean;
    begin
  {$ifdef EXTDEBUG}
  {    if assigned(p1.optinfo) and
         (ptaiprop(p1.optinfo)^.usedregs <> initialusedregs) then
        internalerror(2004101010); }
  {$endif EXTDEBUG}
      start := p1;
      if (reg = NR_ESP) or
         (reg = current_procinfo.framepointer) or
         not(assigned(p1)) then
        { this happens with registers which are loaded implicitly, outside the
          current block (e.g. esi with self) }
        exit;
      { make sure we allocate it for this instruction }
      getnextinstruction(p2,p2);
      lastRemovedWasDealloc := false;
      removedSomething := false;
      firstRemovedWasAlloc := false;
  {$ifdef allocregdebug}
      hp := tai_comment.Create(strpnew('allocating '+std_regname(reg)+
        ' from here...'));
      insertllitem(p1.previous,p1,hp);
      { p2 has been advanced above and is nil when nothing follows the range }
      if assigned(p2) then
        begin
          hp := tai_comment.Create(strpnew('allocated '+std_regname(reg)+
            ' till here...'));
          insertllitem(p2,p2.next,hp);
        end;
  {$endif allocregdebug}
      if not(RegInUsedRegs(reg,initialusedregs)) then
        begin
          hp := tai_regalloc.alloc(reg,nil);
          insertllItem(p1.previous,p1,hp);
          IncludeRegInUsedRegs(reg,initialusedregs);
        end;
      while assigned(p1) and
            (p1 <> p2) do
        begin
          if assigned(p1.optinfo) then
            internalerror(2014022301); // IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs);
          p1 := tai(p1.next);
          repeat
            { skip everything that is neither an instruction nor a regalloc }
            while assigned(p1) and
                  (p1.typ in (SkipInstr-[ait_regalloc])) Do
              p1 := tai(p1.next);
            { remove all allocation/deallocation info about the register in between }
            if assigned(p1) and
               (p1.typ = ait_regalloc) then
              begin
                if tai_regalloc(p1).reg=reg then
                  begin
                    if not removedSomething then
                      begin
                        firstRemovedWasAlloc := tai_regalloc(p1).ratype=ra_alloc;
                        removedSomething := true;
                      end;
                    lastRemovedWasDealloc := (tai_regalloc(p1).ratype=ra_dealloc);
                    hp := tai(p1.Next);
                    asml.Remove(p1);
                    p1.free;
                    p1 := hp;
                  end
                else
                  { marker of another register: leave it alone }
                  p1 := tai(p1.next);
              end;
          until not(assigned(p1)) or
                not(p1.typ in SkipInstr);
        end;
      if assigned(p1) then
        begin
          { restore the outermost markers that were removed above so the
            register state outside of the range stays consistent }
          if firstRemovedWasAlloc then
            begin
              hp := tai_regalloc.Alloc(reg,nil);
              insertLLItem(start.previous,start,hp);
            end;
          if lastRemovedWasDealloc then
            begin
              hp := tai_regalloc.DeAlloc(reg,nil);
              insertLLItem(p1.previous,p1,hp);
            end;
        end;
    end;
  { Returns true when instruction hp loads reg with a value that does not
    depend on the old contents of reg:
      - a two operand move/lea style instruction whose destination (operand 1)
        is a register with the same type and super register as reg and whose
        source is a constant, a register with a different super register
        number, or a reference not using reg;
      - a pop into a register with the same type and super register as reg.
    Returns false for a nil hp and for anything that is not an instruction.

    Compared to a plain super register comparison this routine is more
    careful in three places, all of which can only turn a former "true" into
    "false":
      - the operand count is checked before operands are read (A_MOVSD is
        also the operand-less string move, and the AVX moves have three
        operand forms whose destination is not operand 1);
      - the operand of pop must really be a register before its reg field is
        read;
      - the register type is compared as well, because super register numbers
        are reused between the integer and the vector register files. }
  function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;

    { same register file and same super register number }
    function SameSuperReg(r1,r2: tregister): boolean;
      begin
        SameSuperReg :=
          (getregtype(r1) = getregtype(r2)) and
          (getsupreg(r1) = getsupreg(r2));
      end;

    begin
      Result := false;
      if not assigned(hp) or
         (hp.typ <> ait_instruction) then
        exit;
      p := taicpu(hp);
      case p.opcode of
        A_MOV,A_MOVZX,A_MOVSX,A_LEA,
        A_VMOVSS,A_VMOVSD,A_VMOVAPD,A_VMOVAPS,A_VMOVQ,
        A_MOVSS,A_MOVSD,A_MOVQ,A_MOVAPD,A_MOVAPS:
          Result :=
            (p.ops = 2) and
            (p.oper[1]^.typ = top_reg) and
            SameSuperReg(p.oper[1]^.reg,reg) and
            ((p.oper[0]^.typ = top_const) or
             { the source test intentionally stays a pure super register
               number comparison: it is the more conservative choice }
             ((p.oper[0]^.typ = top_reg) and
              (getsupreg(p.oper[0]^.reg) <> getsupreg(reg))) or
             ((p.oper[0]^.typ = top_ref) and
              not RegInRef(reg,p.oper[0]^.ref^)));
        A_POP:
          Result :=
            (p.ops = 1) and
            (p.oper[0]^.typ = top_reg) and
            SameSuperReg(p.oper[0]^.reg,reg);
      end;
    end;
  { Returns true when p starts a function exit sequence, i.e. when p is
      ret
    or
      leave
      ret
    or
      mov %ebp,%esp
      pop %ebp
      ret }
  class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
    var
      second,third : tai;
    begin
      result:=false;
      if p.typ<>ait_instruction then
        exit;
      case taicpu(p).opcode of
        A_RET:
          result:=true;
        A_LEAVE:
          result:=
            GetNextInstruction(p,second) and
            (second.typ=ait_instruction) and
            (taicpu(second).opcode=A_RET);
        A_MOV:
          result:=
            { mov %ebp,%esp }
            (taicpu(p).oper[0]^.typ=top_reg) and
            (taicpu(p).oper[0]^.reg=NR_EBP) and
            (taicpu(p).oper[1]^.typ=top_reg) and
            (taicpu(p).oper[1]^.reg=NR_ESP) and
            { pop %ebp }
            GetNextInstruction(p,second) and
            (second.typ=ait_instruction) and
            (taicpu(second).opcode=A_POP) and
            (taicpu(second).oper[0]^.typ=top_reg) and
            (taicpu(second).oper[0]^.reg=NR_EBP) and
            { ret }
            GetNextInstruction(second,third) and
            (third.typ=ait_instruction) and
            (taicpu(third).opcode=A_RET);
      end;
    end;
  { Returns true when hp1 is an arithmetic/logic instruction that modifies
    reg in place:
      - add/sub/or/xor/and/shl/shr/sar with a constant or a register other
        than reg as source and reg as destination;
      - inc/dec/neg/not with reg as its operand.
    Every other opcode yields false. }
  class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
    var
      sourceOk: boolean;
    begin
      result := false;
      case hp1.opcode of
        A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
          begin
            { the source is looked at before the destination }
            case hp1.oper[0]^.typ of
              top_const:
                sourceOk := true;
              top_reg:
                sourceOk := (hp1.oper[0]^.reg <> reg);
              else
                sourceOk := false;
            end;
            result :=
              sourceOk and
              (hp1.oper[1]^.typ = top_reg) and
              (hp1.oper[1]^.reg = reg);
          end;
        A_INC,A_DEC,A_NEG,A_NOT:
          if hp1.oper[0]^.typ = top_reg then
            result := (hp1.oper[0]^.reg = reg);
      end;
    end;
  { Removes the last deallocation marker of the function result register(s)
    located before p. Which registers are handled depends on the return type
    of the current procedure: EAX for the listed non-ordinal types, EAX for
    ordinals of non-zero size, and additionally EDX for ordinals of size 8. }
  procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);

    { Walks backwards from p. The first integer register deallocation of
      supreg that is met gets removed; the walk also ends at the start of the
      list or at the first item for which regInInstruction reports the whole
      register. }
    procedure DropLastDealloc(supreg: tsuperregister);
      var
        cur: tai;
      begin
        cur := tai(p.previous);
        while assigned(cur) do
          begin
            if (cur.typ = ait_regalloc) and
               (tai_regalloc(cur).ratype=ra_dealloc) and
               (getregtype(tai_regalloc(cur).reg) = R_INTREGISTER) and
               (getsupreg(tai_regalloc(cur).reg) = supreg) then
              begin
                asml.remove(cur);
                cur.free;
                exit;
              end;
            if regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),cur) then
              exit;
            cur := tai(cur.previous);
          end;
      end;

    begin
      case current_procinfo.procdef.returndef.typ of
        arraydef,recorddef,pointerdef,
        stringdef,enumdef,procdef,objectdef,errordef,
        filedef,setdef,procvardef,
        classrefdef,forwarddef:
          DropLastDealloc(RS_EAX);
        orddef:
          begin
            if current_procinfo.procdef.returndef.size <> 0 then
              begin
                DropLastDealloc(RS_EAX);
                { for int64/qword }
                if current_procinfo.procdef.returndef.size = 8 then
                  DropLastDealloc(RS_EDX);
              end;
          end;
      end;
    end;
  { Peephole patterns starting at a register-to-register vmovapd/vmovaps p.
    Returns true when the instruction stream has been changed.

    Handled patterns:
      1. vmova* reg1,reg1                    => removed; p becomes the next
                                                instruction
      2. vmova* reg1,reg2; vmova* reg2,reg3  => vmova* reg1,reg3
                                                (reg2 not used afterwards)
      3. vmova* reg1,reg2; vmova* reg2,reg1  => vmova* reg1,reg2
      4. vmova* reg1,reg2
         vfma*  x,y,reg2
         vmova* reg2,reg1                    => vfma* x,y,reg1
                                                (reg2 not used afterwards;
                                                 p becomes the vfma*)

    Changes compared to the straightforward implementation:
      - every CopyUsedRegs is paired with a ReleaseUsedRegs, so the temporary
        register usage information is released again;
      - pattern 4 checks that the trailing move really reads reg2, the
        destination of the fused multiply-add; without this check an
        unrelated "vmova* regX,reg1" would have been removed;
      - pattern 4 reports the change through the function result, as
        pattern 1 does when it replaces p. }
  function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1,hp2 : tai;
    begin
      result:=false;
      if not MatchOpType(taicpu(p),top_reg,top_reg) then
        exit;
      { vmova* reg1,reg1
        =>
        <nop> }
      if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
        begin
          GetNextInstruction(p,hp1);
          asml.Remove(p);
          p.Free;
          p:=hp1;
          result:=true;
        end
      else if GetNextInstruction(p,hp1) then
        begin
          if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
            MatchOpType(taicpu(hp1),top_reg,top_reg) and
            MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
            begin
              { vmova* reg1,reg2
                vmova* reg2,reg3
                dealloc reg2
                =>
                vmova* reg1,reg3 }
              CopyUsedRegs(TmpUsedRegs);
              UpdateUsedRegs(TmpUsedRegs, tai(p.next));
              if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                begin
                  taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                  asml.Remove(hp1);
                  hp1.Free;
                  result:=true;
                end
              { special case:
                vmova* reg1,reg2
                vmova* reg2,reg1
                =>
                vmova* reg1,reg2 }
              else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
                begin
                  asml.Remove(hp1);
                  hp1.Free;
                  result:=true;
                end;
              ReleaseUsedRegs(TmpUsedRegs);
            end
          else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
            { we mix single and double operations here because we assume that the compiler
              generates vmovapd only after double operations and vmovaps only after single operations }
            MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
            GetNextInstruction(hp1,hp2) and
            MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
            { the trailing move has to copy the fma result (reg2) ... }
            MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
            { ... back into the register the first move read from (reg1) }
            MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
            begin
              CopyUsedRegs(TmpUsedRegs);
              UpdateUsedRegs(TmpUsedRegs, tai(p.next));
              UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
              if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
                begin
                  { let the fma work on reg1 directly and drop both moves }
                  taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
                  asml.Remove(p);
                  p.Free;
                  asml.Remove(hp2);
                  hp2.Free;
                  p:=hp1;
                  result:=true;
                end;
              ReleaseUsedRegs(TmpUsedRegs);
            end;
        end;
    end;
  { Folds a register move following a three operand instruction p into p:

      vop    x,y,reg1
      vmova* reg1,reg2
    =>
      vop    x,y,reg2        (only when reg1 is not used afterwards)

    Returns true when the move has been removed. Operand 2 of p is read
    without a check of the operand count, so p has to be a three operand
    instruction.

    Changes compared to the straightforward implementation:
      - the temporary register usage information created by CopyUsedRegs is
        released again with ReleaseUsedRegs;
      - the source of the move is verified to be a register before its reg
        field is read. }
  function TX86AsmOptimizer.OptPass1VOP(const p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1 : tai;
    begin
      result:=false;
      if GetNextInstruction(p,hp1) and
        { we mix single and double operations here because we assume that the compiler
          generates vmovapd only after double operations and vmovaps only after single operations }
        MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
        (taicpu(hp1).oper[0]^.typ=top_reg) and
        MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
        (taicpu(hp1).oper[1]^.typ=top_reg) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)) then
            begin
              { write the result straight into the destination of the move }
              taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
              asml.Remove(hp1);
              hp1.Free;
              result:=true;
            end;
          ReleaseUsedRegs(TmpUsedRegs);
        end;
    end;
  467. function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  468. var
  469. hp1, hp2: tai;
  470. TmpUsedRegs : TAllUsedRegs;
  471. GetNextIntruction_p : Boolean;
  472. begin
  473. Result:=false;
  474. GetNextIntruction_p:=GetNextInstruction(p, hp1);
  475. if GetNextIntruction_p and
  476. MatchInstruction(hp1,A_AND,[]) and
  477. (taicpu(p).oper[1]^.typ = top_reg) and
  478. MatchOpType(taicpu(hp1),top_const,top_reg) and
  479. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
  480. case taicpu(p).opsize Of
  481. S_L:
  482. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  483. begin
  484. DebugMsg('PeepHole Optimization,MovAnd2Mov',p);
  485. asml.remove(hp1);
  486. hp1.free;
  487. Result:=true;
  488. exit;
  489. end;
  490. end
  491. else if GetNextIntruction_p and
  492. MatchInstruction(hp1,A_MOV,[]) and
  493. (taicpu(p).oper[1]^.typ = top_reg) and
  494. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
  495. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  496. begin
  497. CopyUsedRegs(TmpUsedRegs);
  498. { we have
  499. mov x, %treg
  500. mov %treg, y
  501. }
  502. if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
  503. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  504. { we've got
  505. mov x, %treg
  506. mov %treg, y
  507. with %treg is not used after }
  508. case taicpu(p).oper[0]^.typ Of
  509. top_reg:
  510. begin
  511. { change
  512. mov %reg, %treg
  513. mov %treg, y
  514. to
  515. mov %reg, y
  516. }
  517. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  518. asml.remove(hp1);
  519. hp1.free;
  520. ReleaseUsedRegs(TmpUsedRegs);
  521. Exit;
  522. end;
  523. top_ref:
  524. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  525. begin
  526. { change
  527. mov mem, %treg
  528. mov %treg, %reg
  529. to
  530. mov mem, %reg"
  531. }
  532. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  533. asml.remove(hp1);
  534. hp1.free;
  535. ReleaseUsedRegs(TmpUsedRegs);
  536. Exit;
  537. end;
  538. end;
  539. ReleaseUsedRegs(TmpUsedRegs);
  540. end
  541. else
  542. { Change
  543. mov %reg1, %reg2
  544. xxx %reg2, ???
  545. to
  546. mov %reg1, %reg2
  547. xxx %reg1, ???
  548. to avoid a write/read penalty
  549. }
  550. if MatchOpType(taicpu(p),top_reg,top_reg) and
  551. GetNextInstruction(p,hp1) and
  552. (tai(hp1).typ = ait_instruction) and
  553. (taicpu(hp1).ops >= 1) and
  554. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  555. { we have
  556. mov %reg1, %reg2
  557. XXX %reg2, ???
  558. }
  559. begin
  560. if ((taicpu(hp1).opcode = A_OR) or
  561. (taicpu(hp1).opcode = A_TEST)) and
  562. (taicpu(hp1).oper[1]^.typ = top_reg) and
  563. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  564. { we have
  565. mov %reg1, %reg2
  566. test/or %reg2, %reg2
  567. }
  568. begin
  569. CopyUsedRegs(TmpUsedRegs);
  570. { reg1 will be used after the first instruction,
  571. so update the allocation info }
  572. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  573. if GetNextInstruction(hp1, hp2) and
  574. (hp2.typ = ait_instruction) and
  575. taicpu(hp2).is_jmp and
  576. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  577. { change
  578. mov %reg1, %reg2
  579. test/or %reg2, %reg2
  580. jxx
  581. to
  582. test %reg1, %reg1
  583. jxx
  584. }
  585. begin
  586. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  587. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  588. asml.remove(p);
  589. p.free;
  590. p := hp1;
  591. ReleaseUsedRegs(TmpUsedRegs);
  592. Exit;
  593. end
  594. else
  595. { change
  596. mov %reg1, %reg2
  597. test/or %reg2, %reg2
  598. to
  599. mov %reg1, %reg2
  600. test/or %reg1, %reg1
  601. }
  602. begin
  603. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  604. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  605. end;
  606. ReleaseUsedRegs(TmpUsedRegs);
  607. end
  608. end
  609. else
  610. { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  611. x >= RetOffset) as it doesn't do anything (it writes either to a
  612. parameter or to the temporary storage room for the function
  613. result)
  614. }
  615. if GetNextIntruction_p and
  616. (tai(hp1).typ = ait_instruction) then
  617. begin
  618. if IsExitCode(hp1) and
  619. MatchOpType(p,top_reg,top_ref) and
  620. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  621. not(assigned(current_procinfo.procdef.funcretsym) and
  622. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  623. (taicpu(p).oper[1]^.ref^.index = NR_NO) then
  624. begin
  625. asml.remove(p);
  626. p.free;
  627. p := hp1;
  628. DebugMsg('Peephole removed deadstore before leave/ret',p);
  629. RemoveLastDeallocForFuncRes(p);
  630. end
  631. { change
  632. mov reg1, mem1
  633. cmp x, mem1
  634. to
  635. mov reg1, mem1
  636. cmp x, reg1
  637. }
  638. else if MatchOpType(p,top_reg,top_ref) and
  639. MatchInstruction(hp1,A_CMP,[taicpu(p).opsize]) and
  640. (taicpu(hp1).oper[1]^.typ = top_ref) and
  641. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  642. begin
  643. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  644. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  645. end;
  646. end;
  647. { Next instruction is also a MOV ? }
  648. if GetNextIntruction_p and
  649. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  650. begin
  651. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  652. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  653. { mov reg1, mem1 or mov mem1, reg1
  654. mov mem2, reg2 mov reg2, mem2}
  655. begin
  656. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  657. { mov reg1, mem1 or mov mem1, reg1
  658. mov mem2, reg1 mov reg2, mem1}
  659. begin
  660. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  661. { Removes the second statement from
  662. mov reg1, mem1/reg2
  663. mov mem1/reg2, reg1 }
  664. begin
  665. if (taicpu(p).oper[0]^.typ = top_reg) then
  666. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  667. DebugMsg('PeepHole Optimization,MovMov2Mov1',p);
  668. asml.remove(hp1);
  669. hp1.free;
  670. Result:=true;
  671. exit;
  672. end
  673. else
  674. begin
  675. CopyUsedRegs(TmpUsedRegs);
  676. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  677. if (taicpu(p).oper[1]^.typ = top_ref) and
  678. { mov reg1, mem1
  679. mov mem2, reg1 }
  680. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  681. GetNextInstruction(hp1, hp2) and
  682. MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
  683. OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  684. OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
  685. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  686. { change to
  687. mov reg1, mem1 mov reg1, mem1
  688. mov mem2, reg1 cmp reg1, mem2
  689. cmp mem1, reg1
  690. }
  691. begin
  692. asml.remove(hp2);
  693. hp2.free;
  694. taicpu(hp1).opcode := A_CMP;
  695. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  696. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  697. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  698. DebugMsg('Peephole MovMovCmp2MovCmp done',hp1);
  699. end;
  700. ReleaseUsedRegs(TmpUsedRegs);
  701. end;
  702. end
  703. else if (taicpu(p).oper[1]^.typ=top_ref) and
  704. OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  705. begin
  706. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  707. DebugMsg('PeepHole Optimization,MovMov2MovMov1',p);
  708. end
  709. else
  710. begin
  711. CopyUsedRegs(TmpUsedRegs);
  712. if GetNextInstruction(hp1, hp2) and
  713. MatchOpType(taicpu(p),top_ref,top_reg) and
  714. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  715. (taicpu(hp1).oper[1]^.typ = top_ref) and
  716. MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
  717. MatchOpType(taicpu(hp2),top_ref,top_reg) and
  718. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  719. if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
  720. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  721. { mov mem1, %reg1
  722. mov %reg1, mem2
  723. mov mem2, reg2
  724. to:
  725. mov mem1, reg2
  726. mov reg2, mem2}
  727. begin
  728. AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  729. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  730. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  731. asml.remove(hp2);
  732. hp2.free;
  733. end
  734. {$ifdef i386}
  735. { this is enabled for i386 only, as the rules to create the reg sets below
  736. are too complicated for x86-64, so this makes this code too error prone
  737. on x86-64
  738. }
  739. else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  740. not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
  741. not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
  742. { mov mem1, reg1 mov mem1, reg1
  743. mov reg1, mem2 mov reg1, mem2
  744. mov mem2, reg2 mov mem2, reg1
  745. to: to:
  746. mov mem1, reg1 mov mem1, reg1
  747. mov mem1, reg2 mov reg1, mem2
  748. mov reg1, mem2
  749. or (if mem1 depends on reg1
  750. and/or if mem2 depends on reg2)
  751. to:
  752. mov mem1, reg1
  753. mov reg1, mem2
  754. mov reg1, reg2
  755. }
  756. begin
  757. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  758. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  759. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  760. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  761. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  762. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  763. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  764. AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  765. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  766. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  767. AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  768. end
  769. else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  770. begin
  771. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  772. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  773. end
  774. else
  775. begin
  776. asml.remove(hp2);
  777. hp2.free;
  778. end
  779. {$endif i386}
  780. ;
  781. ReleaseUsedRegs(TmpUsedRegs);
  782. end;
  783. end
  784. (* { movl [mem1],reg1
  785. movl [mem1],reg2
  786. to
  787. movl [mem1],reg1
  788. movl reg1,reg2
  789. }
  790. else if (taicpu(p).oper[0]^.typ = top_ref) and
  791. (taicpu(p).oper[1]^.typ = top_reg) and
  792. (taicpu(hp1).oper[0]^.typ = top_ref) and
  793. (taicpu(hp1).oper[1]^.typ = top_reg) and
  794. (taicpu(p).opsize = taicpu(hp1).opsize) and
  795. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  796. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  797. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  798. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  799. else*)
  800. { movl const1,[mem1]
  801. movl [mem1],reg1
  802. to
  803. movl const1,reg1
  804. movl reg1,[mem1]
  805. }
  806. else if MatchOpType(Taicpu(p),top_const,top_ref) and
  807. MatchOpType(Taicpu(hp1),top_ref,top_reg) and
  808. (taicpu(p).opsize = taicpu(hp1).opsize) and
  809. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  810. not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
  811. begin
  812. allocregbetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  813. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  814. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  815. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  816. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  817. end
  818. end
  819. else if (taicpu(p).oper[1]^.typ = top_reg) and
  820. GetNextIntruction_p and
  821. (hp1.typ = ait_instruction) and
  822. GetNextInstruction(hp1, hp2) and
  823. MatchInstruction(taicpu(hp2),A_MOV,[]) and
  824. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  825. (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
  826. ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
  827. IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
  828. ) then
  829. { change movsX/movzX reg/ref, reg2
  830. add/sub/or/... reg3/$const, reg2
  831. mov reg2 reg/ref
  832. to add/sub/or/... reg3/$const, reg/ref }
  833. begin
  834. CopyUsedRegs(TmpUsedRegs);
  835. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  836. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  837. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  838. begin
  839. { by example:
  840. movswl %si,%eax movswl %si,%eax p
  841. decl %eax addl %edx,%eax hp1
  842. movw %ax,%si movw %ax,%si hp2
  843. ->
  844. movswl %si,%eax movswl %si,%eax p
  845. decw %eax addw %edx,%eax hp1
  846. movw %ax,%si movw %ax,%si hp2
  847. }
  848. DebugMsg('PeepHole Optimization '+
  849. std_op2str[taicpu(p).opcode]+gas_opsize2str[taicpu(p).opsize]+' '+
  850. std_op2str[taicpu(hp1).opcode]+gas_opsize2str[taicpu(hp1).opsize]+' '+
  851. std_op2str[taicpu(hp2).opcode]+gas_opsize2str[taicpu(hp2).opsize],p);
  852. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  853. {
  854. ->
  855. movswl %si,%eax movswl %si,%eax p
  856. decw %si addw %dx,%si hp1
  857. movw %ax,%si movw %ax,%si hp2
  858. }
  859. case taicpu(hp1).ops of
  860. 1:
  861. begin
  862. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  863. if taicpu(hp1).oper[0]^.typ=top_reg then
  864. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  865. end;
  866. 2:
  867. begin
  868. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  869. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  870. (taicpu(hp1).opcode<>A_SHL) and
  871. (taicpu(hp1).opcode<>A_SHR) and
  872. (taicpu(hp1).opcode<>A_SAR) then
  873. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  874. end;
  875. else
  876. internalerror(2008042701);
  877. end;
  878. {
  879. ->
  880. decw %si addw %dx,%si p
  881. }
  882. asml.remove(p);
  883. asml.remove(hp2);
  884. p.Free;
  885. hp2.Free;
  886. p := hp1;
  887. end;
  888. ReleaseUsedRegs(TmpUsedRegs);
  889. end;
  890. if GetNextIntruction_p and
  891. MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  892. GetNextInstruction(hp1, hp2) and
  893. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  894. MatchOperand(Taicpu(p).oper[0]^,0) and
  895. (Taicpu(p).oper[1]^.typ = top_reg) and
  896. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  897. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  898. { mov reg1,0
  899. bts reg1,operand1 --> mov reg1,operand2
  900. or reg1,operand2 bts reg1,operand1}
  901. begin
  902. Taicpu(hp2).opcode:=A_MOV;
  903. asml.remove(hp1);
  904. insertllitem(hp2,hp2.next,hp1);
  905. asml.remove(p);
  906. p.free;
  907. p:=hp1;
  908. end;
  909. if GetNextIntruction_p and
  910. MatchInstruction(hp1,A_LEA,[S_L]) and
  911. MatchOpType(Taicpu(p),top_ref,top_reg) and
  912. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  913. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  914. ) or
  915. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  916. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  917. )
  918. ) then
  919. { mov reg1,ref
  920. lea reg2,[reg1,reg2]
  921. to
  922. add reg2,ref}
  923. begin
  924. CopyUsedRegs(TmpUsedRegs);
  925. { reg1 may not be used afterwards }
  926. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  927. begin
  928. Taicpu(hp1).opcode:=A_ADD;
  929. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  930. DebugMsg('Peephole MovLea2Add done',hp1);
  931. asml.remove(p);
  932. p.free;
  933. p:=hp1;
  934. end;
  935. ReleaseUsedRegs(TmpUsedRegs);
  936. end;
  937. end;
  { Second-pass peephole optimizations for the MOV instruction at p.

    Pattern 1:
        mov reg1, reg2
        mov/movzx/movsx (reg2, ..), reg2
      becomes
        mov/movzx/movsx (reg1, ..), reg2
      p is removed, p is set to the following instruction and the function
      returns true.

    Pattern 2:
        mov (ref), reg
        add/sub/or/... reg2/$const, reg      (or a matching lea)
        mov reg, (ref)
        # reg released
      becomes
        add/sub/or/... reg2/$const, (ref)
      Both MOVs are removed and p is set to the arithmetic instruction.
      Result is left false in this case. }
  function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      second,third : tai;
    begin
      Result:=false;

      { ---- pattern 1: redirect the address register of the following load }
      if MatchOpType(taicpu(p),top_reg,top_reg) and
         GetNextInstruction(p,second) and
         MatchInstruction(second,A_MOV,A_MOVZX,A_MOVSX,[]) and
         MatchOpType(taicpu(second),top_ref,top_reg) and
         { reg2 has to appear as base or as index of the reference }
         not((taicpu(second).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg) and
             (taicpu(second).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)) and
         { and the load has to overwrite reg2 itself }
         (getsupreg(taicpu(second).oper[1]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) then
        begin
          if taicpu(second).oper[0]^.ref^.base=taicpu(p).oper[1]^.reg then
            taicpu(second).oper[0]^.ref^.base:=taicpu(p).oper[0]^.reg;
          if taicpu(second).oper[0]^.ref^.index=taicpu(p).oper[1]^.reg then
            taicpu(second).oper[0]^.ref^.index:=taicpu(p).oper[0]^.reg;
          asml.remove(p);
          p.free;
          p:=second;
          Result:=true;
          exit;
        end;

      { ---- pattern 2: fold load / arithmetic / store into one memory op }
      if taicpu(p).oper[0]^.typ<>top_ref then
        exit;
      if not GetNextInstruction(p,second) then
        exit;
      if second.typ<>ait_instruction then
        exit;

      if not IsFoldableArithOp(taicpu(second),taicpu(p).oper[1]^.reg) then
        begin
          { the only other accepted middle instruction is an lea that
            writes the loaded register and references it exactly once,
            either as base or as index }
          if taicpu(second).opcode<>A_LEA then
            exit;
          if taicpu(second).oper[1]^.reg<>taicpu(p).oper[1]^.reg then
            exit;
          if not(
               (MatchReference(taicpu(second).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
                (taicpu(second).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
               ) or
               (MatchReference(taicpu(second).oper[0]^.ref^,NR_INVALID,
                  taicpu(p).oper[1]^.reg) and
                (taicpu(second).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg)
               )
             ) then
            exit;
        end;

      { the third instruction has to store the register to memory }
      if not GetNextInstruction(second,third) then
        exit;
      if not MatchInstruction(third,A_MOV,[]) then
        exit;
      if not MatchOperand(taicpu(p).oper[1]^,taicpu(third).oper[0]^) then
        exit;
      if taicpu(third).oper[1]^.typ<>top_ref then
        exit;

      CopyUsedRegs(TmpUsedRegs);
      UpdateUsedRegs(TmpUsedRegs,tai(second.next));
      { the store has to go back to the location that was loaded, and the
        register must not be needed after the store }
      if RefsEqual(taicpu(third).oper[1]^.ref^,taicpu(p).oper[0]^.ref^) and
         not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,third,TmpUsedRegs) then
        begin
          if taicpu(second).opcode=A_LEA then
            begin
              { lea (reg,other),reg  ->  add other,(ref) }
              taicpu(second).opcode:=A_ADD;
              if taicpu(second).oper[0]^.ref^.index=taicpu(p).oper[1]^.reg then
                taicpu(second).loadreg(0,taicpu(second).oper[0]^.ref^.base)
              else
                taicpu(second).loadreg(0,taicpu(second).oper[0]^.ref^.index);
              taicpu(second).loadRef(1,taicpu(p).oper[0]^.ref^);
              DebugMsg('Peephole FoldLea done',second);
            end
          else if (taicpu(second).opcode=A_INC) or
                  (taicpu(second).opcode=A_DEC) or
                  (taicpu(second).opcode=A_NOT) or
                  (taicpu(second).opcode=A_NEG) then
            { single operand instructions: the memory location is operand 0 }
            taicpu(second).loadRef(0,taicpu(p).oper[0]^.ref^)
          else
            { two operand instructions: the memory location is the destination }
            taicpu(second).loadRef(1,taicpu(p).oper[0]^.ref^);

          asml.remove(p);
          asml.remove(third);
          p.free;
          third.free;
          p:=second;
        end;
      ReleaseUsedRegs(TmpUsedRegs);
    end;
  { First-pass peephole optimizations for the AND instruction at p.

    All patterns require p to be "and const,reg" and to be followed by
    another instruction hp1:

      and const1,reg ; and const2,reg   ->  and (const1 and const2),reg
          p is removed, p is set to the second AND, returns true.

      and const,reg ; movzx subreg,reg  ->  and const,reg
          when const already has every bit above the movzx source width
          cleared; hp1 is removed, Result stays false.

      and const,reg ; movsx subreg,reg  ->  and const,reg
          when const additionally clears the sign bit of the movsx source
          width; hp1 is removed, Result stays false. }
  function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
    var
      hp1 : tai;
    begin
      Result:=false;

      { every pattern below needs a successor and the form "and const,reg" }
      if not(GetNextInstruction(p, hp1) and
             MatchOpType(p,top_const,top_reg)) then
        exit;

      if MatchInstruction(hp1,A_AND,[]) and
         MatchOpType(hp1,top_const,top_reg) and
         (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
         { the second register must contain the first one, so compare their subreg types }
         (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
         { NOTE(review): presumably keeps the merged mask encodable as a
           32 bit immediate - confirm }
         (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
        { change
            and const1, reg
            and const2, reg
          to
            and (const1 and const2), reg
        }
        begin
          taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
          DebugMsg('Peephole AndAnd2And done',hp1);
          asml.remove(p);
          p.Free;
          p:=hp1;
          Result:=true;
          exit;
        end
      else if MatchInstruction(hp1,A_MOVZX,[]) and
         (taicpu(hp1).oper[0]^.typ = top_reg) and
         MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
         { source and destination of the movzx must be parts of the same
           register.  They always differ in size, so the super registers
           have to be compared; comparing the sub registers could never
           succeed and disabled this optimization entirely }
         (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
         (((taicpu(p).opsize=S_W) and
           (taicpu(hp1).opsize=S_BW)) or
          ((taicpu(p).opsize=S_L) and
           (taicpu(hp1).opsize in [S_WL,S_BL])) or
          ((taicpu(p).opsize=S_Q) and
           (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
         ) then
        begin
          { the zero extension is redundant if the mask already clears all
            bits above the source width }
          if (((taicpu(hp1).opsize) in [S_BW,S_BL,S_BQ]) and
              ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
             ) or
             (((taicpu(hp1).opsize) in [S_WL,S_WQ]) and
              ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val)
             ) or
             (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $ffffffff)=taicpu(p).oper[0]^.val)
             ) then
            begin
              DebugMsg('Peephole AndMovzToAnd done',p);
              asml.remove(hp1);
              hp1.free;
            end;
        end
      else if MatchInstruction(hp1,A_MOVSX,A_MOVSXD,[]) and
         (taicpu(hp1).oper[0]^.typ = top_reg) and
         MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
         (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
         (((taicpu(p).opsize=S_W) and
           (taicpu(hp1).opsize=S_BW)) or
          ((taicpu(p).opsize=S_L) and
           (taicpu(hp1).opsize in [S_WL,S_BL])) or
          ((taicpu(p).opsize=S_Q) and
           (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
         ) then
        begin
          { the sign extension is redundant if the mask clears the sign bit
            of the source width and everything above it }
          if (((taicpu(hp1).opsize) in [S_BW,S_BL,S_BQ]) and
              ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
             ) or
             (((taicpu(hp1).opsize) in [S_WL,S_WQ]) and
              ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val)
             ) or
             (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
             ) then
            begin
              DebugMsg('PeepHole Optimization,AndMovsxToAnd',p);
              asml.remove(hp1);
              hp1.free;
            end;
        end;
(*    else
      {change "and x, reg; jxx" to "test x, reg", if reg is deallocated before the
       jump, but only if it's a conditional jump (PFV) }
      if (taicpu(p).oper[1]^.typ = top_reg) and
         GetNextInstruction(p, hp1) and
         (hp1.typ = ait_instruction) and
         (taicpu(hp1).is_jmp) and
         (taicpu(hp1).opcode<>A_JMP) and
         not(getsupreg(taicpu(p).oper[1]^.reg) in UsedRegs) then
        taicpu(p).opcode := A_TEST;*)
    end;
  { Post-peephole rewrite of the MOV instruction at p:
      mov $0, %reg   ->   xor %reg, %reg
    The rewrite is skipped while NR_DEFAULTFLAGS is marked as used in
    UsedRegs. }
  procedure TX86AsmOptimizer.PostPeepholeOptMov(const p : tai);
    begin
      { source has to be the constant zero }
      if not MatchOperand(taicpu(p).oper[0]^,0) then
        exit;
      { destination has to be a register }
      if taicpu(p).oper[1]^.typ<>top_reg then
        exit;
      { leave the instruction alone while the flags register is in use }
      if RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs) then
        exit;

      { change "mov $0, %reg" into "xor %reg, %reg" }
      taicpu(p).opcode:=A_XOR;
      taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
    end;
  1131. end.