aoptx86.pas 49 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
  type
    { Peephole-optimizer helpers for x86, layered on top of the generic
      TAsmOptimizer. The routines declared here are implemented further down
      in this unit. }
    TX86AsmOptimizer = class(TAsmOptimizer)
      { True when hp is an instruction that loads reg with a value that does
        not depend on reg's previous contents (mov-like loads, lea, pop). }
      function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
    protected
      { NOTE(review): implementation is outside the part of the unit reviewed
        here; presumably a MOV clean-up run after the main passes - confirm. }
      procedure PostPeepholeOptMov(const p : tai);
      { NOTE(review): implementation not visible here; by its name a pass 1
        optimization for AND instructions - confirm. }
      function OptPass1AND(var p : tai) : boolean;
      { Pass 1 optimizations for register-to-register vmovaps/vmovapd; p may
        be replaced by a following instruction. }
      function OptPass1VMOVAP(var p : tai) : boolean;
      { Pass 1: folds a vmovaps/vmovapd that copies the result (operand 2) of
        the instruction p into p itself. }
      function OptPass1VOP(const p : tai) : boolean;
      { Pass 1 optimizations for MOV instructions; p may be replaced. }
      function OptPass1MOV(var p : tai) : boolean;
      { NOTE(review): implementation not visible here; by its name a pass 2
        optimization for MOV instructions - confirm. }
      function OptPass2MOV(var p : tai) : boolean;
      { Inserts s as an assembler comment before p when DEBUG_AOPTCPU is
        defined; an empty routine otherwise. }
      procedure DebugMsg(const s : string; p : tai);inline;
      { Rewrites the allocation markers so that reg is allocated from p1 up
        to and including p2; initialusedregs is updated when an allocation
        marker has to be inserted in front of p1. }
      procedure AllocRegBetween(reg : tregister; p1,p2 : tai;var initialusedregs : TAllUsedRegs);
      { True when p starts one of the recognised exit sequences:
        ret, leave+ret or "mov ebp,esp; pop ebp; ret". }
      class function IsExitCode(p : tai) : boolean;
      { True when hp1 is an arithmetic/logic instruction whose destination is
        reg and whose source, if any, is a constant or another register. }
      class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean;
      { Removes the last deallocation marker of the function result
        register(s) found when walking backwards from p. }
      procedure RemoveLastDeallocForFuncRes(p : tai);
    end;
  { MatchInstruction: true when instr is a CPU instruction with one of the
    given opcodes; an empty opsize set accepts every operand size, otherwise
    the instruction's operand size has to be a member of opsize. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;
  { MatchOperand: true when oper is the given register, the given constant,
    or (third form) an operand of the same kind and value as oper2. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  { True when both references agree in offset, segment, base, index,
    scalefactor, symbol, relsymbol and refaddr. }
  function RefsEqual(const r1, r2: treference): boolean;
  { True when ref has no offset, segment, symbol or relsymbol, a scale factor
    of 0 or 1, and the requested base/index registers; passing NR_INVALID for
    base or index skips the test of that register. }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  { True when instr has exactly two operands of kinds ot0 and ot1; instr is
    cast to taicpu without a check of its typ. }
  function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
  53. implementation
  54. uses
  55. cutils,
  56. verbose,
  57. procinfo,
  58. symconst,symsym,
  59. itcpugas;
  { Single-opcode matcher: instr has to be a CPU instruction with opcode op.
    An empty opsize set accepts every operand size; otherwise the operand
    size of instr must be a member of opsize. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
    begin
      result:=false;
      { anything that is not an instruction can never match }
      if instr.typ<>ait_instruction then
        exit;
      if taicpu(instr).opcode<>op then
        exit;
      result:=(opsize=[]) or (taicpu(instr).opsize in opsize);
    end;
  { Two-opcode matcher: instr has to be a CPU instruction whose opcode is op1
    or op2. An empty opsize set accepts every operand size; otherwise the
    operand size of instr must be a member of opsize. }
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      { neither candidate opcode fits }
      if (taicpu(instr).opcode<>op1) and
         (taicpu(instr).opcode<>op2) then
        exit;
      result:=(opsize=[]) or (taicpu(instr).opsize in opsize);
    end;
  { Three-opcode matcher: instr has to be a CPU instruction whose opcode is
    op1, op2 or op3. An empty opsize set accepts every operand size;
    otherwise the operand size of instr must be a member of opsize. }
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      { none of the candidate opcodes fits }
      if (taicpu(instr).opcode<>op1) and
         (taicpu(instr).opcode<>op2) and
         (taicpu(instr).opcode<>op3) then
        exit;
      result:=(opsize=[]) or (taicpu(instr).opsize in opsize);
    end;
  { Open-array matcher: instr has to be a CPU instruction whose opcode occurs
    in ops. An empty opsize set accepts every operand size; otherwise the
    operand size of instr must be a member of opsize. An empty ops array
    never matches. }
  function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
    const opsize : topsizes) : boolean;
    var
      idx : integer;
    begin
      result:=false;
      { the kind and size tests are the same for every candidate opcode, so
        they are done once up front }
      if instr.typ<>ait_instruction then
        exit;
      if (opsize<>[]) and
         not(taicpu(instr).opsize in opsize) then
        exit;
      for idx:=low(ops) to high(ops) do
        if taicpu(instr).opcode=ops[idx] then
          begin
            result:=true;
            exit;
          end;
    end;
  { True when oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ=top_reg then
        result:=oper.reg=reg
      else
        result:=false;
    end;
  { True when oper is a constant operand with the value a. }
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
    begin
      if oper.typ=top_const then
        result:=oper.val=a
      else
        result:=false;
    end;
  { True when both operands are of the same kind and carry the same value:
    equal registers, equal constants, or references that RefsEqual accepts.
    Two operands of the same, but any other, kind raise internal error
    2013102801. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
    begin
      result:=oper1.typ=oper2.typ;
      { operands of different kinds never match }
      if not result then
        exit;
      case oper1.typ of
        top_reg:
          result:=oper1.reg=oper2.reg;
        top_ref:
          result:=RefsEqual(oper1.ref^,oper2.ref^);
        top_const:
          result:=oper1.val=oper2.val;
        else
          internalerror(2013102801);
      end;
    end;
  { Field-by-field comparison of two references: they are equal when offset,
    segment, base, index, scalefactor, symbol, relsymbol and refaddr all
    agree. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      result:=false;
      { registers taking part in the address }
      if (r1.base<>r2.base) or
         (r1.index<>r2.index) or
         (r1.segment<>r2.segment) then
        exit;
      { displacement and scaling }
      if (r1.offset<>r2.offset) or
         (r1.scalefactor<>r2.scalefactor) then
        exit;
      { symbolic part }
      result:=(r1.symbol=r2.symbol) and
        (r1.relsymbol=r2.relsymbol) and
        (r1.refaddr=r2.refaddr);
    end;
  { True when ref is a plain register reference: no offset, no segment, no
    symbol or relsymbol and a scale factor of 0 or 1, using the requested
    base and index registers. Passing NR_INVALID for base or index accepts
    any register in that position. }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
    begin
      Result:=false;
      { reject everything that is more than an unscaled register form }
      if (ref.offset<>0) or
         not(ref.scalefactor in [0,1]) or
         (ref.segment<>NR_NO) or
         (ref.symbol<>nil) or
         (ref.relsymbol<>nil) then
        exit;
      if (base<>NR_INVALID) and
         (ref.base<>base) then
        exit;
      Result:=(index=NR_INVALID) or (ref.index=index);
    end;
  { True when instr has exactly two operands, the first of kind ot0 and the
    second of kind ot1. instr is cast to taicpu without a check of its typ,
    so the caller has to pass an instruction. }
  function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
    begin
      Result:=false;
      if taicpu(instr).ops<>2 then
        exit;
      if taicpu(instr).oper[0]^.typ<>ot0 then
        exit;
      Result:=taicpu(instr).oper[1]^.typ=ot1;
    end;
  { Debugging aid: with DEBUG_AOPTCPU defined, s is inserted as an assembler
    comment in front of p; without it the routine is empty and declared
    inline. }
  procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);{$ifndef DEBUG_AOPTCPU}inline;{$endif not DEBUG_AOPTCPU}
    begin
{$ifdef DEBUG_AOPTCPU}
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
{$endif DEBUG_AOPTCPU}
    end;
  { Allocates register reg between (and including) the instructions p1 and
    p2. The type of p1 and p2 must not be in SkipInstr.

    Nothing is done for NR_ESP, for the frame pointer or when p1 is nil.
    When reg is not yet marked as used in initialusedregs, an allocation
    marker is inserted in front of p1 and reg is added to initialusedregs.
    All allocation/deallocation markers of reg inside the range are removed;
    if the first removed marker was an allocation a new one is put in front
    of the original p1, and if the last removed marker was a deallocation a
    new one is put in front of the instruction at which the scan stopped.

    Note that this routine is called both from the peephole optimizer (where
    optinfo is not yet initialised) and from the cse (where it is). }
  procedure TX86AsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
    var
      marker, rangeStart, following: tai;
      sawRemoval,
      firstWasAlloc,
      lastWasDealloc: boolean;
    begin
{$ifdef EXTDEBUG}
{      if assigned(p1.optinfo) and
         (ptaiprop(p1.optinfo)^.usedregs <> initialusedregs) then
        internalerror(2004101010); }
{$endif EXTDEBUG}
      rangeStart := p1;
      { the stack and frame pointer are never tracked; a nil p1 happens with
        registers which are loaded implicitly, outside the current block
        (e.g. esi with self) }
      if reg = NR_ESP then
        exit;
      if reg = current_procinfo.framepointer then
        exit;
      if not(assigned(p1)) then
        exit;
      { make sure we allocate it for this instruction }
      getnextinstruction(p2,p2);
      sawRemoval := false;
      firstWasAlloc := false;
      lastWasDealloc := false;
{$ifdef allocregdebug}
      marker := tai_comment.Create(strpnew('allocating '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
        ' from here...'));
      insertllitem(asml,p1.previous,p1,marker);
      marker := tai_comment.Create(strpnew('allocated '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
        ' till here...'));
      insertllitem(asml,p2,p2.next,marker);
{$endif allocregdebug}
      if not(RegInUsedRegs(reg,initialusedregs)) then
        begin
          marker := tai_regalloc.alloc(reg,nil);
          insertllItem(p1.previous,p1,marker);
          IncludeRegInUsedRegs(reg,initialusedregs);
        end;
      while assigned(p1) and
            (p1 <> p2) do
        begin
          if assigned(p1.optinfo) then
            internalerror(2014022301); // IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs);
          p1 := tai(p1.next);
          repeat
            { step over everything skippable except register markers }
            while assigned(p1) and
                  (p1.typ in (SkipInstr-[ait_regalloc])) do
              p1 := tai(p1.next);
            if assigned(p1) and
               (p1.typ = ait_regalloc) then
              begin
                { remember the successor, the marker itself may be freed }
                following := tai(p1.next);
                { remove all allocation/deallocation info about the register
                  in between }
                if tai_regalloc(p1).reg = reg then
                  begin
                    if not sawRemoval then
                      begin
                        sawRemoval := true;
                        firstWasAlloc := tai_regalloc(p1).ratype = ra_alloc;
                      end;
                    lastWasDealloc := tai_regalloc(p1).ratype = ra_dealloc;
                    asml.Remove(p1);
                    p1.free;
                  end;
                p1 := following;
              end;
          until not(assigned(p1)) or
                not(p1.typ in SkipInstr);
        end;
      if not(assigned(p1)) then
        exit;
      { put the boundary markers that were removed back at the edges }
      if firstWasAlloc then
        begin
          marker := tai_regalloc.Alloc(reg,nil);
          insertLLItem(rangeStart.previous,rangeStart,marker);
        end;
      if lastWasDealloc then
        begin
          marker := tai_regalloc.DeAlloc(reg,nil);
          insertLLItem(p1.previous,p1,marker);
        end;
    end;
  { Returns true when hp is an instruction that loads reg with a value that
    does not depend on the previous contents of reg:
      - a two-operand mov-like instruction (mov, movzx, movsx, lea and the
        scalar/aligned SSE/AVX moves listed below) whose destination is reg
        and whose source is a constant, a different register, or a memory
        reference that does not use reg;
      - a pop into reg.
    Registers are compared by super register. Returns false for nil and for
    anything that is not an instruction. }
  function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      Result := false;
      if not assigned(hp) or
         (hp.typ <> ait_instruction) then
        exit;
      p := taicpu(hp);
      case p.opcode of
        A_MOV,A_MOVZX,A_MOVSX,A_LEA,
        A_VMOVSS,A_VMOVSD,A_VMOVAPD,A_VMOVAPS,A_VMOVQ,
        A_MOVSS,A_MOVSD,A_MOVQ,A_MOVAPD,A_MOVAPS:
          Result :=
            { vmovss/vmovsd also exist in a three-operand register form; there
              oper[1] is a source and the destination is only partly
              replaced, so only the two-operand form counts as a fresh load }
            (p.ops = 2) and
            (p.oper[1]^.typ = top_reg) and
            (getsupreg(p.oper[1]^.reg) = getsupreg(reg)) and
            ((p.oper[0]^.typ = top_const) or
             ((p.oper[0]^.typ = top_reg) and
              (getsupreg(p.oper[0]^.reg) <> getsupreg(reg))) or
             ((p.oper[0]^.typ = top_ref) and
              not RegInRef(reg,p.oper[0]^.ref^)));
        A_POP:
          Result :=
            { pop can also target memory; the reg field is only meaningful
              for a register operand }
            (p.oper[0]^.typ = top_reg) and
            (getsupreg(p.oper[0]^.reg) = getsupreg(reg));
      end;
    end;
  { True when p is the first instruction of one of the recognised exit
    sequences:
        ret
        leave; ret
        mov %ebp,%esp; pop %ebp; ret
    The follow-up instructions are located with GetNextInstruction. }
  class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
    var
      second,third : tai;
    begin
      result:=false;
      if p.typ<>ait_instruction then
        exit;
      case taicpu(p).opcode of
        A_RET:
          result:=true;
        A_LEAVE:
          result:=
            GetNextInstruction(p,second) and
            (second.typ=ait_instruction) and
            (taicpu(second).opcode=A_RET);
        A_MOV:
          result:=
            { mov %ebp,%esp }
            (taicpu(p).oper[0]^.typ=top_reg) and
            (taicpu(p).oper[0]^.reg=NR_EBP) and
            (taicpu(p).oper[1]^.typ=top_reg) and
            (taicpu(p).oper[1]^.reg=NR_ESP) and
            { pop %ebp }
            GetNextInstruction(p,second) and
            (second.typ=ait_instruction) and
            (taicpu(second).opcode=A_POP) and
            (taicpu(second).oper[0]^.typ=top_reg) and
            (taicpu(second).oper[0]^.reg=NR_EBP) and
            { ret }
            GetNextInstruction(second,third) and
            (third.typ=ait_instruction) and
            (taicpu(third).opcode=A_RET);
      end;
    end;
  { True when hp1 is an arithmetic/logic instruction that modifies reg in
    place:
      - add/sub/or/xor/and/shl/shr/sar whose source is a constant or a
        register other than reg, and whose destination is reg;
      - inc/dec/neg/not applied to reg.
    Any other opcode yields false. }
  class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
    begin
      Result := False;
      case hp1.opcode of
        A_INC,A_DEC,A_NEG,A_NOT:
          if hp1.oper[0]^.typ = top_reg then
            Result := hp1.oper[0]^.reg = reg;
        A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
          { the source is looked at first, the destination only when the
            source qualifies }
          if (hp1.oper[0]^.typ = top_const) or
             ((hp1.oper[0]^.typ = top_reg) and
              (hp1.oper[0]^.reg <> reg)) then
            Result :=
              (hp1.oper[1]^.typ = top_reg) and
              (hp1.oper[1]^.reg = reg);
      end;
    end;
  { Removes, walking backwards from p, the nearest deallocation marker of the
    register(s) used for the function result of the current procedure: EAX
    for the listed non-ordinal result types, EAX for ordinal results of
    non-zero size, and additionally EDX for 8 byte ordinal results. }
  procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);

    { Walks backwards from the item before p. The first deallocation marker
      of the integer register supreg that is met is removed and freed; the
      walk also ends at the start of the list or at the first item for which
      regInInstruction reports the whole register. }
    procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
      var
        cur: tai;
      begin
        cur := tai(p.previous);
        while assigned(cur) do
          begin
            if (cur.typ = ait_regalloc) and
               (tai_regalloc(cur).ratype=ra_dealloc) and
               (getregtype(tai_regalloc(cur).reg) = R_INTREGISTER) and
               (getsupreg(tai_regalloc(cur).reg) = supreg) then
              begin
                asml.remove(cur);
                cur.free;
                exit;
              end;
            if regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),cur) then
              exit;
            cur := tai(cur.previous);
          end;
      end;

    begin
      case current_procinfo.procdef.returndef.typ of
        orddef:
          case current_procinfo.procdef.returndef.size of
            0:
              { no result register in use };
            8:
              begin
                { for int64/qword }
                DoRemoveLastDeallocForFuncRes(RS_EAX);
                DoRemoveLastDeallocForFuncRes(RS_EDX);
              end;
            else
              DoRemoveLastDeallocForFuncRes(RS_EAX);
          end;
        arraydef,recorddef,pointerdef,
        stringdef,enumdef,procdef,objectdef,errordef,
        filedef,setdef,procvardef,
        classrefdef,forwarddef:
          DoRemoveLastDeallocForFuncRes(RS_EAX);
      end;
    end;
  { Pass 1 peephole optimizations for register-to-register vmovaps/vmovapd.

    p is the vmova* instruction; it may be removed, in which case p is set to
    the instruction that takes its place. The result is true whenever the
    instruction stream was changed, false otherwise.

    Every CopyUsedRegs is paired with a ReleaseUsedRegs, as done in
    OptPass1MOV, so that the temporary register state is not leaked. }
  function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1,hp2 : tai;
    begin
      result:=false;
      if MatchOpType(taicpu(p),top_reg,top_reg) then
        begin
          { vmova* reg1,reg1
            =>
            <nop> }
          if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
            begin
              GetNextInstruction(p,hp1);
              asml.Remove(p);
              p.Free;
              p:=hp1;
              result:=true;
            end
          else if GetNextInstruction(p,hp1) then
            begin
              if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
                MatchOpType(taicpu(hp1),top_reg,top_reg) and
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
                begin
                  { vmova* reg1,reg2
                    vmova* reg2,reg3
                    dealloc reg2
                    =>
                    vmova* reg1,reg3 }
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                    begin
                      taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end
                  { special case:
                    vmova* reg1,reg2
                    vmova* reg2,reg1
                    =>
                    vmova* reg1,reg2 }
                  else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
                    begin
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end;
                  ReleaseUsedRegs(TmpUsedRegs);
                end
              else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
                { we mix single and double operations here because we assume that the compiler
                  generates vmovapd only after double operations and vmovaps only after single operations }
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
                GetNextInstruction(hp1,hp2) and
                MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
                { the trailing move has to copy reg2 back into reg1, otherwise
                  dropping it would lose the value it loads into reg1 }
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
                MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
                begin
                  { vmova* reg1,reg2
                    vfma*  ...,reg2
                    vmova* reg2,reg1
                    dealloc reg2
                    =>
                    vfma*  ...,reg1 }
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
                    begin
                      taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
                      asml.Remove(p);
                      p.Free;
                      asml.Remove(hp2);
                      hp2.Free;
                      p:=hp1;
                      { p was replaced, report the change like the other
                        rewrites of this routine do }
                      result:=true;
                    end;
                  ReleaseUsedRegs(TmpUsedRegs);
                end;
            end;
        end;
    end;
  { Pass 1 peephole optimization for the instruction p, whose result is
    written to operand 2:

        v<op>  ...,reg1
        vmova* reg1,reg2
        dealloc reg1
      =>
        v<op>  ...,reg2

    The following vmovaps/vmovapd is folded into p when reg1 is not used
    afterwards. Returns true when the move was removed. The temporary
    register state obtained with CopyUsedRegs is released again, as done in
    OptPass1MOV. }
  function TX86AsmOptimizer.OptPass1VOP(const p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1 : tai;
    begin
      result:=false;
      if GetNextInstruction(p,hp1) and
        { we mix single and double operations here because we assume that the compiler
          generates vmovapd only after double operations and vmovaps only after single operations }
        MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
        MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
        (taicpu(hp1).oper[1]^.typ=top_reg) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)) then
            begin
              taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
              asml.Remove(hp1);
              hp1.Free;
              result:=true;
            end;
          ReleaseUsedRegs(TmpUsedRegs);
        end;
    end;
  467. function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  468. var
  469. hp1, hp2: tai;
  470. TmpUsedRegs : TAllUsedRegs;
  471. GetNextIntruction_p : Boolean;
  472. begin
  473. Result:=false;
  474. GetNextIntruction_p:=GetNextInstruction(p, hp1);
  475. if GetNextIntruction_p and
  476. MatchInstruction(hp1,A_AND,[]) and
  477. (taicpu(p).oper[1]^.typ = top_reg) and
  478. MatchOpType(taicpu(hp1),top_const,top_reg) and
  479. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
  480. case taicpu(p).opsize Of
  481. S_L:
  482. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  483. begin
  484. DebugMsg('PeepHole Optimization,MovAnd2Mov',p);
  485. asml.remove(hp1);
  486. hp1.free;
  487. Result:=true;
  488. exit;
  489. end;
  490. end
  491. else if GetNextIntruction_p and
  492. MatchInstruction(hp1,A_MOV,[]) and
  493. (taicpu(p).oper[1]^.typ = top_reg) and
  494. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
  495. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  496. begin
  497. CopyUsedRegs(TmpUsedRegs);
  498. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  499. { we have
  500. mov x, %treg
  501. mov %treg, y
  502. }
  503. if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
  504. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  505. { we've got
  506. mov x, %treg
  507. mov %treg, y
  508. with %treg is not used after }
  509. case taicpu(p).oper[0]^.typ Of
  510. top_reg:
  511. begin
  512. { change
  513. mov %reg, %treg
  514. mov %treg, y
  515. to
  516. mov %reg, y
  517. }
  518. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  519. asml.remove(hp1);
  520. hp1.free;
  521. ReleaseUsedRegs(TmpUsedRegs);
  522. Exit;
  523. end;
  524. top_ref:
  525. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  526. begin
  527. { change
  528. mov mem, %treg
  529. mov %treg, %reg
  530. to
  531. mov mem, %reg"
  532. }
  533. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  534. asml.remove(hp1);
  535. hp1.free;
  536. ReleaseUsedRegs(TmpUsedRegs);
  537. Exit;
  538. end;
  539. end;
  540. ReleaseUsedRegs(TmpUsedRegs);
  541. end
  542. else
  543. { Change
  544. mov %reg1, %reg2
  545. xxx %reg2, ???
  546. to
  547. mov %reg1, %reg2
  548. xxx %reg1, ???
  549. to avoid a write/read penalty
  550. }
  551. if MatchOpType(taicpu(p),top_reg,top_reg) and
  552. GetNextInstruction(p,hp1) and
  553. (tai(hp1).typ = ait_instruction) and
  554. (taicpu(hp1).ops >= 1) and
  555. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  556. { we have
  557. mov %reg1, %reg2
  558. XXX %reg2, ???
  559. }
  560. begin
  561. if ((taicpu(hp1).opcode = A_OR) or
  562. (taicpu(hp1).opcode = A_TEST)) and
  563. (taicpu(hp1).oper[1]^.typ = top_reg) and
  564. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  565. { we have
  566. mov %reg1, %reg2
  567. test/or %reg2, %reg2
  568. }
  569. begin
  570. CopyUsedRegs(TmpUsedRegs);
  571. { reg1 will be used after the first instruction,
  572. so update the allocation info }
  573. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  574. if GetNextInstruction(hp1, hp2) and
  575. (hp2.typ = ait_instruction) and
  576. taicpu(hp2).is_jmp and
  577. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  578. { change
  579. mov %reg1, %reg2
  580. test/or %reg2, %reg2
  581. jxx
  582. to
  583. test %reg1, %reg1
  584. jxx
  585. }
  586. begin
  587. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  588. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  589. asml.remove(p);
  590. p.free;
  591. p := hp1;
  592. ReleaseUsedRegs(TmpUsedRegs);
  593. Exit;
  594. end
  595. else
  596. { change
  597. mov %reg1, %reg2
  598. test/or %reg2, %reg2
  599. to
  600. mov %reg1, %reg2
  601. test/or %reg1, %reg1
  602. }
  603. begin
  604. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  605. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  606. end;
  607. ReleaseUsedRegs(TmpUsedRegs);
  608. end
  609. end
  610. else
  611. { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  612. x >= RetOffset) as it doesn't do anything (it writes either to a
  613. parameter or to the temporary storage room for the function
  614. result)
  615. }
  616. if GetNextIntruction_p and
  617. (tai(hp1).typ = ait_instruction) then
  618. begin
  619. if IsExitCode(hp1) and
  620. MatchOpType(p,top_reg,top_ref) and
  621. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  622. not(assigned(current_procinfo.procdef.funcretsym) and
  623. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  624. (taicpu(p).oper[1]^.ref^.index = NR_NO) then
  625. begin
  626. asml.remove(p);
  627. p.free;
  628. p := hp1;
  629. DebugMsg('Peephole removed deadstore before leave/ret',p);
  630. RemoveLastDeallocForFuncRes(p);
  631. end
  632. { change
  633. mov reg1, mem1
  634. cmp x, mem1
  635. to
  636. mov reg1, mem1
  637. cmp x, reg1
  638. }
  639. else if MatchOpType(p,top_reg,top_ref) and
  640. MatchInstruction(hp1,A_CMP,[taicpu(p).opsize]) and
  641. (taicpu(hp1).oper[1]^.typ = top_ref) and
  642. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  643. begin
  644. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  645. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  646. end;
  647. end;
  648. { Next instruction is also a MOV ? }
  649. if GetNextIntruction_p and
  650. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  651. begin
  652. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  653. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  654. { mov reg1, mem1 or mov mem1, reg1
  655. mov mem2, reg2 mov reg2, mem2}
  656. begin
  657. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  658. { mov reg1, mem1 or mov mem1, reg1
  659. mov mem2, reg1 mov reg2, mem1}
  660. begin
  661. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  662. { Removes the second statement from
  663. mov reg1, mem1/reg2
  664. mov mem1/reg2, reg1 }
  665. begin
  666. if (taicpu(p).oper[0]^.typ = top_reg) then
  667. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  668. DebugMsg('PeepHole Optimization,MovMov2Mov1',p);
  669. asml.remove(hp1);
  670. hp1.free;
  671. Result:=true;
  672. exit;
  673. end
  674. else
  675. begin
  676. CopyUsedRegs(TmpUsedRegs);
  677. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  678. if (taicpu(p).oper[1]^.typ = top_ref) and
  679. { mov reg1, mem1
  680. mov mem2, reg1 }
  681. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  682. GetNextInstruction(hp1, hp2) and
  683. MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
  684. OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  685. OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
  686. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  687. { change to
  688. mov reg1, mem1 mov reg1, mem1
  689. mov mem2, reg1 cmp reg1, mem2
  690. cmp mem1, reg1
  691. }
  692. begin
  693. asml.remove(hp2);
  694. hp2.free;
  695. taicpu(hp1).opcode := A_CMP;
  696. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  697. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  698. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  699. DebugMsg('Peephole MovMovCmp2MovCmp done',hp1);
  700. end;
  701. ReleaseUsedRegs(TmpUsedRegs);
  702. end;
  703. end
  704. else if (taicpu(p).oper[1]^.typ=top_ref) and
  705. OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  706. begin
  707. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  708. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  709. DebugMsg('PeepHole Optimization,MovMov2MovMov1',p);
  710. end
  711. else
  712. begin
  713. CopyUsedRegs(TmpUsedRegs);
  714. if GetNextInstruction(hp1, hp2) and
  715. MatchOpType(taicpu(p),top_ref,top_reg) and
  716. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  717. (taicpu(hp1).oper[1]^.typ = top_ref) and
  718. MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
  719. MatchOpType(taicpu(hp2),top_ref,top_reg) and
  720. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  721. if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
  722. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  723. { mov mem1, %reg1
  724. mov %reg1, mem2
  725. mov mem2, reg2
  726. to:
  727. mov mem1, reg2
  728. mov reg2, mem2}
  729. begin
  730. AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  731. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  732. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  733. asml.remove(hp2);
  734. hp2.free;
  735. end
  736. {$ifdef i386}
  737. { this is enabled for i386 only, as the rules to create the reg sets below
  738. are too complicated for x86-64, so this makes this code too error prone
  739. on x86-64
  740. }
  741. else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  742. not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
  743. not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
  744. { mov mem1, reg1 mov mem1, reg1
  745. mov reg1, mem2 mov reg1, mem2
  746. mov mem2, reg2 mov mem2, reg1
  747. to: to:
  748. mov mem1, reg1 mov mem1, reg1
  749. mov mem1, reg2 mov reg1, mem2
  750. mov reg1, mem2
  751. or (if mem1 depends on reg1
  752. and/or if mem2 depends on reg2)
  753. to:
  754. mov mem1, reg1
  755. mov reg1, mem2
  756. mov reg1, reg2
  757. }
  758. begin
  759. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  760. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  761. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  762. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  763. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  764. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  765. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  766. AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  767. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  768. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  769. AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  770. end
  771. else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  772. begin
  773. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  774. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  775. end
  776. else
  777. begin
  778. asml.remove(hp2);
  779. hp2.free;
  780. end
  781. {$endif i386}
  782. ;
  783. ReleaseUsedRegs(TmpUsedRegs);
  784. end;
  785. end
  786. (* { movl [mem1],reg1
  787. movl [mem1],reg2
  788. to
  789. movl [mem1],reg1
  790. movl reg1,reg2
  791. }
  792. else if (taicpu(p).oper[0]^.typ = top_ref) and
  793. (taicpu(p).oper[1]^.typ = top_reg) and
  794. (taicpu(hp1).oper[0]^.typ = top_ref) and
  795. (taicpu(hp1).oper[1]^.typ = top_reg) and
  796. (taicpu(p).opsize = taicpu(hp1).opsize) and
  797. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  798. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  799. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  800. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  801. else*)
  802. { movl const1,[mem1]
  803. movl [mem1],reg1
  804. to
  805. movl const1,reg1
  806. movl reg1,[mem1]
  807. }
  808. else if MatchOpType(Taicpu(p),top_const,top_ref) and
  809. MatchOpType(Taicpu(hp1),top_ref,top_reg) and
  810. (taicpu(p).opsize = taicpu(hp1).opsize) and
  811. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  812. not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
  813. begin
  814. allocregbetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  815. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  816. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  817. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  818. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  819. end
  820. end
  821. else if (taicpu(p).oper[1]^.typ = top_reg) and
  822. GetNextIntruction_p and
  823. (hp1.typ = ait_instruction) and
  824. GetNextInstruction(hp1, hp2) and
  825. MatchInstruction(hp2,A_MOV,[]) and
  826. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  827. OpsEqual(taicpu(hp2).oper[0]^, taicpu(p).oper[1]^) and
  828. (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
  829. ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
  830. IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
  831. ) then
  832. { change movsX/movzX reg/ref, reg2
  833. add/sub/or/... reg3/$const, reg2
  834. mov reg2 reg/ref
  835. to add/sub/or/... reg3/$const, reg/ref }
  836. begin
  837. CopyUsedRegs(TmpUsedRegs);
  838. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  839. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  840. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  841. begin
  842. { by example:
  843. movswl %si,%eax movswl %si,%eax p
  844. decl %eax addl %edx,%eax hp1
  845. movw %ax,%si movw %ax,%si hp2
  846. ->
  847. movswl %si,%eax movswl %si,%eax p
  848. decw %eax addw %edx,%eax hp1
  849. movw %ax,%si movw %ax,%si hp2
  850. }
  851. DebugMsg('PeepHole Optimization '+
  852. std_op2str[taicpu(p).opcode]+gas_opsize2str[taicpu(p).opsize]+' '+
  853. std_op2str[taicpu(hp1).opcode]+gas_opsize2str[taicpu(hp1).opsize]+' '+
  854. std_op2str[taicpu(hp2).opcode]+gas_opsize2str[taicpu(hp2).opsize],p);
  855. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  856. {
  857. ->
  858. movswl %si,%eax movswl %si,%eax p
  859. decw %si addw %dx,%si hp1
  860. movw %ax,%si movw %ax,%si hp2
  861. }
  862. case taicpu(hp1).ops of
  863. 1:
  864. begin
  865. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  866. if taicpu(hp1).oper[0]^.typ=top_reg then
  867. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  868. end;
  869. 2:
  870. begin
  871. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  872. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  873. (taicpu(hp1).opcode<>A_SHL) and
  874. (taicpu(hp1).opcode<>A_SHR) and
  875. (taicpu(hp1).opcode<>A_SAR) then
  876. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  877. end;
  878. else
  879. internalerror(2008042701);
  880. end;
  881. {
  882. ->
  883. decw %si addw %dx,%si p
  884. }
  885. asml.remove(p);
  886. asml.remove(hp2);
  887. p.Free;
  888. hp2.Free;
  889. p := hp1;
  890. end;
  891. ReleaseUsedRegs(TmpUsedRegs);
  892. end;
  893. if GetNextIntruction_p and
  894. MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  895. GetNextInstruction(hp1, hp2) and
  896. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  897. MatchOperand(Taicpu(p).oper[0]^,0) and
  898. (Taicpu(p).oper[1]^.typ = top_reg) and
  899. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  900. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  901. { mov reg1,0
  902. bts reg1,operand1 --> mov reg1,operand2
  903. or reg1,operand2 bts reg1,operand1}
  904. begin
  905. Taicpu(hp2).opcode:=A_MOV;
  906. asml.remove(hp1);
  907. insertllitem(hp2,hp2.next,hp1);
  908. asml.remove(p);
  909. p.free;
  910. p:=hp1;
  911. end;
  912. if GetNextIntruction_p and
  913. MatchInstruction(hp1,A_LEA,[S_L]) and
  914. MatchOpType(Taicpu(p),top_ref,top_reg) and
  915. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  916. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  917. ) or
  918. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  919. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  920. )
  921. ) then
  922. { mov reg1,ref
  923. lea reg2,[reg1,reg2]
  924. to
  925. add reg2,ref}
  926. begin
  927. CopyUsedRegs(TmpUsedRegs);
  928. { reg1 may not be used afterwards }
  929. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  930. begin
  931. Taicpu(hp1).opcode:=A_ADD;
  932. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  933. DebugMsg('Peephole MovLea2Add done',hp1);
  934. asml.remove(p);
  935. p.free;
  936. p:=hp1;
  937. end;
  938. ReleaseUsedRegs(TmpUsedRegs);
  939. end;
  940. end;
  { Second-pass peephole optimizations anchored on instruction p.

    p is accessed as a taicpu with two operands (oper[0] is the source,
    oper[1] the destination); presumably the caller only passes MOV
    instructions here - confirm against the dispatching code.

    Two rewrites are attempted:

      1. mov reg1, reg2
         mov/zx/sx (reg2, ..), reg2     ->   mov/zx/sx (reg1, ..), reg2

         p is removed, p is set to the following instruction and the
         function returns true.

      2. mov (ref), reg
         add/sub/or/lea/... reg2/$const, reg
         mov reg, (ref)
         (reg not used afterwards)      ->   add/sub/or/... reg2/$const, (ref)

         Both MOVs are removed and p is set to the arithmetic instruction.
         Result is left at false in this case.

    Returns true only when rewrite 1 was applied. }
  function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1,hp2: tai;
    begin
      Result:=false;
      if MatchOpType(taicpu(p),top_reg,top_reg) and
        GetNextInstruction(p, hp1) and
        MatchInstruction(hp1,A_MOV,A_MOVZX,A_MOVSX,[]) and
        MatchOpType(taicpu(hp1),top_ref,top_reg) and
        ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg)
         or
         (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)
        ) and
        (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
        { mov reg1, reg2
          mov/zx/sx (reg2, ..), reg2      to   mov/zx/sx (reg1, ..), reg2 }
        begin
          { reg2 is overwritten by hp1 anyway, so address through reg1 directly }
          if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
            taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
          if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
            taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
          asml.remove(p);
          p.free;
          p := hp1;
          Result:=true;
          exit;
        end
      else if (taicpu(p).oper[0]^.typ = top_ref) and
        GetNextInstruction(p,hp1) and
        (hp1.typ = ait_instruction) and
        (IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) or
         { a lea that adds exactly one other register to reg can be folded
           as well, it is turned into an add below }
         ((taicpu(hp1).opcode=A_LEA) and
          (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) and
          ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
            (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
           ) or
           (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,
              taicpu(p).oper[1]^.reg) and
            (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg))
          )
         )
        ) and
        GetNextInstruction(hp1,hp2) and
        MatchInstruction(hp2,A_MOV,[]) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
        (taicpu(hp2).oper[1]^.typ = top_ref) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          { bring the snapshot up to hp2: replay the allocation markers that
            follow p as well as those that follow hp1, the same way
            OptPass1MOV does for its three-instruction fold; replaying
            markers that were already applied does not change the sets }
          UpdateUsedRegs(TmpUsedRegs,tai(p.next));
          UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
          if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
            not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2, TmpUsedRegs))) then
            { change   mov            (ref), reg
                       add/sub/or/... reg2/$const, reg
                       mov            reg, (ref)
                       # release reg
              to       add/sub/or/... reg2/$const, (ref)    }
            begin
              case taicpu(hp1).opcode of
                { single-operand instructions: the memory location becomes
                  the only operand }
                A_INC,A_DEC,A_NOT,A_NEG :
                  taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
                A_LEA :
                  begin
                    taicpu(hp1).opcode:=A_ADD;
                    { the source of the add is whichever address register
                      of the lea is not reg itself }
                    if taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg then
                      taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
                    else
                      taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base);
                    taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                    DebugMsg('Peephole FoldLea done',hp1);
                  end
                else
                  { two-operand arithmetic: only the destination changes }
                  taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
              end;
              asml.remove(p);
              asml.remove(hp2);
              p.free;
              hp2.free;
              p := hp1
            end;
          ReleaseUsedRegs(TmpUsedRegs);
        end;
    end;
  { First-pass peephole optimizations anchored on instruction p, which is
    accessed as a two-operand taicpu (presumably always an AND - confirm
    against the dispatching code).

    The instruction following p is examined and one of these rewrites is
    applied:

      - and const1, reg / and const2, reg   ->  and (const1 and const2), reg
        (p is removed, p is set to the merged instruction, returns true)
      - and const, reg / movzx subreg, reg  ->  and const, reg
        when const already fits into the zero-extended source size
      - and const, reg / movsx subreg, reg  ->  and const, reg
        when const also leaves the sign bit of the source size clear
      - and x, reg / jcc                    ->  test x, reg / jcc
        when reg is not marked as used in UsedRegs

    Returns true only for the first rewrite; returns false immediately when
    there is no following instruction. }
  function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
    var
      hp1 : tai;
    begin
      Result:=false;

      if not(GetNextInstruction(p, hp1)) then
        exit;

      if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_AND,[]) and
        MatchOpType(hp1,top_const,top_reg) and
        (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
        { the second register must contain the first one, so compare their subreg types }
        (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
        { NOTE(review): the bound presumably keeps the merged mask encodable
          as a 32 bit immediate - confirm }
        (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
        { change
            and const1, reg
            and const2, reg
          to
            and (const1 and const2), reg
        }
        begin
          taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
          DebugMsg('Peephole AndAnd2And done',hp1);
          asml.remove(p);
          p.Free;
          p:=hp1;
          Result:=true;
          exit;
        end
      else if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_MOVZX,[]) and
        (taicpu(hp1).oper[0]^.typ = top_reg) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
        { source and destination of the movzx must be the same super register;
          their subregisters always differ for the opsizes accepted below, so
          the super registers have to be compared, exactly as in the movsx
          case further down }
        (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
        (((taicpu(p).opsize=S_W) and
          (taicpu(hp1).opsize=S_BW)) or
         ((taicpu(p).opsize=S_L) and
          (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
          or
         ((taicpu(p).opsize=S_Q) and
          (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
        ) then
        begin
          { the zero extension is redundant if the mask already clears every
            bit above the source size }
          if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
             ) or
             (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
              or
             (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $ffffffff)=taicpu(p).oper[0]^.val)
             )
{$endif x86_64}
            then
            begin
              DebugMsg('Peephole AndMovzToAnd done',p);
              asml.remove(hp1);
              hp1.free;
            end;
        end
      else if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
        (taicpu(hp1).oper[0]^.typ = top_reg) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
        (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
        (((taicpu(p).opsize=S_W) and
          (taicpu(hp1).opsize=S_BW)) or
         ((taicpu(p).opsize=S_L) and
          (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
          or
         ((taicpu(p).opsize=S_Q) and
          (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
        ) then
        begin
          { the sign extension is redundant if the mask additionally clears
            the sign bit of the source size }
          if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
             ) or
             (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
              or
             (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
             )
{$endif x86_64}
            then
            begin
              DebugMsg('PeepHole Optimization,AndMovsxToAnd',p);
              asml.remove(hp1);
              hp1.free;
            end;
        end
      else if (taicpu(p).oper[1]^.typ = top_reg) and
        (hp1.typ = ait_instruction) and
        (taicpu(hp1).is_jmp) and
        (taicpu(hp1).opcode<>A_JMP) and
        not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
        { change
            and x, reg
            jxx
          to
            test x, reg
            jxx
          if reg is deallocated before the
          jump, but only if it's a conditional jump (PFV)
        }
        taicpu(p).opcode := A_TEST;
    end;
  { Post-peephole rewrite of "mov $0, %reg" into "xor %reg, %reg".

    The instruction is left untouched unless its first operand matches the
    constant 0, its second operand is a register and NR_DEFAULTFLAGS is not
    marked in UsedRegs. }
  procedure TX86AsmOptimizer.PostPeepholeOptMov(const p : tai);
    var
      instr : taicpu;
    begin
      instr:=taicpu(p);

      { source has to be the constant zero }
      if not(MatchOperand(instr.oper[0]^,0)) then
        exit;
      { destination has to be a register }
      if instr.oper[1]^.typ <> top_reg then
        exit;
      { the rewrite is skipped while the flags register is marked as used }
      if RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs) then
        exit;

      { change "mov $0, %reg" into "xor %reg, %reg" }
      instr.opcode:=A_XOR;
      instr.loadReg(0,instr.oper[1]^.reg);
    end;
  1148. end.