{ aoptx86.pas (35 KB) }
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
  27. type
  { Peephole optimizer class for x86 targets; extends TAsmOptimizer with the
    MOV / VMOVAP* / three-operand VEX passes implemented in this unit. }
  28. TX86AsmOptimizer = class(TAsmOptimizer)
  { true if hp is a mov-type instruction (or a pop) that writes reg from a
    source that does not involve reg itself }
  29. function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
  30. protected
  { turns "mov $0,%reg" into "xor %reg,%reg" when the flags are not in use }
  31. procedure PostPeepholeOptMov(const p : tai);
  { pass 1 patterns starting at a register-to-register vmovap* at p; p may be
    replaced by a following instruction }
  32. function OptPass1VMOVAP(var p : tai) : boolean;
  { folds a vmovapd/vmovaps that copies the third operand of p into another
    register into p itself }
  33. function OptPass1VOP(const p : tai) : boolean;
  { pass 1 patterns starting at the mov instruction p; p may be replaced by
    a following instruction }
  34. function OptPass1MOV(var p : tai) : boolean;
  { inserts s as an assembler comment before p when DEBUG_AOPTCPU is
    defined, does nothing otherwise }
  35. procedure DebugMsg(const s : string; p : tai);inline;
  { rewrites the register allocation markers so that reg is allocated from
    p1 up to and including p2 }
  36. procedure AllocRegBetween(reg : tregister; p1,p2 : tai;var initialusedregs : TAllUsedRegs);
  { true if p is ret, leave+ret or "mov %ebp,%esp; pop %ebp; ret" }
  37. function IsExitCode(p : tai) : boolean;
  { removes the last deallocation of the function result register(s)
    found in front of p }
  38. procedure RemoveLastDeallocForFuncRes(p : tai);
  39. end;
  { MatchInstruction: true if instr is an instruction whose opcode is one of
    the given opcodes and whose operand size is in opsize; an empty opsize
    set accepts any operand size. }
  40. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  41. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  42. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  43. function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;
  { MatchOperand: true if oper is exactly the register reg / the constant a,
    or if both operands have the same type and the same contents. }
  44. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  45. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  46. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  { field-by-field comparison of two references }
  47. function RefsEqual(const r1, r2: treference): boolean;
  { true if ref has no offset, segment or symbols, a scale factor of 0 or 1,
    and the given base/index registers (NR_INVALID matches any register) }
  48. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  { true if instr has exactly two operands of the types ot0 and ot1 }
  49. function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
  50. implementation
  51. uses
  52. cutils,
  53. verbose,
  54. aasmcpu,
  55. procinfo,
  56. symconst,symsym;
  { Tests whether instr is an instruction with opcode op.  If opsize is a
    non-empty set, the operand size of the instruction must additionally be
    a member of it; an empty set accepts every operand size. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
    begin
      result := false;
      if instr.typ <> ait_instruction then
        exit;
      if taicpu(instr).opcode <> op then
        exit;
      result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { Same as the single opcode variant, but accepts either op1 or op2 as
    opcode of instr. }
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
    var
      opc: TAsmOp;
    begin
      result := false;
      if instr.typ = ait_instruction then
        begin
          opc := taicpu(instr).opcode;
          if (opc = op1) or (opc = op2) then
            { an empty size set means "any operand size" }
            result := (opsize = []) or (taicpu(instr).opsize in opsize);
        end;
    end;
  { Same as the single opcode variant, but accepts op1, op2 or op3 as
    opcode of instr. }
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
    var
      opc: TAsmOp;
    begin
      result := false;
      if instr.typ = ait_instruction then
        begin
          opc := taicpu(instr).opcode;
          if (opc = op1) or (opc = op2) or (opc = op3) then
            { an empty size set means "any operand size" }
            result := (opsize = []) or (taicpu(instr).opsize in opsize);
        end;
    end;
  { Open array variant: true if instr is an instruction whose opcode occurs
    in ops and whose operand size is in opsize (empty opsize = any size).
    An empty ops array never matches. }
  function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
    const opsize : topsizes) : boolean;
    var
      i : integer;
    begin
      result:=false;
      { these two conditions do not depend on the opcode being tried }
      if instr.typ <> ait_instruction then
        exit;
      if (opsize <> []) and not(taicpu(instr).opsize in opsize) then
        exit;
      for i:=low(ops) to high(ops) do
        if taicpu(instr).opcode = ops[i] then
          begin
            result:=true;
            break;
          end;
    end;
  { true if oper is a register operand holding exactly reg }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ = top_reg then
        result := oper.reg = reg
      else
        result := false;
    end;
  { true if oper is a constant operand with the value a }
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
    begin
      if oper.typ = top_const then
        result := oper.val = a
      else
        result := false;
    end;
  { Compares two operands: they match if they have the same type and the
    same constant value, register or reference.  Operands of any other
    (equal) type are not supported and trigger an internal error. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
    begin
      result := oper1.typ = oper2.typ;
      if not result then
        exit;
      case oper1.typ of
        top_reg:
          result := oper1.reg = oper2.reg;
        top_const:
          result := oper1.val = oper2.val;
        top_ref:
          result := RefsEqual(oper1.ref^, oper2.ref^);
        else
          internalerror(2013102801);
      end;
    end;
  { true if all fields of the two references that are compared here
    (registers, scale factor, offset, symbols and address type) are equal }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      result := false;
      { registers making up the address }
      if (r1.base <> r2.base) or
         (r1.index <> r2.index) or
         (r1.segment <> r2.segment) then
        exit;
      { scaling and displacement }
      if (r1.scalefactor <> r2.scalefactor) or
         (r1.offset <> r2.offset) then
        exit;
      { symbolic parts }
      if (r1.symbol <> r2.symbol) or
         (r1.relsymbol <> r2.relsymbol) then
        exit;
      result := r1.refaddr = r2.refaddr;
    end;
  { Checks whether ref is a plain register based reference: no offset, no
    segment, no symbols and a scale factor of 0 or 1.  The base and index
    registers of ref must be equal to base and index respectively; passing
    NR_INVALID for one of them accepts any register in that position. }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
    begin
      Result:=false;
      if (ref.offset<>0) or
         not(ref.scalefactor in [0,1]) then
        exit;
      if (ref.segment<>NR_NO) or
         (ref.symbol<>nil) or
         (ref.relsymbol<>nil) then
        exit;
      if (base<>NR_INVALID) and
         (ref.base<>base) then
        exit;
      Result:=(index=NR_INVALID) or
        (ref.index=index);
    end;
  { true if instr has exactly two operands, the first one of type ot0 and
    the second one of type ot1; instr is accessed as taicpu without any
    further check }
  function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
    var
      ins : taicpu;
    begin
      ins:=taicpu(instr);
      Result:=false;
      if ins.ops<>2 then
        exit;
      Result:=(ins.oper[0]^.typ=ot0) and
        (ins.oper[1]^.typ=ot1);
    end;
  { Inserts s as an assembler comment in front of p when the unit is compiled
    with DEBUG_AOPTCPU; otherwise the routine is an empty inline procedure. }
  procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);{$ifndef DEBUG_AOPTCPU}inline;{$endif not DEBUG_AOPTCPU}
    begin
  {$ifdef DEBUG_AOPTCPU}
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  {$endif DEBUG_AOPTCPU}
    end;
  160. { allocates register reg between (and including) instructions p1 and p2
  161. the type of p1 and p2 must not be in SkipInstr
  162. note that this routine is both called from the peephole optimizer
  163. (where optinfo is not yet initialised) and from the cse (where it is) }
  { What the code below does, step by step:
      - nothing at all for NR_ESP, the frame pointer of the current
        procedure, or when p1 is nil
      - if reg is not yet part of initialusedregs, an allocation marker is
        inserted in front of p1 and reg is added to initialusedregs
      - every allocation/deallocation marker for reg between p1 and the
        instruction following p2 is removed
      - if the first removed marker was an allocation, an allocation is
        inserted in front of the original p1; if the last removed marker was
        a deallocation, a deallocation is inserted in front of the item
        where the walk stopped
    As written, an item with assigned optinfo inside the range raises
    internalerror(2014022301). }
  164. procedure TX86AsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
  165. var
  166. hp, start: tai;
  167. removedsomething,
  168. firstRemovedWasAlloc,
  169. lastRemovedWasDealloc: boolean;
  170. begin
  171. {$ifdef EXTDEBUG}
  172. { if assigned(p1.optinfo) and
  173. (ptaiprop(p1.optinfo)^.usedregs <> initialusedregs) then
  174. internalerror(2004101010); }
  175. {$endif EXTDEBUG}
  { remember the original start, p1 is used as running pointer below }
  176. start := p1;
  177. if (reg = NR_ESP) or
  178. (reg = current_procinfo.framepointer) or
  179. not(assigned(p1)) then
  180. { this happens with registers which are loaded implicitly, outside the }
  181. { current block (e.g. esi with self) }
  182. exit;
  183. { make sure we allocate it for this instruction }
  184. getnextinstruction(p2,p2);
  185. lastRemovedWasDealloc := false;
  186. removedSomething := false;
  187. firstRemovedWasAlloc := false;
  { NOTE(review): the allocregdebug block below refers to "supreg", which is
    not declared in this routine, and passes asml to insertllitem unlike the
    other calls here -- it presumably no longer compiles when allocregdebug
    is defined; confirm before enabling it }
  188. {$ifdef allocregdebug}
  189. hp := tai_comment.Create(strpnew('allocating '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
  190. ' from here...'));
  191. insertllitem(asml,p1.previous,p1,hp);
  192. hp := tai_comment.Create(strpnew('allocated '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
  193. ' till here...'));
  194. insertllitem(asml,p2,p2.next,hp);
  195. {$endif allocregdebug}
  { reg is not known as used at p1 yet: allocate it right in front of p1 }
  196. if not(RegInUsedRegs(reg,initialusedregs)) then
  197. begin
  198. hp := tai_regalloc.alloc(reg,nil);
  199. insertllItem(p1.previous,p1,hp);
  200. IncludeRegInUsedRegs(reg,initialusedregs);
  201. end;
  { walk from p1 to p2 (which now points behind the original p2) }
  202. while assigned(p1) and
  203. (p1 <> p2) do
  204. begin
  205. if assigned(p1.optinfo) then
  206. internalerror(2014022301); // IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs);
  207. p1 := tai(p1.next);
  { process everything up to the next item that is not in SkipInstr }
  208. repeat
  209. while assigned(p1) and
  210. (p1.typ in (SkipInstr-[ait_regalloc])) Do
  211. p1 := tai(p1.next);
  212. { remove all allocation/deallocation info about the register in between }
  213. if assigned(p1) and
  214. (p1.typ = ait_regalloc) then
  215. if tai_regalloc(p1).reg=reg then
  216. begin
  { the kind of the first and of the last removed marker decides
    which markers are re-inserted after the loop }
  217. if not removedSomething then
  218. begin
  219. firstRemovedWasAlloc := tai_regalloc(p1).ratype=ra_alloc;
  220. removedSomething := true;
  221. end;
  222. lastRemovedWasDealloc := (tai_regalloc(p1).ratype=ra_dealloc);
  223. hp := tai(p1.Next);
  224. asml.Remove(p1);
  225. p1.free;
  226. p1 := hp;
  227. end
  228. else p1 := tai(p1.next);
  229. until not(assigned(p1)) or
  230. not(p1.typ in SkipInstr);
  231. end;
  { p1 is nil if the end of the list was reached; nothing is re-inserted then }
  232. if assigned(p1) then
  233. begin
  234. if firstRemovedWasAlloc then
  235. begin
  236. hp := tai_regalloc.Alloc(reg,nil);
  237. insertLLItem(start.previous,start,hp);
  238. end;
  239. if lastRemovedWasDealloc then
  240. begin
  241. hp := tai_regalloc.DeAlloc(reg,nil);
  242. insertLLItem(p1.previous,p1,hp);
  243. end;
  244. end;
  245. end;
  { Returns true if hp is an instruction which loads reg with a new value
    that does not depend on the old contents of reg:
      - a two operand mov-type instruction (see the list below) whose
        destination is reg and whose source is a constant, a register with a
        different super register or a reference not using reg
      - a pop into reg
    Returns false if hp is nil or not an instruction.

    Compared with a plain super register comparison, the destination must
    also have the same register type as reg (integer and vector registers
    share super register numbers), the operand count is checked before
    oper[1] is accessed (the string instruction movsd has no operands and
    the three operand forms of vmovss/vmovsd have their destination in
    oper[2]), and pop is only accepted with a register operand. }
  function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;

    { true if r has the same register type and super register as reg }
    function IsSameSuperReg(r: tregister): boolean;
      begin
        result :=
          (getregtype(r) = getregtype(reg)) and
          (getsupreg(r) = getsupreg(reg));
      end;

    begin
      Result := false;
      if not assigned(hp) or
         (hp.typ <> ait_instruction) then
        exit;
      p := taicpu(hp);
      case p.opcode of
        A_MOV,
        A_MOVZX,
        A_MOVSX,
        A_LEA,
        A_VMOVSS,
        A_VMOVSD,
        A_VMOVAPD,
        A_VMOVAPS,
        A_VMOVQ,
        A_MOVSS,
        A_MOVSD,
        A_MOVQ,
        A_MOVAPD,
        A_MOVAPS:
          Result :=
            (p.ops = 2) and
            (p.oper[1]^.typ = top_reg) and
            IsSameSuperReg(p.oper[1]^.reg) and
            ((p.oper[0]^.typ = top_const) or
             { the source check stays conservative: any register with the
               same super register number counts as "depends on reg" }
             ((p.oper[0]^.typ = top_reg) and
              (getsupreg(p.oper[0]^.reg) <> getsupreg(reg))) or
             ((p.oper[0]^.typ = top_ref) and
              not RegInRef(reg,p.oper[0]^.ref^)));
        A_POP:
          { pop can also have a memory operand, in which case the reg field
            of the operand is not valid }
          Result :=
            (p.ops = 1) and
            (p.oper[0]^.typ = top_reg) and
            IsSameSuperReg(p.oper[0]^.reg);
      end;
    end;
  { Returns true if p starts one of the recognised procedure exit sequences:
      ret
      leave; ret
      mov %ebp,%esp; pop %ebp; ret }
  function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
    var
      next1,next2 : tai;
    begin
      result:=false;
      if p.typ<>ait_instruction then
        exit;
      case taicpu(p).opcode of
        A_RET:
          result:=true;
        A_LEAVE:
          result:=
            GetNextInstruction(p,next1) and
            (next1.typ=ait_instruction) and
            (taicpu(next1).opcode=A_RET);
        A_MOV:
          begin
            { mov %ebp,%esp ... }
            if (taicpu(p).oper[0]^.typ<>top_reg) or
               (taicpu(p).oper[0]^.reg<>NR_EBP) or
               (taicpu(p).oper[1]^.typ<>top_reg) or
               (taicpu(p).oper[1]^.reg<>NR_ESP) then
              exit;
            { ... followed by pop %ebp ... }
            if not(GetNextInstruction(p,next1)) or
               (next1.typ<>ait_instruction) or
               (taicpu(next1).opcode<>A_POP) or
               (taicpu(next1).oper[0]^.typ<>top_reg) or
               (taicpu(next1).oper[0]^.reg<>NR_EBP) then
              exit;
            { ... and ret }
            result:=
              GetNextInstruction(next1,next2) and
              (next2.typ=ait_instruction) and
              (taicpu(next2).opcode=A_RET);
          end;
      end;
    end;
  { Removes the last deallocation marker of the function result register(s)
    located in front of p.  Which registers are handled depends on the
    result type of the current procedure: EAX for the listed non-ordinal
    types and for ordinals with a size other than 0, additionally EDX for
    ordinals of size 8 (int64/qword). }
  procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);

    { walks backwards from p and deletes the first deallocation marker of the
      integer register supreg; stops at the start of the list or at the
      first item for which regInInstruction reports the register }
    procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
      var
        cur: tai;
        wholereg: tregister;
      begin
        wholereg := newreg(R_INTREGISTER,supreg,R_SUBWHOLE);
        cur := tai(p.previous);
        while assigned(cur) do
          begin
            if (cur.typ = ait_regalloc) and
               (tai_regalloc(cur).ratype=ra_dealloc) and
               (getregtype(tai_regalloc(cur).reg) = R_INTREGISTER) and
               (getsupreg(tai_regalloc(cur).reg) = supreg) then
              begin
                asml.remove(cur);
                cur.free;
                exit;
              end;
            if regInInstruction(wholereg,cur) then
              exit;
            cur := tai(cur.previous);
          end;
      end;

    begin
      case current_procinfo.procdef.returndef.typ of
        orddef:
          begin
            if current_procinfo.procdef.returndef.size = 0 then
              exit;
            DoRemoveLastDeallocForFuncRes(RS_EAX);
            { for int64/qword }
            if current_procinfo.procdef.returndef.size = 8 then
              DoRemoveLastDeallocForFuncRes(RS_EDX);
          end;
        arraydef,recorddef,pointerdef,
        stringdef,enumdef,procdef,objectdef,errordef,
        filedef,setdef,procvardef,
        classrefdef,forwarddef:
          DoRemoveLastDeallocForFuncRes(RS_EAX);
      end;
    end;
  { Pass 1 peephole optimizations for a vmovapd/vmovaps instruction at p.
    Only register to register moves are handled:
      - vmova* reg1,reg1 is removed
      - vmova* reg1,reg2; vmova* reg2,reg3 is folded if reg2 is not used
        afterwards, or reduced to the first move if reg3 = reg1
      - vmova* reg1,reg2; <fma> x,y,reg2; vmova* reg2,reg1 becomes
        <fma> x,y,reg1 if reg2 is not used afterwards
    p is a var parameter: when the instruction at p is removed, p is set to
    a following instruction.  The result is true for the first two patterns
    when something was changed; the fma pattern leaves the result untouched
    (false), as before.

    Every CopyUsedRegs is paired with ReleaseUsedRegs, like in OptPass1MOV. }
  function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1,hp2 : tai;
    begin
      result:=false;
      if MatchOpType(taicpu(p),top_reg,top_reg) then
        begin
          { vmova* reg1,reg1
            =>
            <nop> }
          if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
            begin
              GetNextInstruction(p,hp1);
              asml.Remove(p);
              p.Free;
              p:=hp1;
              result:=true;
            end
          else if GetNextInstruction(p,hp1) then
            begin
              if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
                MatchOpType(taicpu(hp1),top_reg,top_reg) and
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
                begin
                  { vmova* reg1,reg2
                    vmova* reg2,reg3
                    dealloc reg2
                    =>
                    vmova* reg1,reg3 }
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                    begin
                      taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end
                  { special case:
                    vmova* reg1,reg2
                    vmova* reg2,reg1
                    =>
                    vmova* reg1,reg2 }
                  else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
                    begin
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end;
                  ReleaseUsedRegs(TmpUsedRegs);
                end
              else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
                { we mix single and double operations here because we assume that the compiler
                  generates vmovapd only after double operations and vmovaps only after single operations }
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
                { reg2 may only be the destination of the fma: once the
                  initial move is gone, the other sources would read a
                  register which is no longer loaded }
                not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[0]^)) and
                not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
                GetNextInstruction(hp1,hp2) and
                MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
                { the final move must copy reg2 back to reg1, otherwise
                  removing it would lose an unrelated load of reg1 }
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
                MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
                begin
                  { vmova* reg1,reg2
                    <fma>  x,y,reg2
                    vmova* reg2,reg1
                    =>
                    <fma>  x,y,reg1 }
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
                    begin
                      taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
                      asml.Remove(p);
                      p.Free;
                      asml.Remove(hp2);
                      hp2.Free;
                      p:=hp1;
                    end;
                  ReleaseUsedRegs(TmpUsedRegs);
                end;
            end;
        end;
    end;
  { Pass 1 peephole optimization for a three operand instruction at p:
      <op>   x,y,reg1
      vmova* reg1,reg2
    becomes
      <op>   x,y,reg2
    if reg1 is not used afterwards.  Returns true if the move was folded.

    The operand count of p is checked before oper[2] is accessed, and the
    copy of the used registers is released again with ReleaseUsedRegs, like
    in OptPass1MOV. }
  function TX86AsmOptimizer.OptPass1VOP(const p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1 : tai;
    begin
      result:=false;
      if (taicpu(p).ops=3) and
        GetNextInstruction(p,hp1) and
        { we mix single and double operations here because we assume that the compiler
          generates vmovapd only after double operations and vmovaps only after single operations }
        MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
        MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
        (taicpu(hp1).oper[1]^.typ=top_reg) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)) then
            begin
              taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
              asml.Remove(hp1);
              hp1.Free;
              result:=true;
            end;
          ReleaseUsedRegs(TmpUsedRegs);
        end;
    end;
  { Pass 1 peephole optimizations for the mov instruction at p (the operands
    of p are read without checking the opcode, so p is presumably always a
    mov when this is called -- confirm against the caller).
    Patterns handled, in this order:
      1. mov x,%treg; mov %treg,y       -> mov x,y (treg unused afterwards)
         or mov %reg1,%reg2; test/or %reg2,%reg2 -> test/or %reg1,%reg1
         or dead store to the frame right before the exit code is removed,
            mov reg1,mem1; cmp x,mem1   -> cmp x,reg1
      2. several mov/mov and mov/mov/cmp resp. mov/mov/mov combinations
      3. mov $0,reg1; bts/btr ...; or ... -> mov ...; bts/btr ...
      4. mov ref,reg1; lea [reg1+reg2],reg2 -> add ref,reg2
    p is a var parameter and is set to a following instruction when the
    instruction at p is removed.  Result is initialised to false and not
    changed anywhere in this routine. }
  447. function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  448. var
  449. hp1, hp2: tai;
  450. TmpUsedRegs : TAllUsedRegs;
  451. begin
  452. Result:=false;
  453. if (taicpu(p).oper[1]^.typ = top_reg) and
  454. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
  455. GetNextInstruction(p, hp1) and
  456. MatchInstruction(hp1,A_MOV,[]) and
  457. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  458. begin
  459. CopyUsedRegs(TmpUsedRegs);
  460. { we have
  461. mov x, %treg
  462. mov %treg, y
  463. }
  464. if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
  465. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  466. { we've got
  467. mov x, %treg
  468. mov %treg, y
  469. with %treg is not used after }
  470. case taicpu(p).oper[0]^.typ Of
  471. top_reg:
  472. begin
  473. { change
  474. mov %reg, %treg
  475. mov %treg, y
  476. to
  477. mov %reg, y
  478. }
  479. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  480. asml.remove(hp1);
  481. hp1.free;
  482. ReleaseUsedRegs(TmpUsedRegs);
  483. Exit;
  484. end;
  485. top_ref:
  486. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  487. begin
  488. { change
  489. mov mem, %treg
  490. mov %treg, %reg
  491. to
  492. mov mem, %reg
  493. }
  494. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  495. asml.remove(hp1);
  496. hp1.free;
  497. ReleaseUsedRegs(TmpUsedRegs);
  498. Exit;
  499. end;
  500. end;
  501. ReleaseUsedRegs(TmpUsedRegs);
  502. end
  503. else
  504. { Change
  505. mov %reg1, %reg2
  506. xxx %reg2, ???
  507. to
  508. mov %reg1, %reg2
  509. xxx %reg1, ???
  510. to avoid a write/read penalty
  511. }
  512. if MatchOpType(taicpu(p),top_reg,top_reg) and
  513. GetNextInstruction(p,hp1) and
  514. (tai(hp1).typ = ait_instruction) and
  515. (taicpu(hp1).ops >= 1) and
  516. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  517. { we have
  518. mov %reg1, %reg2
  519. XXX %reg2, ???
  520. }
  521. begin
  522. if ((taicpu(hp1).opcode = A_OR) or
  523. (taicpu(hp1).opcode = A_TEST)) and
  524. (taicpu(hp1).oper[1]^.typ = top_reg) and
  525. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  526. { we have
  527. mov %reg1, %reg2
  528. test/or %reg2, %reg2
  529. }
  530. begin
  531. CopyUsedRegs(TmpUsedRegs);
  532. { reg1 will be used after the first instruction,
  533. so update the allocation info }
  534. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  535. if GetNextInstruction(hp1, hp2) and
  536. (hp2.typ = ait_instruction) and
  537. taicpu(hp2).is_jmp and
  538. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  539. { change
  540. mov %reg1, %reg2
  541. test/or %reg2, %reg2
  542. jxx
  543. to
  544. test %reg1, %reg1
  545. jxx
  546. }
  547. begin
  548. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  549. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  550. asml.remove(p);
  551. p.free;
  552. p := hp1;
  553. ReleaseUsedRegs(TmpUsedRegs);
  554. Exit;
  555. end
  556. else
  557. { change
  558. mov %reg1, %reg2
  559. test/or %reg2, %reg2
  560. to
  561. mov %reg1, %reg2
  562. test/or %reg1, %reg1
  563. }
  564. begin
  565. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  566. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  567. end;
  568. ReleaseUsedRegs(TmpUsedRegs);
  569. end
  570. end
  571. else
  572. { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  573. x >= RetOffset) as it doesn't do anything (it writes either to a
  574. parameter or to the temporary storage room for the function
  575. result)
  576. }
  577. if GetNextInstruction(p, hp1) and
  578. (tai(hp1).typ = ait_instruction) then
  579. begin
  580. if IsExitCode(hp1) and
  581. MatchOpType(p,top_reg,top_ref) and
  582. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  583. not(assigned(current_procinfo.procdef.funcretsym) and
  584. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  585. (taicpu(p).oper[1]^.ref^.index = NR_NO) then
  586. begin
  587. asml.remove(p);
  588. p.free;
  589. p := hp1;
  590. DebugMsg('Peephole removed deadstore before leave/ret',p);
  591. RemoveLastDeallocForFuncRes(p);
  592. end
  593. { change
  594. mov reg1, mem1
  595. cmp x, mem1
  596. to
  597. mov reg1, mem1
  598. cmp x, reg1
  599. }
  600. else if MatchOpType(p,top_reg,top_ref) and
  601. MatchInstruction(hp1,A_CMP,[taicpu(p).opsize]) and
  602. (taicpu(hp1).oper[1]^.typ = top_ref) and
  603. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  604. begin
  605. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  606. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  607. end;
  608. end;
  { the checks below are independent of the if/else chain above and run on
    whatever p points to at this stage }
  609. { Next instruction is also a MOV ? }
  610. if GetNextInstruction(p, hp1) and
  611. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  612. begin
  613. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  614. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  615. { mov reg1, mem1 or mov mem1, reg1
  616. mov mem2, reg2 mov reg2, mem2}
  617. begin
  618. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  619. {mov reg1, mem1 or mov mem1, reg1
  620. mov mem2, reg1 mov reg2, mem1}
  621. begin
  622. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  623. { Removes the second statement from
  624. mov reg1, mem1/reg2
  625. mov mem1/reg2, reg1 }
  626. begin
  627. if (taicpu(p).oper[0]^.typ = top_reg) then
  628. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  629. asml.remove(hp1);
  630. hp1.free;
  631. end
  632. else
  633. begin
  634. CopyUsedRegs(TmpUsedRegs);
  635. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  636. if (taicpu(p).oper[1]^.typ = top_ref) and
  637. { mov reg1, mem1
  638. mov mem2, reg1 }
  639. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  640. GetNextInstruction(hp1, hp2) and
  641. MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
  642. OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  643. OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
  644. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  645. { change to
  646. mov reg1, mem1 mov reg1, mem1
  647. mov mem2, reg1 cmp reg1, mem2
  648. cmp mem1, reg1
  649. }
  650. begin
  651. asml.remove(hp2);
  652. hp2.free;
  653. taicpu(hp1).opcode := A_CMP;
  654. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  655. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  656. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  657. DebugMsg('Peephole MovMovCmp2MovCmp done',hp1);
  658. end;
  659. ReleaseUsedRegs(TmpUsedRegs);
  660. end;
  661. end
  662. else
  663. begin
  664. CopyUsedRegs(TmpUsedRegs);
  665. if GetNextInstruction(hp1, hp2) and
  666. MatchOpType(taicpu(p),top_ref,top_reg) and
  667. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  668. (taicpu(hp1).oper[1]^.typ = top_ref) and
  669. MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
  670. MatchOpType(taicpu(hp2),top_ref,top_reg) and
  671. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  672. if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
  673. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  674. { mov mem1, %reg1
  675. mov %reg1, mem2
  676. mov mem2, reg2
  677. to:
  678. mov mem1, reg2
  679. mov reg2, mem2}
  680. begin
  681. AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  682. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  683. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  684. asml.remove(hp2);
  685. hp2.free;
  686. end
  687. {$ifdef i386}
  688. { this is enabled for i386 only, as the rules to create the reg sets below
  689. are too complicated for x86-64, so this makes this code too error prone
  690. on x86-64
  691. }
  692. else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  693. not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
  694. not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
  695. { mov mem1, reg1 mov mem1, reg1
  696. mov reg1, mem2 mov reg1, mem2
  697. mov mem2, reg2 mov mem2, reg1
  698. to: to:
  699. mov mem1, reg1 mov mem1, reg1
  700. mov mem1, reg2 mov reg1, mem2
  701. mov reg1, mem2
  702. or (if mem1 depends on reg1
  703. and/or if mem2 depends on reg2)
  704. to:
  705. mov mem1, reg1
  706. mov reg1, mem2
  707. mov reg1, reg2
  708. }
  709. begin
  710. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  711. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  712. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  713. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  714. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  715. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  716. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  717. AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  718. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  719. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  720. AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  721. end
  722. else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  723. begin
  724. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  725. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  726. end
  727. else
  728. begin
  729. asml.remove(hp2);
  730. hp2.free;
  731. end
  732. {$endif i386}
  733. ;
  734. ReleaseUsedRegs(TmpUsedRegs);
  735. end;
  736. end
  737. (* { movl [mem1],reg1
  738. movl [mem1],reg2
  739. to
  740. movl [mem1],reg1
  741. movl reg1,reg2
  742. }
  743. else if (taicpu(p).oper[0]^.typ = top_ref) and
  744. (taicpu(p).oper[1]^.typ = top_reg) and
  745. (taicpu(hp1).oper[0]^.typ = top_ref) and
  746. (taicpu(hp1).oper[1]^.typ = top_reg) and
  747. (taicpu(p).opsize = taicpu(hp1).opsize) and
  748. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  749. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  750. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  751. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  752. else*)
  753. { movl const1,[mem1]
  754. movl [mem1],reg1
  755. to
  756. movl const1,reg1
  757. movl reg1,[mem1]
  758. }
  759. else if MatchOpType(Taicpu(p),top_const,top_ref) and
  760. MatchOpType(Taicpu(hp1),top_ref,top_reg) and
  761. (taicpu(p).opsize = taicpu(hp1).opsize) and
  762. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  763. not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
  764. begin
  765. allocregbetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  766. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  767. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  768. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  769. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  770. end
  771. end;
  { mov $0,reg1 followed by bts/btr and or on the same register: the or is
    turned into a mov, moved in front of the bts/btr, and the mov $0 is
    removed }
  772. if GetNextInstruction(p, hp1) and
  773. MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  774. GetNextInstruction(hp1, hp2) and
  775. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  776. MatchOperand(Taicpu(p).oper[0]^,0) and
  777. (Taicpu(p).oper[1]^.typ = top_reg) and
  778. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  779. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  780. { mov reg1,0
  781. bts reg1,operand1 --> mov reg1,operand2
  782. or reg1,operand2 bts reg1,operand1}
  783. begin
  784. Taicpu(hp2).opcode:=A_MOV;
  785. asml.remove(hp1);
  786. insertllitem(hp2,hp2.next,hp1);
  787. asml.remove(p);
  788. p.free;
  789. p:=hp1;
  790. end;
  { mov ref,reg1 followed by a 32 bit lea that adds reg1 to its own
    destination register }
  791. if GetNextInstruction(p, hp1) and
  792. MatchInstruction(hp1,A_LEA,[S_L]) and
  793. MatchOpType(Taicpu(p),top_ref,top_reg) and
  794. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  795. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  796. ) or
  797. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  798. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  799. )
  800. ) then
  801. { mov reg1,ref
  802. lea reg2,[reg1,reg2]
  803. to
  804. add reg2,ref}
  805. begin
  806. CopyUsedRegs(TmpUsedRegs);
  807. { reg1 may not be used afterwards }
  808. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  809. begin
  810. Taicpu(hp1).opcode:=A_ADD;
  811. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  812. DebugMsg('Peephole MovLea2Add done',hp1);
  813. asml.remove(p);
  814. p.free;
  815. p:=hp1;
  816. end;
  817. ReleaseUsedRegs(TmpUsedRegs);
  818. end;
  819. end;
  { Post peephole optimization for the mov instruction at p:
    "mov $0,%reg" is changed into "xor %reg,%reg", but only while the flags
    register is not marked as used in UsedRegs. }
  procedure TX86AsmOptimizer.PostPeepholeOptMov(const p : tai);
    begin
      { only a zero constant moved into a register qualifies }
      if not(MatchOperand(taicpu(p).oper[0]^,0)) then
        exit;
      if taicpu(p).oper[1]^.typ <> Top_Reg then
        exit;
      if RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs) then
        exit;
      { change "mov $0, %reg" into "xor %reg, %reg" }
      taicpu(p).opcode := A_XOR;
      taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
    end;
  831. end.