aoptx86.pas 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
  type
    { Peephole optimizer shared by the x86 targets; extends TAsmOptimizer with
      the MOV / VMOVAP* / VEX-operation rewrites implemented in this unit. }
    TX86AsmOptimizer = class(TAsmOptimizer)
      { true when instruction hp loads reg with a value independent of its old contents }
      function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
    protected
      { post-peephole rewrite of "mov $0,%reg" }
      procedure PostPeepholeOptMov(const p : tai);

      { pass-1 rewrites, one entry point per instruction family }
      function OptPass1VMOVAP(var p : tai) : boolean;
      function OptPass1VOP(const p : tai) : boolean;
      function OptPass1MOV(var p : tai) : boolean;

      { inserts s as an assembler comment before p when DEBUG_AOPTCPU is defined }
      procedure DebugMsg(const s : string; p : tai);inline;

      { register-allocation bookkeeping used by the rewrites above }
      procedure AllocRegBetween(reg : tregister; p1,p2 : tai;var initialusedregs : TAllUsedRegs);
      function IsExitCode(p : tai) : boolean;
      procedure RemoveLastDeallocForFuncRes(p : tai);
    end;
  { --- instruction matching: opcode (one, two, three or a list) plus operand size;
        an empty opsize set accepts any size --- }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;

  { --- operand matching: against a register, a constant, or another operand --- }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;

  { --- reference and operand-type helpers --- }
  function RefsEqual(const r1, r2: treference): boolean;
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
  50. implementation
  51. uses
  52. cutils,
  53. verbose,
  54. aasmcpu,
  55. procinfo,
  56. symconst,symsym;
  { Returns true when instr is an instruction whose opcode equals op and whose
    operand size is a member of opsize. An empty opsize set accepts every size. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
    begin
      result:=false;
      { only real instructions carry an opcode }
      if instr.typ<>ait_instruction then
        exit;
      if taicpu(instr).opcode<>op then
        exit;
      result:=(opsize=[]) or (taicpu(instr).opsize in opsize);
    end;
  { Returns true when instr is an instruction whose opcode is op1 or op2 and
    whose operand size is a member of opsize. An empty opsize set accepts
    every size. }
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
    var
      opc : TAsmOp;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      opc:=taicpu(instr).opcode;
      if (opc<>op1) and (opc<>op2) then
        exit;
      result:=(opsize=[]) or (taicpu(instr).opsize in opsize);
    end;
  { Returns true when instr is an instruction whose opcode is one of op1, op2
    or op3 and whose operand size is a member of opsize. An empty opsize set
    accepts every size. }
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
    var
      opc : TAsmOp;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      opc:=taicpu(instr).opcode;
      if (opc<>op1) and (opc<>op2) and (opc<>op3) then
        exit;
      result:=(opsize=[]) or (taicpu(instr).opsize in opsize);
    end;
  { Returns true when instr is an instruction whose opcode occurs in ops and
    whose operand size is a member of opsize. An empty opsize set accepts
    every size; an empty ops array never matches. }
  function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
    const opsize : topsizes) : boolean;
    var
      i : longint;
    begin
      result:=false;
      { these two conditions do not depend on the opcode being tried }
      if instr.typ<>ait_instruction then
        exit;
      if (opsize<>[]) and not(taicpu(instr).opsize in opsize) then
        exit;
      for i:=low(ops) to high(ops) do
        if taicpu(instr).opcode=ops[i] then
          begin
            result:=true;
            exit;
          end;
    end;
  { Returns true when oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ=top_reg then
        result:=(oper.reg=reg)
      else
        result:=false;
    end;
  { Returns true when oper is a constant operand whose value equals a. }
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
    begin
      if oper.typ=top_const then
        result:=(oper.val=a)
      else
        result:=false;
    end;
  { Returns true when both operands have the same kind and the same payload
    (constant value, register, or reference as compared by RefsEqual).
    Two operands of the same kind other than const/reg/ref raise
    internalerror 2013102801. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
    begin
      if oper1.typ<>oper2.typ then
        begin
          result:=false;
          exit;
        end;
      result:=true;
      case oper1.typ of
        top_reg:
          result:=(oper1.reg=oper2.reg);
        top_const:
          result:=(oper1.val=oper2.val);
        top_ref:
          result:=RefsEqual(oper1.ref^,oper2.ref^);
        else
          internalerror(2013102801);
      end;
    end;
  { Field-by-field comparison of two references: offset, segment, base, index,
    scale factor, symbol, address kind and relative symbol must all agree. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      result:=false;
      if r1.offset<>r2.offset then
        exit;
      if (r1.segment<>r2.segment) or (r1.base<>r2.base) then
        exit;
      if (r1.index<>r2.index) or (r1.scalefactor<>r2.scalefactor) then
        exit;
      if (r1.symbol<>r2.symbol) or (r1.refaddr<>r2.refaddr) then
        exit;
      result:=(r1.relsymbol=r2.relsymbol);
    end;
  { Returns true when ref has no offset, segment, symbol or relative symbol,
    has a scale factor of 0 or 1, and its base/index registers equal the given
    ones. Passing NR_INVALID for base or index skips that comparison. }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
    begin
      Result:=false;
      if (ref.offset<>0) or
         not(ref.scalefactor in [0,1]) or
         (ref.segment<>NR_NO) or
         (ref.symbol<>nil) or
         (ref.relsymbol<>nil) then
        exit;
      if (base<>NR_INVALID) and (ref.base<>base) then
        exit;
      Result:=(index=NR_INVALID) or (ref.index=index);
    end;
  { Returns true when instr (accessed as a taicpu) has exactly two operands
    whose kinds are ot0 and ot1, in that order. }
  function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
    begin
      Result:=false;
      if taicpu(instr).ops<>2 then
        exit;
      if taicpu(instr).oper[0]^.typ<>ot0 then
        exit;
      Result:=(taicpu(instr).oper[1]^.typ=ot1);
    end;
  { DebugMsg: with DEBUG_AOPTCPU defined, s is inserted as an assembler comment
    in front of p; otherwise the routine is an empty inline stub. }
{$ifdef DEBUG_AOPTCPU}
  procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
    var
      note : tai_comment;
    begin
      note:=tai_comment.Create(strpnew(s));
      asml.insertbefore(note, p);
    end;
{$else DEBUG_AOPTCPU}
  procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
      { intentionally empty }
    end;
{$endif DEBUG_AOPTCPU}
  160. { allocates register reg between (and including) instructions p1 and p2
  161. the type of p1 and p2 must not be in SkipInstr
  162. note that this routine is both called from the peephole optimizer
  163. (where optinfo is not yet initialised) and from the cse (where it is) }
  { Parameters:
      reg             - register to keep allocated over the range
      p1, p2          - first and last list entry of the range
      initialusedregs - used-register state at p1; reg is added to it when it
                        was not yet marked as used
    Does nothing when reg is NR_ESP, the current frame pointer, or p1 is nil.
    Raises internalerror 2014022301 if an entry in the range has optinfo set. }
  164. procedure TX86AsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
  165. var
  166. hp, start: tai;
  167. removedsomething,
  168. firstRemovedWasAlloc,
  169. lastRemovedWasDealloc: boolean;
  170. begin
  171. {$ifdef EXTDEBUG}
  172. { if assigned(p1.optinfo) and
  173. (ptaiprop(p1.optinfo)^.usedregs <> initialusedregs) then
  174. internalerror(2004101010); }
  175. {$endif EXTDEBUG}
  { p1 is advanced by the loop below, so keep the original start of the range }
  176. start := p1;
  177. if (reg = NR_ESP) or
  178. (reg = current_procinfo.framepointer) or
  179. not(assigned(p1)) then
  180. { this happens with registers which are loaded implicitly, outside the }
  181. { current block (e.g. esi with self) }
  182. exit;
  183. { make sure we allocate it for this instruction }
  { p2 becomes the instruction after the range; the loop below stops when p1 reaches it }
  184. getnextinstruction(p2,p2);
  185. lastRemovedWasDealloc := false;
  186. removedSomething := false;
  187. firstRemovedWasAlloc := false;
  { NOTE(review): the allocregdebug-only code below uses "supreg", which is not
    declared in this routine, and passes asml to insertllitem unlike the other
    calls here - confirm it still compiles before enabling the define }
  188. {$ifdef allocregdebug}
  189. hp := tai_comment.Create(strpnew('allocating '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
  190. ' from here...'));
  191. insertllitem(asml,p1.previous,p1,hp);
  192. hp := tai_comment.Create(strpnew('allocated '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
  193. ' till here...'));
  194. insertllitem(asml,p2,p2.next,hp);
  195. {$endif allocregdebug}
  { reg not yet marked as used at p1: insert an alloc marker in front of p1 and record it }
  196. if not(RegInUsedRegs(reg,initialusedregs)) then
  197. begin
  198. hp := tai_regalloc.alloc(reg,nil);
  199. insertllItem(p1.previous,p1,hp);
  200. IncludeRegInUsedRegs(reg,initialusedregs);
  201. end;
  { walk from p1 up to (not including) p2, dropping every regalloc entry for reg on the way }
  202. while assigned(p1) and
  203. (p1 <> p2) do
  204. begin
  205. if assigned(p1.optinfo) then
  206. internalerror(2014022301); // IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs);
  207. p1 := tai(p1.next);
  208. repeat
  { skip entries in SkipInstr, but stop on regalloc entries so they can be inspected }
  209. while assigned(p1) and
  210. (p1.typ in (SkipInstr-[ait_regalloc])) Do
  211. p1 := tai(p1.next);
  212. { remove all allocation/deallocation info about the register in between }
  213. if assigned(p1) and
  214. (p1.typ = ait_regalloc) then
  215. if tai_regalloc(p1).reg=reg then
  216. begin
  { remember the kind of the first and of the most recent removed entry;
    they decide which markers are re-inserted after the loop }
  217. if not removedSomething then
  218. begin
  219. firstRemovedWasAlloc := tai_regalloc(p1).ratype=ra_alloc;
  220. removedSomething := true;
  221. end;
  222. lastRemovedWasDealloc := (tai_regalloc(p1).ratype=ra_dealloc);
  { fetch the successor before p1 is unlinked and freed }
  223. hp := tai(p1.Next);
  224. asml.Remove(p1);
  225. p1.free;
  226. p1 := hp;
  227. end
  228. else p1 := tai(p1.next);
  229. until not(assigned(p1)) or
  230. not(p1.typ in SkipInstr);
  231. end;
  { re-create the outer markers: an alloc in front of the original start when the
    first removed entry was an alloc, and a dealloc in front of the entry the walk
    stopped at when the last removed entry was a dealloc }
  232. if assigned(p1) then
  233. begin
  234. if firstRemovedWasAlloc then
  235. begin
  236. hp := tai_regalloc.Alloc(reg,nil);
  237. insertLLItem(start.previous,start,hp);
  238. end;
  239. if lastRemovedWasDealloc then
  240. begin
  241. hp := tai_regalloc.DeAlloc(reg,nil);
  242. insertLLItem(p1.previous,p1,hp);
  243. end;
  244. end;
  245. end;
  { Returns true when instruction hp overwrites register reg with a value that
    does not depend on the previous contents of reg.

    Recognised forms:
      - a two-operand move-like instruction (MOV, MOVZX, MOVSX, LEA and the
        SSE/AVX moves listed below) whose destination (operand 1) is reg and
        whose source is a constant, a different register, or a memory
        reference that does not use reg;
      - POP with reg as its register operand.

    Returns false for a nil hp, for non-instruction entries and for every
    other instruction. }
  function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      Result := false;
      if not assigned(hp) or
         (hp.typ <> ait_instruction) then
        exit;
      p := taicpu(hp);
      case p.opcode of
        A_MOV,A_MOVZX,A_MOVSX,A_LEA,
        A_VMOVSS,A_VMOVSD,A_VMOVAPD,A_VMOVAPS,A_VMOVQ,
        A_MOVSS,A_MOVSD,A_MOVQ,A_MOVAPD,A_MOVAPS:
          Result :=
            { operand 1 is only the destination in the two-operand form: this
              excludes the operand-less string MOVSD and the three-operand
              VMOVSS/VMOVSD, for which oper[1] is absent or not the target }
            (p.ops = 2) and
            (p.oper[1]^.typ = top_reg) and
            { super-register numbers are only unique within one register
              type, so e.g. a write to an XMM register must not be taken for
              a write to the integer register with the same number }
            (getregtype(p.oper[1]^.reg) = getregtype(reg)) and
            (getsupreg(p.oper[1]^.reg) = getsupreg(reg)) and
            ((p.oper[0]^.typ = top_const) or
             ((p.oper[0]^.typ = top_reg) and
              (getsupreg(p.oper[0]^.reg) <> getsupreg(reg))) or
             ((p.oper[0]^.typ = top_ref) and
              not RegInRef(reg,p.oper[0]^.ref^)));
        A_POP:
          Result :=
            { POP may also target memory; only inspect .reg for a register operand }
            (p.ops >= 1) and
            (p.oper[0]^.typ = top_reg) and
            (getregtype(p.oper[0]^.reg) = getregtype(reg)) and
            (getsupreg(p.oper[0]^.reg) = getsupreg(reg));
      end;
    end;
  { Returns true when p starts one of the recognised exit sequences:
      ret
      leave ; ret
      mov %ebp,%esp ; pop %ebp ; ret
    Any other entry, including non-instructions, yields false. }
  function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
    var
      second,third : tai;
    begin
      result:=false;
      if p.typ<>ait_instruction then
        exit;
      case taicpu(p).opcode of
        A_RET:
          result:=true;
        A_LEAVE:
          result:=
            GetNextInstruction(p,second) and
            (second.typ=ait_instruction) and
            (taicpu(second).opcode=A_RET);
        A_MOV:
          result:=
            (taicpu(p).oper[0]^.typ=top_reg) and
            (taicpu(p).oper[0]^.reg=NR_EBP) and
            (taicpu(p).oper[1]^.typ=top_reg) and
            (taicpu(p).oper[1]^.reg=NR_ESP) and
            GetNextInstruction(p,second) and
            (second.typ=ait_instruction) and
            (taicpu(second).opcode=A_POP) and
            (taicpu(second).oper[0]^.typ=top_reg) and
            (taicpu(second).oper[0]^.reg=NR_EBP) and
            GetNextInstruction(second,third) and
            (third.typ=ait_instruction) and
            (taicpu(third).opcode=A_RET);
      end;
    end;
  { Removes the closest preceding dealloc marker of the function-result
    register(s) before p. Which registers are handled depends on the kind of
    the current procedure's return type: EAX for the listed non-ordinal kinds,
    EAX (plus EDX when the size is 8) for ordinals of non-zero size. }
  procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);

    { Walks backwards from p. Removes the first integer dealloc marker found
      for supreg, and gives up at the start of the list or at the first entry
      for which regInInstruction reports the whole register. }
    procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
      var
        cur: tai;
        wholereg: tregister;
      begin
        wholereg:=newreg(R_INTREGISTER,supreg,R_SUBWHOLE);
        cur:=tai(p.previous);
        while assigned(cur) do
          begin
            if (cur.typ = ait_regalloc) and
               (tai_regalloc(cur).ratype=ra_dealloc) and
               (getregtype(tai_regalloc(cur).reg) = R_INTREGISTER) and
               (getsupreg(tai_regalloc(cur).reg) = supreg) then
              begin
                asml.remove(cur);
                cur.free;
                exit;
              end;
            if regInInstruction(wholereg,cur) then
              exit;
            cur:=tai(cur.previous);
          end;
      end;

    begin
      case current_procinfo.procdef.returndef.typ of
        arraydef,recorddef,pointerdef,
        stringdef,enumdef,procdef,objectdef,errordef,
        filedef,setdef,procvardef,
        classrefdef,forwarddef:
          DoRemoveLastDeallocForFuncRes(RS_EAX);
        orddef:
          begin
            { a size of 0 means there is no result register to keep alive }
            if current_procinfo.procdef.returndef.size = 0 then
              exit;
            DoRemoveLastDeallocForFuncRes(RS_EAX);
            { for int64/qword }
            if current_procinfo.procdef.returndef.size = 8 then
              DoRemoveLastDeallocForFuncRes(RS_EDX);
          end;
      end;
    end;
  { Pass-1 peephole rewrites for a register-to-register vmovaps/vmovapd at p.

    Rewrites performed:
      1. vmova* reg1,reg1                  -> removed
      2. vmova* reg1,reg2 ; vmova* reg2,reg3, reg2 unused afterwards
                                           -> vmova* reg1,reg3
         vmova* reg1,reg2 ; vmova* reg2,reg1
                                           -> vmova* reg1,reg2
      3. vmova* reg1,reg2 ; <fma> ...,reg2 ; vmova* ...,reg1,
         reg2 unused afterwards            -> <fma> ...,reg1

    p is a var parameter: when p itself is removed it is set to the following
    instruction. Returns true for rewrites 1 and 2; rewrite 3 leaves the
    result false, as before.

    Every CopyUsedRegs is now paired with a ReleaseUsedRegs; previously the
    temporary register sets created here were never released. }
  function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1,hp2 : tai;
    begin
      result:=false;
      if MatchOpType(taicpu(p),top_reg,top_reg) then
        begin
          { vmova* reg1,reg1
            =>
            <nop> }
          if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
            begin
              GetNextInstruction(p,hp1);
              asml.Remove(p);
              p.Free;
              p:=hp1;
              result:=true;
            end
          else if GetNextInstruction(p,hp1) then
            begin
              if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
                MatchOpType(taicpu(hp1),top_reg,top_reg) and
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
                begin
                  { vmova* reg1,reg2
                    vmova* reg2,reg3
                    dealloc reg2
                    =>
                    vmova* reg1,reg3 }
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                    begin
                      taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end
                  { special case:
                    vmova* reg1,reg2
                    vmova* reg2,reg1
                    =>
                    vmova* reg1,reg2 }
                  else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
                    begin
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end;
                  { release the temporary copy on every path through this branch }
                  ReleaseUsedRegs(TmpUsedRegs);
                end
              else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
                { we mix single and double operations here because we assume that the compiler
                  generates vmovapd only after double operations and vmovaps only after single operations }
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
                GetNextInstruction(hp1,hp2) and
                MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
                MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
                begin
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
                    begin
                      { let the fma write reg1 directly and drop both moves }
                      taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
                      asml.Remove(p);
                      p.Free;
                      asml.Remove(hp2);
                      hp2.Free;
                      p:=hp1;
                    end;
                  { release the temporary copy whether or not the rewrite applied }
                  ReleaseUsedRegs(TmpUsedRegs);
                end;
            end;
        end;
    end;
  { Pass-1 peephole rewrite for a three-operand instruction p whose result
    (operand 2) is immediately copied by a vmovaps/vmovapd:

      <op>   ...,reg1
      vmova* reg1,reg2      (reg1 unused afterwards)
    =>
      <op>   ...,reg2

    Returns true when the move was folded into p, false otherwise.

    The temporary register sets obtained with CopyUsedRegs are now released
    with ReleaseUsedRegs; previously they were never released. }
  function TX86AsmOptimizer.OptPass1VOP(const p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1 : tai;
    begin
      result:=false;
      if GetNextInstruction(p,hp1) and
        { we mix single and double operations here because we assume that the compiler
          generates vmovapd only after double operations and vmovaps only after single operations }
        MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
        MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
        (taicpu(hp1).oper[1]^.typ=top_reg) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)) then
            begin
              taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
              asml.Remove(hp1);
              hp1.Free;
              result:=true;
            end;
          { release the temporary copy whether or not the rewrite applied }
          ReleaseUsedRegs(TmpUsedRegs);
        end;
    end;
  { Pass-1 peephole rewrites anchored at instruction p, which is accessed as a
    taicpu throughout (presumably always a MOV - confirm against the caller).

    The routine tries, in this order:
      - folding "mov x,%treg ; mov %treg,y" into one mov when %treg is dead;
      - redirecting "test/or %reg2,%reg2" after "mov %reg1,%reg2" to %reg1,
        dropping the mov when a jump follows and %reg2 is dead;
      - removing a store to the frame directly in front of the exit code;
      - replacing the memory operand of a cmp with the register just stored;
      - several mov/mov and mov/mov/mov (or cmp) combinations;
      - "mov $0,reg ; bts/btr ; or"  ->  "mov ; bts/btr";
      - "mov ref,reg1 ; lea"         ->  "add ref,reg2" when reg1 is dead.

    p is a var parameter: whenever p itself is removed it is set to the
    instruction that followed it. Result is set to false on entry and is not
    changed anywhere in this routine, even when the list was modified. }
  447. function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  448. var
  449. hp1, hp2: tai;
  450. TmpUsedRegs : TAllUsedRegs;
  451. GetNextIntruction_p : Boolean;
  452. begin
  453. Result:=false;
  { fetch the successor of p once; hp1 and this flag are reused by the checks below }
  454. GetNextIntruction_p:=GetNextInstruction(p, hp1);
  455. if (taicpu(p).oper[1]^.typ = top_reg) and
  456. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
  457. GetNextIntruction_p and
  458. MatchInstruction(hp1,A_MOV,[]) and
  459. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  460. begin
  461. CopyUsedRegs(TmpUsedRegs);
  462. { we have
  463. mov x, %treg
  464. mov %treg, y
  465. }
  466. if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
  467. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  468. { we've got
  469. mov x, %treg
  470. mov %treg, y
  471. with %treg is not used after }
  472. case taicpu(p).oper[0]^.typ Of
  473. top_reg:
  474. begin
  475. { change
  476. mov %reg, %treg
  477. mov %treg, y
  478. to
  479. mov %reg, y
  480. }
  481. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  482. asml.remove(hp1);
  483. hp1.free;
  484. ReleaseUsedRegs(TmpUsedRegs);
  485. Exit;
  486. end;
  487. top_ref:
  488. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  489. begin
  490. { change
  491. mov mem, %treg
  492. mov %treg, %reg
  493. to
  494. mov mem, %reg
  495. }
  496. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  497. asml.remove(hp1);
  498. hp1.free;
  499. ReleaseUsedRegs(TmpUsedRegs);
  500. Exit;
  501. end;
  502. end;
  503. ReleaseUsedRegs(TmpUsedRegs);
  504. end
  505. else
  506. { Change
  507. mov %reg1, %reg2
  508. xxx %reg2, ???
  509. to
  510. mov %reg1, %reg2
  511. xxx %reg1, ???
  512. to avoid a write/read penalty
  513. }
  514. if MatchOpType(taicpu(p),top_reg,top_reg) and
  515. GetNextInstruction(p,hp1) and
  516. (tai(hp1).typ = ait_instruction) and
  517. (taicpu(hp1).ops >= 1) and
  518. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  519. { we have
  520. mov %reg1, %reg2
  521. XXX %reg2, ???
  522. }
  523. begin
  524. if ((taicpu(hp1).opcode = A_OR) or
  525. (taicpu(hp1).opcode = A_TEST)) and
  526. (taicpu(hp1).oper[1]^.typ = top_reg) and
  527. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  528. { we have
  529. mov %reg1, %reg2
  530. test/or %reg2, %reg2
  531. }
  532. begin
  533. CopyUsedRegs(TmpUsedRegs);
  534. { reg1 will be used after the first instruction,
  535. so update the allocation info }
  536. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  537. if GetNextInstruction(hp1, hp2) and
  538. (hp2.typ = ait_instruction) and
  539. taicpu(hp2).is_jmp and
  540. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  541. { change
  542. mov %reg1, %reg2
  543. test/or %reg2, %reg2
  544. jxx
  545. to
  546. test %reg1, %reg1
  547. jxx
  548. }
  549. begin
  550. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  551. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  552. asml.remove(p);
  553. p.free;
  554. p := hp1;
  555. ReleaseUsedRegs(TmpUsedRegs);
  556. Exit;
  557. end
  558. else
  559. { change
  560. mov %reg1, %reg2
  561. test/or %reg2, %reg2
  562. to
  563. mov %reg1, %reg2
  564. test/or %reg1, %reg1
  565. }
  566. begin
  567. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  568. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  569. end;
  570. ReleaseUsedRegs(TmpUsedRegs);
  571. end
  572. end
  573. else
  574. { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  575. x >= RetOffset) as it doesn't do anything (it writes either to a
  576. parameter or to the temporary storage room for the function
  577. result)
  578. }
  579. if GetNextIntruction_p and
  580. (tai(hp1).typ = ait_instruction) then
  581. begin
  582. if IsExitCode(hp1) and
  583. MatchOpType(p,top_reg,top_ref) and
  584. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  585. not(assigned(current_procinfo.procdef.funcretsym) and
  586. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  587. (taicpu(p).oper[1]^.ref^.index = NR_NO) then
  588. begin
  { p is replaced by hp1 here; the checks further down therefore compare hp1 with itself }
  589. asml.remove(p);
  590. p.free;
  591. p := hp1;
  592. DebugMsg('Peephole removed deadstore before leave/ret',p);
  593. RemoveLastDeallocForFuncRes(p);
  594. end
  595. { change
  596. mov reg1, mem1
  597. cmp x, mem1
  598. to
  599. mov reg1, mem1
  600. cmp x, reg1
  601. }
  602. else if MatchOpType(p,top_reg,top_ref) and
  603. MatchInstruction(hp1,A_CMP,[taicpu(p).opsize]) and
  604. (taicpu(hp1).oper[1]^.typ = top_ref) and
  605. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  606. begin
  607. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  608. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  609. end;
  610. end;
  611. { Next instruction is also a MOV ? }
  612. if GetNextIntruction_p and
  613. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  614. begin
  615. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  616. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  617. { mov reg1, mem1 or mov mem1, reg1
  618. mov mem2, reg2 mov reg2, mem2}
  619. begin
  620. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  621. {mov reg1, mem1 or mov mem1, reg1
  622. mov mem2, reg1 mov reg2, mem1}
  623. begin
  624. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  625. { Removes the second statement from
  626. mov reg1, mem1/reg2
  627. mov mem1/reg2, reg1 }
  628. begin
  629. if (taicpu(p).oper[0]^.typ = top_reg) then
  630. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  631. asml.remove(hp1);
  632. hp1.free;
  633. end
  634. else
  635. begin
  { work on a copy of the used-register state, advanced to the entry after hp1 }
  636. CopyUsedRegs(TmpUsedRegs);
  637. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  638. if (taicpu(p).oper[1]^.typ = top_ref) and
  639. { mov reg1, mem1
  640. mov mem2, reg1 }
  641. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  642. GetNextInstruction(hp1, hp2) and
  643. MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
  644. OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  645. OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
  646. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  647. { change to
  648. mov reg1, mem1 mov reg1, mem1
  649. mov mem2, reg1 cmp reg1, mem2
  650. cmp mem1, reg1
  651. }
  652. begin
  653. asml.remove(hp2);
  654. hp2.free;
  655. taicpu(hp1).opcode := A_CMP;
  656. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  657. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  658. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  659. DebugMsg('Peephole MovMovCmp2MovCmp done',hp1);
  660. end;
  661. ReleaseUsedRegs(TmpUsedRegs);
  662. end;
  663. end
  664. else
  665. begin
  666. CopyUsedRegs(TmpUsedRegs);
  { note: every "else" branch further down (including the i386-only ones) belongs to
    the inner "if not RegInRef..." test, so they only run when this outer test holds }
  667. if GetNextInstruction(hp1, hp2) and
  668. MatchOpType(taicpu(p),top_ref,top_reg) and
  669. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  670. (taicpu(hp1).oper[1]^.typ = top_ref) and
  671. MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
  672. MatchOpType(taicpu(hp2),top_ref,top_reg) and
  673. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  674. if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
  675. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  676. { mov mem1, %reg1
  677. mov %reg1, mem2
  678. mov mem2, reg2
  679. to:
  680. mov mem1, reg2
  681. mov reg2, mem2}
  682. begin
  683. AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  684. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  685. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  686. asml.remove(hp2);
  687. hp2.free;
  688. end
  689. {$ifdef i386}
  690. { this is enabled for i386 only, as the rules to create the reg sets below
  691. are too complicated for x86-64, so this makes this code too error prone
  692. on x86-64
  693. }
  694. else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  695. not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
  696. not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
  697. { mov mem1, reg1 mov mem1, reg1
  698. mov reg1, mem2 mov reg1, mem2
  699. mov mem2, reg2 mov mem2, reg1
  700. to: to:
  701. mov mem1, reg1 mov mem1, reg1
  702. mov mem1, reg2 mov reg1, mem2
  703. mov reg1, mem2
  704. or (if mem1 depends on reg1
  705. and/or if mem2 depends on reg2)
  706. to:
  707. mov mem1, reg1
  708. mov reg1, mem2
  709. mov reg1, reg2
  710. }
  711. begin
  712. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  713. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  714. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  715. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  716. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  717. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  718. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  719. AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  720. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  721. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  722. AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  723. end
  724. else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  725. begin
  726. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  727. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  728. end
  729. else
  730. begin
  731. asml.remove(hp2);
  732. hp2.free;
  733. end
  734. {$endif i386}
  735. ;
  736. ReleaseUsedRegs(TmpUsedRegs);
  737. end;
  738. end
  739. (* { movl [mem1],reg1
  740. movl [mem1],reg2
  741. to
  742. movl [mem1],reg1
  743. movl reg1,reg2
  744. }
  745. else if (taicpu(p).oper[0]^.typ = top_ref) and
  746. (taicpu(p).oper[1]^.typ = top_reg) and
  747. (taicpu(hp1).oper[0]^.typ = top_ref) and
  748. (taicpu(hp1).oper[1]^.typ = top_reg) and
  749. (taicpu(p).opsize = taicpu(hp1).opsize) and
  750. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  751. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  752. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  753. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  754. else*)
  755. { movl const1,[mem1]
  756. movl [mem1],reg1
  757. to
  758. movl const1,reg1
  759. movl reg1,[mem1]
  760. }
  761. else if MatchOpType(Taicpu(p),top_const,top_ref) and
  762. MatchOpType(Taicpu(hp1),top_ref,top_reg) and
  763. (taicpu(p).opsize = taicpu(hp1).opsize) and
  764. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  765. not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
  766. begin
  767. allocregbetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  { the order matters: hp1's source is set to its own destination register first, so
    that register can still be read from hp1.oper[0] when p is redirected below }
  768. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  769. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  770. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  771. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  772. end
  773. end;
  774. if GetNextIntruction_p and
  775. MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  776. GetNextInstruction(hp1, hp2) and
  777. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  778. MatchOperand(Taicpu(p).oper[0]^,0) and
  779. (Taicpu(p).oper[1]^.typ = top_reg) and
  780. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  781. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  782. { mov reg1,0
  783. bts reg1,operand1 --> mov reg1,operand2
  784. or reg1,operand2 bts reg1,operand1}
  785. begin
  { the or becomes the mov, the bts/btr is moved behind it, and the original mov is dropped }
  786. Taicpu(hp2).opcode:=A_MOV;
  787. asml.remove(hp1);
  788. insertllitem(hp2,hp2.next,hp1);
  789. asml.remove(p);
  790. p.free;
  791. p:=hp1;
  792. end;
  793. if GetNextIntruction_p and
  794. MatchInstruction(hp1,A_LEA,[S_L]) and
  795. MatchOpType(Taicpu(p),top_ref,top_reg) and
  796. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  797. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  798. ) or
  799. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  800. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  801. )
  802. ) then
  803. { mov reg1,ref
  804. lea reg2,[reg1,reg2]
  805. to
  806. add reg2,ref}
  807. begin
  808. CopyUsedRegs(TmpUsedRegs);
  809. { reg1 may not be used afterwards }
  810. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  811. begin
  { NOTE(review): unlike lea, add modifies the flags; no flag-liveness check is
    visible here - confirm that this is guaranteed elsewhere }
  812. Taicpu(hp1).opcode:=A_ADD;
  813. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  814. DebugMsg('Peephole MovLea2Add done',hp1);
  815. asml.remove(p);
  816. p.free;
  817. p:=hp1;
  818. end;
  819. ReleaseUsedRegs(TmpUsedRegs);
  820. end;
  821. end;
  { Post-peephole rewrite: turns "mov $0,%reg" into "xor %reg,%reg" when the
    default flags register is not marked as used in UsedRegs. p is accessed
    as a taicpu; all other instructions are left untouched. }
  procedure TX86AsmOptimizer.PostPeepholeOptMov(const p : tai);
    begin
      { source must be the constant 0 ... }
      if not MatchOperand(taicpu(p).oper[0]^,0) then
        exit;
      { ... the destination a register ... }
      if taicpu(p).oper[1]^.typ <> Top_Reg then
        exit;
      { ... and the flags must be free }
      if RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs) then
        exit;
      { change "mov $0, %reg" into "xor %reg, %reg" }
      taicpu(p).opcode := A_XOR;
      taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
    end;
  833. end.