aoptx86.pas 49 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
type
  { Assembler optimizer for x86 targets; derives from TAsmOptimizer and adds
    the helper routines and peephole passes implemented in this unit. }
  TX86AsmOptimizer = class(TAsmOptimizer)
    { Reports whether instruction hp loads reg with a value that does not
      depend on the previous contents of reg (plain moves/lea/pop). }
    function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
  protected
    procedure PostPeepholeOptMov(const p : tai);
    function OptPass1AND(var p : tai) : boolean;
    { Pass 1 peephole patterns that start at a vmovaps/vmovapd instruction. }
    function OptPass1VMOVAP(var p : tai) : boolean;
    { Pass 1 pattern: folds a following vmovap* copy into the destination
      (operand 2) of the instruction p. }
    function OptPass1VOP(const p : tai) : boolean;
    { Pass 1 peephole patterns that start at a mov instruction. }
    function OptPass1MOV(var p : tai) : boolean;
    function OptPass2MOV(var p : tai) : boolean;

    { Inserts the text s as an assembler comment before p when the unit is
      compiled with DEBUG_AOPTCPU; otherwise it does nothing. }
    procedure DebugMsg(const s : string; p : tai);inline;

    { Rewrites the register allocation entries so that reg is allocated
      from p1 up to and including p2. }
    procedure AllocRegBetween(reg : tregister; p1,p2 : tai;var initialusedregs : TAllUsedRegs);
    { True if p is ret, leave+ret, or "mov %ebp,%esp; pop %ebp; ret". }
    class function IsExitCode(p : tai) : boolean;
    { True if hp1 is an arithmetic/logic instruction whose destination is
      reg and whose source (if any) is a constant or another register. }
    class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean;
    { Removes the closest preceding deallocation of the function result
      register(s) before p, depending on the return type of the current
      procedure. }
    procedure RemoveLastDeallocForFuncRes(p : tai);
  end;
{ MatchInstruction: true if instr is an instruction whose opcode is one of
  the given opcodes and whose operand size is in opsize; an empty opsize
  set accepts every size. }
function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;

{ MatchOperand: true if oper is the given register, the given constant, or
  (third overload) has the same type and value as the second operand. }
function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;

{ Field-by-field comparison of two references. }
function RefsEqual(const r1, r2: treference): boolean;

{ True if ref has no offset, segment or symbols, a scale factor of 0 or 1,
  and its base/index equal the given registers; passing NR_INVALID for
  base or index accepts any register in that position. }
function MatchReference(const ref : treference;base,index : TRegister) : Boolean;

{ True if instr has exactly two operands of types ot0 and ot1 (in that
  order). instr is cast to taicpu without a type check. }
function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
  53. implementation
  54. uses
  55. cutils,
  56. verbose,
  57. procinfo,
  58. symconst,symsym,
  59. itcpugas;
  60. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  61. begin
  62. result :=
  63. (instr.typ = ait_instruction) and
  64. (taicpu(instr).opcode = op) and
  65. ((opsize = []) or (taicpu(instr).opsize in opsize));
  66. end;
  67. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  68. begin
  69. result :=
  70. (instr.typ = ait_instruction) and
  71. ((taicpu(instr).opcode = op1) or
  72. (taicpu(instr).opcode = op2)
  73. ) and
  74. ((opsize = []) or (taicpu(instr).opsize in opsize));
  75. end;
  76. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  77. begin
  78. result :=
  79. (instr.typ = ait_instruction) and
  80. ((taicpu(instr).opcode = op1) or
  81. (taicpu(instr).opcode = op2) or
  82. (taicpu(instr).opcode = op3)
  83. ) and
  84. ((opsize = []) or (taicpu(instr).opsize in opsize));
  85. end;
  86. function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
  87. const opsize : topsizes) : boolean;
  88. var
  89. op : TAsmOp;
  90. begin
  91. result:=false;
  92. for op in ops do
  93. begin
  94. if (instr.typ = ait_instruction) and
  95. (taicpu(instr).opcode = op) and
  96. ((opsize = []) or (taicpu(instr).opsize in opsize)) then
  97. begin
  98. result:=true;
  99. exit;
  100. end;
  101. end;
  102. end;
  103. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  104. begin
  105. result := (oper.typ = top_reg) and (oper.reg = reg);
  106. end;
  107. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  108. begin
  109. result := (oper.typ = top_const) and (oper.val = a);
  110. end;
  111. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  112. begin
  113. result := oper1.typ = oper2.typ;
  114. if result then
  115. case oper1.typ of
  116. top_const:
  117. Result:=oper1.val = oper2.val;
  118. top_reg:
  119. Result:=oper1.reg = oper2.reg;
  120. top_ref:
  121. Result:=RefsEqual(oper1.ref^, oper2.ref^);
  122. else
  123. internalerror(2013102801);
  124. end
  125. end;
  126. function RefsEqual(const r1, r2: treference): boolean;
  127. begin
  128. RefsEqual :=
  129. (r1.offset = r2.offset) and
  130. (r1.segment = r2.segment) and (r1.base = r2.base) and
  131. (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
  132. (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
  133. (r1.relsymbol = r2.relsymbol);
  134. end;
  135. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  136. begin
  137. Result:=(ref.offset=0) and
  138. (ref.scalefactor in [0,1]) and
  139. (ref.segment=NR_NO) and
  140. (ref.symbol=nil) and
  141. (ref.relsymbol=nil) and
  142. ((base=NR_INVALID) or
  143. (ref.base=base)) and
  144. ((index=NR_INVALID) or
  145. (ref.index=index));
  146. end;
  147. function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
  148. begin
  149. Result:=(taicpu(instr).ops=2) and
  150. (taicpu(instr).oper[0]^.typ=ot0) and
  151. (taicpu(instr).oper[1]^.typ=ot1);
  152. end;
{ DebugMsg exists in two variants selected by the DEBUG_AOPTCPU define:
  with the define set, the text s is inserted into the assembler list as a
  comment right before p; without it, the routine is an empty inline stub. }
{$ifdef DEBUG_AOPTCPU}
procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
  begin
    asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  end;
{$else DEBUG_AOPTCPU}
procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  begin
  end;
{$endif DEBUG_AOPTCPU}
{ allocates register reg between (and including) instructions p1 and p2
  the type of p1 and p2 must not be in SkipInstr
  note that this routine is both called from the peephole optimizer
  (where optinfo is not yet initialised) and from the cse (where it is)

  reg             - register to keep allocated
  p1, p2          - first and last instruction of the range
  initialusedregs - used-register state at p1; reg is added to it when an
                    allocation has to be inserted before p1

  Nothing is done for NR_ESP, for the current frame pointer, or when p1 is
  nil. An internalerror (2014022301) is raised if an entry walked over has
  optinfo assigned. }
procedure TX86AsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
  var
    hp, start: tai;
    removedsomething,
    firstRemovedWasAlloc,
    lastRemovedWasDealloc: boolean;
  begin
{$ifdef EXTDEBUG}
{    if assigned(p1.optinfo) and
        (ptaiprop(p1.optinfo)^.usedregs <> initialusedregs) then
      internalerror(2004101010); }
{$endif EXTDEBUG}
    { remember the original start of the range, p1 is advanced below }
    start := p1;
    if (reg = NR_ESP) or
       (reg = current_procinfo.framepointer) or
       not(assigned(p1)) then
      { this happens with registers which are loaded implicitly, outside the }
      { current block (e.g. esi with self)                                   }
      exit;
    { make sure we allocate it for this instruction }
    { p2 now refers to the instruction following the original p2, so the
      walk below also covers the original p2; the boolean result of
      getnextinstruction is not checked }
    getnextinstruction(p2,p2);
    lastRemovedWasDealloc := false;
    removedSomething := false;
    firstRemovedWasAlloc := false;
    { NOTE(review): the allocregdebug section below refers to "supreg", which
      is not declared in this routine, and calls insertllitem with an extra
      asml argument compared to the calls further down - it presumably does
      not compile when allocregdebug is defined; confirm before enabling }
{$ifdef allocregdebug}
    hp := tai_comment.Create(strpnew('allocating '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
      ' from here...'));
    insertllitem(asml,p1.previous,p1,hp);
    hp := tai_comment.Create(strpnew('allocated '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
      ' till here...'));
    insertllitem(asml,p2,p2.next,hp);
{$endif allocregdebug}
    { reg is not yet live at p1: insert an allocation right before p1 and
      record it in the caller's register state }
    if not(RegInUsedRegs(reg,initialusedregs)) then
      begin
        hp := tai_regalloc.alloc(reg,nil);
        insertllItem(p1.previous,p1,hp);
        IncludeRegInUsedRegs(reg,initialusedregs);
      end;
    { walk from p1 to (the advanced) p2 }
    while assigned(p1) and
          (p1 <> p2) do
      begin
        if assigned(p1.optinfo) then
          internalerror(2014022301); // IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs);
        p1 := tai(p1.next);
        { skip everything up to the next real instruction, dropping the
          allocation info for reg on the way }
        repeat
          while assigned(p1) and
                (p1.typ in (SkipInstr-[ait_regalloc])) Do
            p1 := tai(p1.next);
          { remove all allocation/deallocation info about the register in between }
          if assigned(p1) and
             (p1.typ = ait_regalloc) then
            if tai_regalloc(p1).reg=reg then
              begin
                { remember the kind of the first and of the last removed
                  entry; they are re-created after the loop }
                if not removedSomething then
                  begin
                    firstRemovedWasAlloc := tai_regalloc(p1).ratype=ra_alloc;
                    removedSomething := true;
                  end;
                lastRemovedWasDealloc := (tai_regalloc(p1).ratype=ra_dealloc);
                hp := tai(p1.Next);
                asml.Remove(p1);
                p1.free;
                p1 := hp;
              end
            else p1 := tai(p1.next);
        until not(assigned(p1)) or
              not(p1.typ in SkipInstr);
      end;
    { p1 is now the entry at which the walk stopped (nil if the end of the
      list was reached) }
    if assigned(p1) then
      begin
        { the first removed entry was an allocation: allocate reg before the
          start of the range instead }
        if firstRemovedWasAlloc then
          begin
            hp := tai_regalloc.Alloc(reg,nil);
            insertLLItem(start.previous,start,hp);
          end;
        { the last removed entry was a deallocation: release reg again right
          before the entry at which the walk stopped }
        if lastRemovedWasDealloc then
          begin
            hp := tai_regalloc.DeAlloc(reg,nil);
            insertLLItem(p1.previous,p1,hp);
          end;
      end;
  end;
  249. function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  250. var
  251. p: taicpu;
  252. begin
  253. if not assigned(hp) or
  254. (hp.typ <> ait_instruction) then
  255. begin
  256. Result := false;
  257. exit;
  258. end;
  259. p := taicpu(hp);
  260. Result :=
  261. (((p.opcode = A_MOV) or
  262. (p.opcode = A_MOVZX) or
  263. (p.opcode = A_MOVSX) or
  264. (p.opcode = A_LEA) or
  265. (p.opcode = A_VMOVSS) or
  266. (p.opcode = A_VMOVSD) or
  267. (p.opcode = A_VMOVAPD) or
  268. (p.opcode = A_VMOVAPS) or
  269. (p.opcode = A_VMOVQ) or
  270. (p.opcode = A_MOVSS) or
  271. (p.opcode = A_MOVSD) or
  272. (p.opcode = A_MOVQ) or
  273. (p.opcode = A_MOVAPD) or
  274. (p.opcode = A_MOVAPS)) and
  275. (p.oper[1]^.typ = top_reg) and
  276. (getsupreg(p.oper[1]^.reg) = getsupreg(reg)) and
  277. ((p.oper[0]^.typ = top_const) or
  278. ((p.oper[0]^.typ = top_reg) and
  279. (getsupreg(p.oper[0]^.reg) <> getsupreg(reg))) or
  280. ((p.oper[0]^.typ = top_ref) and
  281. not RegInRef(reg,p.oper[0]^.ref^)))) or
  282. ((p.opcode = A_POP) and
  283. (getsupreg(p.oper[0]^.reg) = getsupreg(reg)));
  284. end;
  285. class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
  286. var
  287. hp2,hp3 : tai;
  288. begin
  289. result:=(p.typ=ait_instruction) and
  290. ((taicpu(p).opcode = A_RET) or
  291. ((taicpu(p).opcode=A_LEAVE) and
  292. GetNextInstruction(p,hp2) and
  293. (hp2.typ=ait_instruction) and
  294. (taicpu(hp2).opcode=A_RET)
  295. ) or
  296. ((taicpu(p).opcode=A_MOV) and
  297. (taicpu(p).oper[0]^.typ=top_reg) and
  298. (taicpu(p).oper[0]^.reg=NR_EBP) and
  299. (taicpu(p).oper[1]^.typ=top_reg) and
  300. (taicpu(p).oper[1]^.reg=NR_ESP) and
  301. GetNextInstruction(p,hp2) and
  302. (hp2.typ=ait_instruction) and
  303. (taicpu(hp2).opcode=A_POP) and
  304. (taicpu(hp2).oper[0]^.typ=top_reg) and
  305. (taicpu(hp2).oper[0]^.reg=NR_EBP) and
  306. GetNextInstruction(hp2,hp3) and
  307. (hp3.typ=ait_instruction) and
  308. (taicpu(hp3).opcode=A_RET)
  309. )
  310. );
  311. end;
  312. class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
  313. begin
  314. isFoldableArithOp := False;
  315. case hp1.opcode of
  316. A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
  317. isFoldableArithOp :=
  318. ((taicpu(hp1).oper[0]^.typ = top_const) or
  319. ((taicpu(hp1).oper[0]^.typ = top_reg) and
  320. (taicpu(hp1).oper[0]^.reg <> reg))) and
  321. (taicpu(hp1).oper[1]^.typ = top_reg) and
  322. (taicpu(hp1).oper[1]^.reg = reg);
  323. A_INC,A_DEC,A_NEG,A_NOT:
  324. isFoldableArithOp :=
  325. (taicpu(hp1).oper[0]^.typ = top_reg) and
  326. (taicpu(hp1).oper[0]^.reg = reg);
  327. end;
  328. end;
  329. procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);
  330. procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
  331. var
  332. hp2: tai;
  333. begin
  334. hp2 := p;
  335. repeat
  336. hp2 := tai(hp2.previous);
  337. if assigned(hp2) and
  338. (hp2.typ = ait_regalloc) and
  339. (tai_regalloc(hp2).ratype=ra_dealloc) and
  340. (getregtype(tai_regalloc(hp2).reg) = R_INTREGISTER) and
  341. (getsupreg(tai_regalloc(hp2).reg) = supreg) then
  342. begin
  343. asml.remove(hp2);
  344. hp2.free;
  345. break;
  346. end;
  347. until not(assigned(hp2)) or regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),hp2);
  348. end;
  349. begin
  350. case current_procinfo.procdef.returndef.typ of
  351. arraydef,recorddef,pointerdef,
  352. stringdef,enumdef,procdef,objectdef,errordef,
  353. filedef,setdef,procvardef,
  354. classrefdef,forwarddef:
  355. DoRemoveLastDeallocForFuncRes(RS_EAX);
  356. orddef:
  357. if current_procinfo.procdef.returndef.size <> 0 then
  358. begin
  359. DoRemoveLastDeallocForFuncRes(RS_EAX);
  360. { for int64/qword }
  361. if current_procinfo.procdef.returndef.size = 8 then
  362. DoRemoveLastDeallocForFuncRes(RS_EDX);
  363. end;
  364. end;
  365. end;
  366. function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
  367. var
  368. TmpUsedRegs : TAllUsedRegs;
  369. hp1,hp2 : tai;
  370. begin
  371. result:=false;
  372. if MatchOpType(taicpu(p),top_reg,top_reg) then
  373. begin
  374. { vmova* reg1,reg1
  375. =>
  376. <nop> }
  377. if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
  378. begin
  379. GetNextInstruction(p,hp1);
  380. asml.Remove(p);
  381. p.Free;
  382. p:=hp1;
  383. result:=true;
  384. end
  385. else if GetNextInstruction(p,hp1) then
  386. begin
  387. if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
  388. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  389. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  390. begin
  391. { vmova* reg1,reg2
  392. vmova* reg2,reg3
  393. dealloc reg2
  394. =>
  395. vmova* reg1,reg3 }
  396. CopyUsedRegs(TmpUsedRegs);
  397. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  398. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  399. begin
  400. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  401. asml.Remove(hp1);
  402. hp1.Free;
  403. result:=true;
  404. end
  405. { special case:
  406. vmova* reg1,reg2
  407. vmova* reg2,reg1
  408. =>
  409. vmova* reg1,reg2 }
  410. else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
  411. begin
  412. asml.Remove(hp1);
  413. hp1.Free;
  414. result:=true;
  415. end
  416. end
  417. else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
  418. { we mix single and double opperations here because we assume that the compiler
  419. generates vmovapd only after double operations and vmovaps only after single operations }
  420. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
  421. GetNextInstruction(hp1,hp2) and
  422. MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
  423. MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
  424. begin
  425. CopyUsedRegs(TmpUsedRegs);
  426. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  427. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  428. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs))
  429. then
  430. begin
  431. taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
  432. asml.Remove(p);
  433. p.Free;
  434. asml.Remove(hp2);
  435. hp2.Free;
  436. p:=hp1;
  437. end;
  438. end;
  439. end;
  440. end;
  441. end;
  442. function TX86AsmOptimizer.OptPass1VOP(const p : tai) : boolean;
  443. var
  444. TmpUsedRegs : TAllUsedRegs;
  445. hp1 : tai;
  446. begin
  447. result:=false;
  448. if GetNextInstruction(p,hp1) and
  449. { we mix single and double opperations here because we assume that the compiler
  450. generates vmovapd only after double operations and vmovaps only after single operations }
  451. MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
  452. MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
  453. (taicpu(hp1).oper[1]^.typ=top_reg) then
  454. begin
  455. CopyUsedRegs(TmpUsedRegs);
  456. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  457. if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)
  458. ) then
  459. begin
  460. taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
  461. asml.Remove(hp1);
  462. hp1.Free;
  463. result:=true;
  464. end;
  465. end;
  466. end;
  467. function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  468. var
  469. hp1, hp2: tai;
  470. TmpUsedRegs : TAllUsedRegs;
  471. GetNextIntruction_p : Boolean;
  472. begin
  473. Result:=false;
  474. GetNextIntruction_p:=GetNextInstruction(p, hp1);
  475. if GetNextIntruction_p and
  476. MatchInstruction(hp1,A_AND,[]) and
  477. (taicpu(p).oper[1]^.typ = top_reg) and
  478. MatchOpType(taicpu(hp1),top_const,top_reg) and
  479. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
  480. case taicpu(p).opsize Of
  481. S_L:
  482. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  483. begin
  484. DebugMsg('PeepHole Optimization,MovAnd2Mov',p);
  485. asml.remove(hp1);
  486. hp1.free;
  487. Result:=true;
  488. exit;
  489. end;
  490. end
  491. else if GetNextIntruction_p and
  492. MatchInstruction(hp1,A_MOV,[]) and
  493. (taicpu(p).oper[1]^.typ = top_reg) and
  494. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
  495. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  496. begin
  497. CopyUsedRegs(TmpUsedRegs);
  498. { we have
  499. mov x, %treg
  500. mov %treg, y
  501. }
  502. if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
  503. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  504. { we've got
  505. mov x, %treg
  506. mov %treg, y
  507. with %treg is not used after }
  508. case taicpu(p).oper[0]^.typ Of
  509. top_reg:
  510. begin
  511. { change
  512. mov %reg, %treg
  513. mov %treg, y
  514. to
  515. mov %reg, y
  516. }
  517. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  518. asml.remove(hp1);
  519. hp1.free;
  520. ReleaseUsedRegs(TmpUsedRegs);
  521. Exit;
  522. end;
  523. top_ref:
  524. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  525. begin
  526. { change
  527. mov mem, %treg
  528. mov %treg, %reg
  529. to
  530. mov mem, %reg"
  531. }
  532. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  533. asml.remove(hp1);
  534. hp1.free;
  535. ReleaseUsedRegs(TmpUsedRegs);
  536. Exit;
  537. end;
  538. end;
  539. ReleaseUsedRegs(TmpUsedRegs);
  540. end
  541. else
  542. { Change
  543. mov %reg1, %reg2
  544. xxx %reg2, ???
  545. to
  546. mov %reg1, %reg2
  547. xxx %reg1, ???
  548. to avoid a write/read penalty
  549. }
  550. if MatchOpType(taicpu(p),top_reg,top_reg) and
  551. GetNextInstruction(p,hp1) and
  552. (tai(hp1).typ = ait_instruction) and
  553. (taicpu(hp1).ops >= 1) and
  554. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  555. { we have
  556. mov %reg1, %reg2
  557. XXX %reg2, ???
  558. }
  559. begin
  560. if ((taicpu(hp1).opcode = A_OR) or
  561. (taicpu(hp1).opcode = A_TEST)) and
  562. (taicpu(hp1).oper[1]^.typ = top_reg) and
  563. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  564. { we have
  565. mov %reg1, %reg2
  566. test/or %reg2, %reg2
  567. }
  568. begin
  569. CopyUsedRegs(TmpUsedRegs);
  570. { reg1 will be used after the first instruction,
  571. so update the allocation info }
  572. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  573. if GetNextInstruction(hp1, hp2) and
  574. (hp2.typ = ait_instruction) and
  575. taicpu(hp2).is_jmp and
  576. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  577. { change
  578. mov %reg1, %reg2
  579. test/or %reg2, %reg2
  580. jxx
  581. to
  582. test %reg1, %reg1
  583. jxx
  584. }
  585. begin
  586. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  587. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  588. asml.remove(p);
  589. p.free;
  590. p := hp1;
  591. ReleaseUsedRegs(TmpUsedRegs);
  592. Exit;
  593. end
  594. else
  595. { change
  596. mov %reg1, %reg2
  597. test/or %reg2, %reg2
  598. to
  599. mov %reg1, %reg2
  600. test/or %reg1, %reg1
  601. }
  602. begin
  603. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  604. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  605. end;
  606. ReleaseUsedRegs(TmpUsedRegs);
  607. end
  608. end
  609. else
  610. { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  611. x >= RetOffset) as it doesn't do anything (it writes either to a
  612. parameter or to the temporary storage room for the function
  613. result)
  614. }
  615. if GetNextIntruction_p and
  616. (tai(hp1).typ = ait_instruction) then
  617. begin
  618. if IsExitCode(hp1) and
  619. MatchOpType(p,top_reg,top_ref) and
  620. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  621. not(assigned(current_procinfo.procdef.funcretsym) and
  622. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  623. (taicpu(p).oper[1]^.ref^.index = NR_NO) then
  624. begin
  625. asml.remove(p);
  626. p.free;
  627. p := hp1;
  628. DebugMsg('Peephole removed deadstore before leave/ret',p);
  629. RemoveLastDeallocForFuncRes(p);
  630. end
  631. { change
  632. mov reg1, mem1
  633. cmp x, mem1
  634. to
  635. mov reg1, mem1
  636. cmp x, reg1
  637. }
  638. else if MatchOpType(p,top_reg,top_ref) and
  639. MatchInstruction(hp1,A_CMP,[taicpu(p).opsize]) and
  640. (taicpu(hp1).oper[1]^.typ = top_ref) and
  641. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  642. begin
  643. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  644. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  645. end;
  646. end;
  647. { Next instruction is also a MOV ? }
  648. if GetNextIntruction_p and
  649. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  650. begin
  651. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  652. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  653. { mov reg1, mem1 or mov mem1, reg1
  654. mov mem2, reg2 mov reg2, mem2}
  655. begin
  656. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  657. { mov reg1, mem1 or mov mem1, reg1
  658. mov mem2, reg1 mov reg2, mem1}
  659. begin
  660. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  661. { Removes the second statement from
  662. mov reg1, mem1/reg2
  663. mov mem1/reg2, reg1 }
  664. begin
  665. if (taicpu(p).oper[0]^.typ = top_reg) then
  666. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  667. DebugMsg('PeepHole Optimization,MovMov2Mov1',p);
  668. asml.remove(hp1);
  669. hp1.free;
  670. Result:=true;
  671. exit;
  672. end
  673. else
  674. begin
  675. CopyUsedRegs(TmpUsedRegs);
  676. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  677. if (taicpu(p).oper[1]^.typ = top_ref) and
  678. { mov reg1, mem1
  679. mov mem2, reg1 }
  680. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  681. GetNextInstruction(hp1, hp2) and
  682. MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
  683. OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  684. OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
  685. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  686. { change to
  687. mov reg1, mem1 mov reg1, mem1
  688. mov mem2, reg1 cmp reg1, mem2
  689. cmp mem1, reg1
  690. }
  691. begin
  692. asml.remove(hp2);
  693. hp2.free;
  694. taicpu(hp1).opcode := A_CMP;
  695. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  696. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  697. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  698. DebugMsg('Peephole MovMovCmp2MovCmp done',hp1);
  699. end;
  700. ReleaseUsedRegs(TmpUsedRegs);
  701. end;
  702. end
  703. else if (taicpu(p).oper[1]^.typ=top_ref) and
  704. OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  705. begin
  706. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  707. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  708. DebugMsg('PeepHole Optimization,MovMov2MovMov1',p);
  709. end
  710. else
  711. begin
  712. CopyUsedRegs(TmpUsedRegs);
  713. if GetNextInstruction(hp1, hp2) and
  714. MatchOpType(taicpu(p),top_ref,top_reg) and
  715. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  716. (taicpu(hp1).oper[1]^.typ = top_ref) and
  717. MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
  718. MatchOpType(taicpu(hp2),top_ref,top_reg) and
  719. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  720. if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
  721. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  722. { mov mem1, %reg1
  723. mov %reg1, mem2
  724. mov mem2, reg2
  725. to:
  726. mov mem1, reg2
  727. mov reg2, mem2}
  728. begin
  729. AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  730. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  731. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  732. asml.remove(hp2);
  733. hp2.free;
  734. end
  735. {$ifdef i386}
  736. { this is enabled for i386 only, as the rules to create the reg sets below
  737. are too complicated for x86-64, so this makes this code too error prone
  738. on x86-64
  739. }
  740. else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  741. not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
  742. not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
  743. { mov mem1, reg1 mov mem1, reg1
  744. mov reg1, mem2 mov reg1, mem2
  745. mov mem2, reg2 mov mem2, reg1
  746. to: to:
  747. mov mem1, reg1 mov mem1, reg1
  748. mov mem1, reg2 mov reg1, mem2
  749. mov reg1, mem2
  750. or (if mem1 depends on reg1
  751. and/or if mem2 depends on reg2)
  752. to:
  753. mov mem1, reg1
  754. mov reg1, mem2
  755. mov reg1, reg2
  756. }
  757. begin
  758. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  759. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  760. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  761. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  762. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  763. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  764. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  765. AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  766. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  767. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  768. AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  769. end
  770. else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  771. begin
  772. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  773. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  774. end
  775. else
  776. begin
  777. asml.remove(hp2);
  778. hp2.free;
  779. end
  780. {$endif i386}
  781. ;
  782. ReleaseUsedRegs(TmpUsedRegs);
  783. end;
  784. end
  785. (* { movl [mem1],reg1
  786. movl [mem1],reg2
  787. to
  788. movl [mem1],reg1
  789. movl reg1,reg2
  790. }
  791. else if (taicpu(p).oper[0]^.typ = top_ref) and
  792. (taicpu(p).oper[1]^.typ = top_reg) and
  793. (taicpu(hp1).oper[0]^.typ = top_ref) and
  794. (taicpu(hp1).oper[1]^.typ = top_reg) and
  795. (taicpu(p).opsize = taicpu(hp1).opsize) and
  796. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  797. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  798. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  799. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  800. else*)
  801. { movl const1,[mem1]
  802. movl [mem1],reg1
  803. to
  804. movl const1,reg1
  805. movl reg1,[mem1]
  806. }
  807. else if MatchOpType(Taicpu(p),top_const,top_ref) and
  808. MatchOpType(Taicpu(hp1),top_ref,top_reg) and
  809. (taicpu(p).opsize = taicpu(hp1).opsize) and
  810. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  811. not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
  812. begin
  813. allocregbetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  814. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  815. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  816. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  817. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  818. end
  819. end
  820. else if (taicpu(p).oper[1]^.typ = top_reg) and
  821. GetNextIntruction_p and
  822. (hp1.typ = ait_instruction) and
  823. GetNextInstruction(hp1, hp2) and
  824. (hp2.typ = ait_instruction) and
  825. MatchInstruction(taicpu(hp2),A_MOV,[]) and
  826. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  827. (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
  828. ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
  829. IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
  830. ) then
  831. { change movsX/movzX reg/ref, reg2
  832. add/sub/or/... reg3/$const, reg2
  833. mov reg2 reg/ref
  834. to add/sub/or/... reg3/$const, reg/ref }
  835. begin
  836. CopyUsedRegs(TmpUsedRegs);
  837. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  838. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  839. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  840. begin
  841. { by example:
  842. movswl %si,%eax movswl %si,%eax p
  843. decl %eax addl %edx,%eax hp1
  844. movw %ax,%si movw %ax,%si hp2
  845. ->
  846. movswl %si,%eax movswl %si,%eax p
  847. decw %eax addw %edx,%eax hp1
  848. movw %ax,%si movw %ax,%si hp2
  849. }
  850. DebugMsg('PeepHole Optimization '+
  851. std_op2str[taicpu(p).opcode]+gas_opsize2str[taicpu(p).opsize]+' '+
  852. std_op2str[taicpu(hp1).opcode]+gas_opsize2str[taicpu(hp1).opsize]+' '+
  853. std_op2str[taicpu(hp2).opcode]+gas_opsize2str[taicpu(hp2).opsize],p);
  854. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  855. {
  856. ->
  857. movswl %si,%eax movswl %si,%eax p
  858. decw %si addw %dx,%si hp1
  859. movw %ax,%si movw %ax,%si hp2
  860. }
  861. case taicpu(hp1).ops of
  862. 1:
  863. begin
  864. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  865. if taicpu(hp1).oper[0]^.typ=top_reg then
  866. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  867. end;
  868. 2:
  869. begin
  870. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  871. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  872. (taicpu(hp1).opcode<>A_SHL) and
  873. (taicpu(hp1).opcode<>A_SHR) and
  874. (taicpu(hp1).opcode<>A_SAR) then
  875. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  876. end;
  877. else
  878. internalerror(2008042701);
  879. end;
  880. {
  881. ->
  882. decw %si addw %dx,%si p
  883. }
  884. asml.remove(p);
  885. asml.remove(hp2);
  886. p.Free;
  887. hp2.Free;
  888. p := hp1;
  889. end;
  890. ReleaseUsedRegs(TmpUsedRegs);
  891. end;
  892. if GetNextIntruction_p and
  893. MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  894. GetNextInstruction(hp1, hp2) and
  895. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  896. MatchOperand(Taicpu(p).oper[0]^,0) and
  897. (Taicpu(p).oper[1]^.typ = top_reg) and
  898. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  899. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  900. { mov reg1,0
  901. bts reg1,operand1 --> mov reg1,operand2
  902. or reg1,operand2 bts reg1,operand1}
  903. begin
  904. Taicpu(hp2).opcode:=A_MOV;
  905. asml.remove(hp1);
  906. insertllitem(hp2,hp2.next,hp1);
  907. asml.remove(p);
  908. p.free;
  909. p:=hp1;
  910. end;
  911. if GetNextIntruction_p and
  912. MatchInstruction(hp1,A_LEA,[S_L]) and
  913. MatchOpType(Taicpu(p),top_ref,top_reg) and
  914. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  915. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  916. ) or
  917. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  918. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  919. )
  920. ) then
  921. { mov reg1,ref
  922. lea reg2,[reg1,reg2]
  923. to
  924. add reg2,ref}
  925. begin
  926. CopyUsedRegs(TmpUsedRegs);
  927. { reg1 may not be used afterwards }
  928. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  929. begin
  930. Taicpu(hp1).opcode:=A_ADD;
  931. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  932. DebugMsg('Peephole MovLea2Add done',hp1);
  933. asml.remove(p);
  934. p.free;
  935. p:=hp1;
  936. end;
  937. ReleaseUsedRegs(TmpUsedRegs);
  938. end;
  939. end;
  function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
    { Peephole handling for the MOV instruction referenced by p.

      Two rewrites are attempted, in this order:

        1. mov reg1, reg2
           mov/zx/sx (reg2, ..), reg2
             -> mov/zx/sx (reg1, ..), reg2

        2. mov (ref), reg
           add/sub/or/.../lea  reg2/$const, reg
           mov reg, (ref)
             -> add/sub/or/... reg2/$const, (ref)
           (only when reg is not used after the final store)

      Whenever a rewrite removes p from the list, p is set to the instruction
      that followed it. The function returns true only when rewrite 1 was
      applied; in every other case (including a successful rewrite 2) the
      result stays false. }
    var
      TmpUsedRegs : TAllUsedRegs;
      nextInstr,storeInstr : tai;
      canFold : boolean;
    begin
      Result:=false;

      { --- Rewrite 1: forward the source register into the address --- }
      if MatchOpType(taicpu(p),top_reg,top_reg) and
         GetNextInstruction(p, nextInstr) and
         MatchInstruction(nextInstr,A_MOV,A_MOVZX,A_MOVSX,[]) and
         MatchOpType(taicpu(nextInstr),top_ref,top_reg) and
         ((taicpu(nextInstr).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) or
          (taicpu(nextInstr).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)) and
         (getsupreg(taicpu(nextInstr).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
        begin
          { the load overwrites reg2 anyway, so address through reg1 instead
            and drop the register-to-register copy }
          if (taicpu(nextInstr).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
            taicpu(nextInstr).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
          if (taicpu(nextInstr).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
            taicpu(nextInstr).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
          asml.remove(p);
          p.free;
          p := nextInstr;
          Result:=true;
          exit;
        end;

      { --- Rewrite 2: fold load / arithmetic / store into one instruction --- }

      { p must load from memory and be followed by a real instruction }
      if (taicpu(p).oper[0]^.typ <> top_ref) or
         not(GetNextInstruction(p,nextInstr)) or
         (nextInstr.typ <> ait_instruction) then
        exit;

      { the middle instruction is either a foldable arithmetic operation on
        the loaded register, or an LEA that targets the loaded register and
        uses it as exactly one of base/index }
      canFold := IsFoldableArithOp(taicpu(nextInstr),taicpu(p).oper[1]^.reg);
      if not canFold then
        canFold :=
          (taicpu(nextInstr).opcode=A_LEA) and
          (taicpu(nextInstr).oper[1]^.reg = taicpu(p).oper[1]^.reg) and
          ((MatchReference(taicpu(nextInstr).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
            (taicpu(nextInstr).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
           ) or
           (MatchReference(taicpu(nextInstr).oper[0]^.ref^,NR_INVALID,
              taicpu(p).oper[1]^.reg) and
            (taicpu(nextInstr).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg))
          );
      if not canFold then
        exit;

      { the third instruction must store the loaded register back to memory }
      if not(GetNextInstruction(nextInstr,storeInstr)) or
         not(MatchInstruction(storeInstr,A_MOV,[])) or
         not(MatchOperand(taicpu(p).oper[1]^,taicpu(storeInstr).oper[0]^)) or
         (taicpu(storeInstr).oper[1]^.typ <> top_ref) then
        exit;

      CopyUsedRegs(TmpUsedRegs);
      UpdateUsedRegs(TmpUsedRegs,tai(nextInstr.next));
      { fold only when the store goes back to the very location that was
        loaded and the temporary register is not needed afterwards }
      if (RefsEqual(taicpu(storeInstr).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
          not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,storeInstr, TmpUsedRegs))) then
        begin
          case taicpu(nextInstr).opcode of
            A_INC,A_DEC,A_NOT,A_NEG :
              { single-operand instructions: operate on memory directly }
              taicpu(nextInstr).loadRef(0,taicpu(p).oper[0]^.ref^);
            A_LEA :
              begin
                { lea (reg,other),reg becomes add other,(ref): the source
                  operand is whichever of base/index is not the loaded reg }
                taicpu(nextInstr).opcode:=A_ADD;
                if taicpu(nextInstr).oper[0]^.ref^.index=taicpu(p).oper[1]^.reg then
                  taicpu(nextInstr).loadreg(0,taicpu(nextInstr).oper[0]^.ref^.base)
                else
                  taicpu(nextInstr).loadreg(0,taicpu(nextInstr).oper[0]^.ref^.index);
                taicpu(nextInstr).loadRef(1,taicpu(p).oper[0]^.ref^);
                DebugMsg('Peephole FoldLea done',nextInstr);
              end
            else
              { two-operand instructions: memory becomes the destination }
              taicpu(nextInstr).loadRef(1,taicpu(p).oper[0]^.ref^);
          end;
          { the load and the store are now redundant }
          asml.remove(p);
          asml.remove(storeInstr);
          p.free;
          storeInstr.free;
          p := nextInstr
        end;
      ReleaseUsedRegs(TmpUsedRegs);
    end;
  function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
    { First-pass peephole handling for the AND instruction referenced by p.

      The instruction following p is inspected and at most one of these
      rewrites is applied:

        - and const1,reg / and const2,reg   -> and (const1 and const2),reg
        - and const,reg  / movzx reg',reg   -> and const,reg
        - and const,reg  / movsx reg',reg   -> and const,reg
        - and x,reg      / jcc              -> test x,reg   (reg not in UsedRegs)

      Returns true only when the first rewrite was applied (p then refers to
      the merged AND); otherwise the result is false. Nothing is done when p
      has no following instruction. }
    var
      hp1 : tai;
    begin
      Result:=false;

      if not(GetNextInstruction(p, hp1)) then
        exit;

      if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_AND,[]) and
        MatchOpType(hp1,top_const,top_reg) and
        (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
        { the second register must contain the first one, so compare their subreg types }
        (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
        (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
        { change
            and const1, reg
            and const2, reg
          to
            and (const1 and const2), reg
        }
        begin
          taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
          DebugMsg('Peephole AndAnd2And done',hp1);
          asml.remove(p);
          p.Free;
          p:=hp1;
          Result:=true;
          exit;
        end
      else if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_MOVZX,[]) and
        (taicpu(hp1).oper[0]^.typ = top_reg) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
        { source and destination of the movzx must be parts of the same
          register. This has to be a super-register comparison: the
          sub-registers of a zero extension always differ, so comparing them
          would make this branch unreachable. }
        (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
        { a high-byte source (%ah etc.) is not covered by the low-bit mask
          test below, so the extension is not redundant in that case }
        (getsubreg(taicpu(hp1).oper[0]^.reg)<>R_SUBH) and
        (((taicpu(p).opsize=S_W) and
          (taicpu(hp1).opsize=S_BW)) or
         ((taicpu(p).opsize=S_L) and
          (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
          or
         ((taicpu(p).opsize=S_Q) and
          (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
        ) then
        { change
            and const, reg
            movzx reg', reg
          to
            and const, reg
          when const already clears every bit the zero extension would clear
        }
        begin
          if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
               ) or
             (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
              or
             (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $ffffffff)=taicpu(p).oper[0]^.val)
             )
{$endif x86_64}
            then
            begin
              DebugMsg('Peephole AndMovzToAnd done',p);
              asml.remove(hp1);
              hp1.free;
            end;
        end
      else if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
        (taicpu(hp1).oper[0]^.typ = top_reg) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
        (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
        { as above: the mask test only reasons about the low bits, so a
          high-byte source register must not be optimized }
        (getsubreg(taicpu(hp1).oper[0]^.reg)<>R_SUBH) and
        (((taicpu(p).opsize=S_W) and
          (taicpu(hp1).opsize=S_BW)) or
         ((taicpu(p).opsize=S_L) and
          (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
          or
         ((taicpu(p).opsize=S_Q) and
          (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
        ) then
        { change
            and const, reg
            movsx reg', reg
          to
            and const, reg
          when const also clears the sign bit of the narrow operand, so the
          sign extension can only produce zero bits
        }
        begin
          if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
               ) or
             (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
              or
             (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
             )
{$endif x86_64}
            then
            begin
              DebugMsg('PeepHole Optimization,AndMovsxToAnd',p);
              asml.remove(hp1);
              hp1.free;
            end;
        end
      else if (taicpu(p).oper[1]^.typ = top_reg) and
        (hp1.typ = ait_instruction) and
        (taicpu(hp1).is_jmp) and
        (taicpu(hp1).opcode<>A_JMP) and
        not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
        { change
            and x, reg
            jxx
          to
            test x, reg
            jxx
          if reg is deallocated before the
          jump, but only if it's a conditional jump (PFV)
        }
        taicpu(p).opcode := A_TEST;
    end;
  procedure TX86AsmOptimizer.PostPeepholeOptMov(const p : tai);
    { Post-peephole rewrite of "mov $0, %reg" into "xor %reg, %reg".

      The instruction in p is modified in place. Nothing happens unless the
      source operand matches the constant 0, the destination is a register
      and NR_DEFAULTFLAGS is not contained in UsedRegs. }
    var
      instr : taicpu;
    begin
      instr := taicpu(p);

      { only a zero constant as source qualifies }
      if not(MatchOperand(instr.oper[0]^,0)) then
        exit;
      { the destination has to be a register }
      if instr.oper[1]^.typ <> top_reg then
        exit;
      { leave the instruction alone while the flags register is in use }
      if RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs) then
        exit;

      instr.opcode := A_XOR;
      instr.loadReg(0,instr.oper[1]^.reg);
    end;
  1147. end.