aoptx86.pas 49 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
  27. type
  { Peephole optimizer shared by the x86 targets; extends the generic
    TAsmOptimizer with x86 specific pattern matching helpers. }
  TX86AsmOptimizer = class(TAsmOptimizer)
    { reports whether instruction hp overwrites reg with a value that does
      not depend on the old contents of reg }
    function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
  protected
    { --- individual peephole passes (p is the instruction being looked at) --- }
    procedure PostPeepholeOptMov(const p : tai);
    function OptPass1AND(var p : tai) : boolean;
    function OptPass1VMOVAP(var p : tai) : boolean;
    function OptPass1VOP(const p : tai) : boolean;
    function OptPass1MOV(var p : tai) : boolean;
    function OptPass2MOV(var p : tai) : boolean;

    { --- support routines --- }

    { inserts s as an assembler comment before p when DEBUG_AOPTCPU is
      defined, does nothing otherwise }
    procedure DebugMsg(const s : string; p : tai);inline;

    { rewrites the regalloc information so that reg is allocated from p1
      up to and including p2 }
    procedure AllocRegBetween(reg : tregister; p1,p2 : tai;var initialusedregs : TAllUsedRegs);

    { true if p starts a ret, leave/ret or mov ebp,esp / pop ebp / ret sequence }
    class function IsExitCode(p : tai) : boolean;

    { true if hp1 is an arithmetic/logic instruction that only modifies reg }
    class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean;

    { removes the last deallocation of the function result register(s)
      found before p }
    procedure RemoveLastDeallocForFuncRes(p : tai);
  end;
  { instruction matching: instr must be an instruction with one of the given
    opcodes; an empty opsize set accepts every operand size }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;

  { operand matching: against a register, a constant or another operand }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;

  { field-wise comparison of two references }
  function RefsEqual(const r1, r2: treference): boolean;

  { true for a plain reference (no offset, scaling, segment or symbols) using
    the given base/index registers; NR_INVALID accepts any register }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;

  { true if instr has exactly two operands of the types ot0 and ot1 }
  function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
  53. implementation
  54. uses
  55. cutils,
  56. verbose,
  57. procinfo,
  58. symconst,symsym,
  59. itcpugas;
  { Returns true when instr is a CPU instruction with opcode op whose operand
    size is a member of opsize. An empty opsize set matches any size. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
    begin
      result := false;
      { anything that is not an instruction can never match }
      if instr.typ <> ait_instruction then
        exit;
      if taicpu(instr).opcode <> op then
        exit;
      result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { Returns true when instr is a CPU instruction whose opcode is op1 or op2
    and whose operand size is a member of opsize (empty set = any size). }
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
    var
      opc: TAsmOp;
    begin
      result := false;
      if instr.typ = ait_instruction then
        begin
          opc := taicpu(instr).opcode;
          if (opc = op1) or (opc = op2) then
            result := (opsize = []) or (taicpu(instr).opsize in opsize);
        end;
    end;
  { Returns true when instr is a CPU instruction whose opcode is op1, op2 or
    op3 and whose operand size is a member of opsize (empty set = any size). }
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
    var
      opc: TAsmOp;
    begin
      result := false;
      if instr.typ = ait_instruction then
        begin
          opc := taicpu(instr).opcode;
          if (opc = op1) or (opc = op2) or (opc = op3) then
            result := (opsize = []) or (taicpu(instr).opsize in opsize);
        end;
    end;
  { Returns true when instr is a CPU instruction whose opcode occurs in ops
    and whose operand size is a member of opsize (empty set = any size).
    An empty ops array never matches. }
  function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
    const opsize : topsizes) : boolean;
    var
      i : longint;
    begin
      result:=false;
      { these two conditions do not depend on the opcode being tested, so
        they are evaluated once up front }
      if instr.typ <> ait_instruction then
        exit;
      if not((opsize = []) or (taicpu(instr).opsize in opsize)) then
        exit;
      for i:=low(ops) to high(ops) do
        if taicpu(instr).opcode = ops[i] then
          begin
            result:=true;
            exit;
          end;
    end;
  { True if oper is a register operand holding exactly the register reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ = top_reg then
        result := (oper.reg = reg)
      else
        result := false;
    end;
  { True if oper is a constant operand with the value a. }
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
    begin
      if oper.typ = top_const then
        result := (oper.val = a)
      else
        result := false;
    end;
  { Compares two operands: they match when they have the same operand type
    and the same constant value, register or (field-wise) reference.
    Equal operand types other than const/reg/ref trigger an internal error. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
    begin
      { different kinds of operand can never be equal }
      if oper1.typ <> oper2.typ then
        begin
          result := false;
          exit;
        end;
      case oper1.typ of
        top_const:
          result := (oper1.val = oper2.val);
        top_reg:
          result := (oper1.reg = oper2.reg);
        top_ref:
          result := RefsEqual(oper1.ref^, oper2.ref^);
        else
          begin
            result := true;
            internalerror(2013102801);
          end;
      end;
    end;
  { True if the two references agree in offset, segment, base, index,
    scale factor, symbol, relative symbol and address type. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      result := false;
      { registers and scaling }
      if (r1.segment <> r2.segment) or
         (r1.base <> r2.base) or
         (r1.index <> r2.index) or
         (r1.scalefactor <> r2.scalefactor) then
        exit;
      { displacement and symbolic parts }
      if (r1.offset <> r2.offset) or
         (r1.symbol <> r2.symbol) or
         (r1.relsymbol <> r2.relsymbol) or
         (r1.refaddr <> r2.refaddr) then
        exit;
      result := true;
    end;
  { True if ref is a plain register reference: zero offset, scale factor 0
    or 1, no segment override and no symbols. When base (resp. index) is not
    NR_INVALID the reference must additionally use exactly that base (resp.
    index) register. }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
    begin
      Result:=false;
      { reject anything carrying a displacement, scaling, segment or symbol }
      if (ref.offset<>0) or
         not(ref.scalefactor in [0,1]) or
         (ref.segment<>NR_NO) or
         (ref.symbol<>nil) or
         (ref.relsymbol<>nil) then
        exit;
      { NR_INVALID acts as a wildcard for the register checks }
      if (base<>NR_INVALID) and (ref.base<>base) then
        exit;
      if (index<>NR_INVALID) and (ref.index<>index) then
        exit;
      Result:=true;
    end;
  { True if instr is a CPU instruction with exactly two operands whose types
    are ot0 (operand 0) and ot1 (operand 1).

    instr is declared as a generic tai, so it is checked to really be an
    instruction before it is treated as a taicpu; this mirrors what the
    MatchInstruction helpers do and makes the function safe to call on any
    list entry. }
  function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
    begin
      Result:=(instr.typ=ait_instruction) and
        (taicpu(instr).ops=2) and
        (taicpu(instr).oper[0]^.typ=ot0) and
        (taicpu(instr).oper[1]^.typ=ot1);
    end;
  { Debugging aid: when DEBUG_AOPTCPU is defined the message s is inserted
    as an assembler comment in front of p, otherwise the call is a no-op. }
  procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
    begin
  {$ifdef DEBUG_AOPTCPU}
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  {$endif DEBUG_AOPTCPU}
    end;
  { Allocates register reg between (and including) instructions p1 and p2.
    The type of p1 and p2 must not be in SkipInstr.

    reg             - register that has to be marked as allocated
    p1, p2          - first and last instruction of the range
    initialusedregs - register usage at p1; reg is added to it when an
                      allocation marker has to be inserted in front of p1

    All regalloc entries for reg found inside the range are removed. If the
    first removed entry was an allocation, a new allocation is inserted in
    front of the original p1; if the last removed entry was a deallocation,
    a new deallocation is inserted in front of the instruction following p2.

    Note that this routine is called both from the peephole optimizer (where
    optinfo is not yet initialised) and from the cse (where it is). }
  procedure TX86AsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
    var
      hp, start: tai;
      removedsomething,
      firstRemovedWasAlloc,
      lastRemovedWasDealloc: boolean;
    begin
  {$ifdef EXTDEBUG}
  {    if assigned(p1.optinfo) and
         (ptaiprop(p1.optinfo)^.usedregs <> initialusedregs) then
        internalerror(2004101010); }
  {$endif EXTDEBUG}
      start := p1;
      if (reg = NR_ESP) or
         (reg = current_procinfo.framepointer) or
         not(assigned(p1)) then
        { this happens with registers which are loaded implicitly, outside the }
        { current block (e.g. esi with self)                                   }
        exit;
      { make sure we allocate it for this instruction }
      getnextinstruction(p2,p2);
      lastRemovedWasDealloc := false;
      removedSomething := false;
      firstRemovedWasAlloc := false;
  {$ifdef allocregdebug}
      { reg is already a complete register, so its name can be printed
        directly; InsertLLItem takes (prev, following, new) }
      hp := tai_comment.Create(strpnew('allocating '+std_regname(reg)+
        ' from here...'));
      insertllitem(p1.previous,p1,hp);
      { p2 is not assigned when there was no instruction after the range }
      if assigned(p2) then
        begin
          hp := tai_comment.Create(strpnew('allocated '+std_regname(reg)+
            ' till here...'));
          insertllitem(p2,p2.next,hp);
        end;
  {$endif allocregdebug}
      { reg is not live yet at p1: start its life time right before p1 }
      if not(RegInUsedRegs(reg,initialusedregs)) then
        begin
          hp := tai_regalloc.alloc(reg,nil);
          insertllItem(p1.previous,p1,hp);
          IncludeRegInUsedRegs(reg,initialusedregs);
        end;
      while assigned(p1) and
            (p1 <> p2) do
        begin
          if assigned(p1.optinfo) then
            internalerror(2014022301); // IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs);
          p1 := tai(p1.next);
          repeat
            { skip everything that is neither an instruction nor a regalloc }
            while assigned(p1) and
                  (p1.typ in (SkipInstr-[ait_regalloc])) Do
              p1 := tai(p1.next);
            { remove all allocation/deallocation info about the register in between }
            if assigned(p1) and
               (p1.typ = ait_regalloc) then
              if tai_regalloc(p1).reg=reg then
                begin
                  if not removedSomething then
                    begin
                      firstRemovedWasAlloc := tai_regalloc(p1).ratype=ra_alloc;
                      removedSomething := true;
                    end;
                  lastRemovedWasDealloc := (tai_regalloc(p1).ratype=ra_dealloc);
                  { fetch the successor before p1 is destroyed }
                  hp := tai(p1.Next);
                  asml.Remove(p1);
                  p1.free;
                  p1 := hp;
                end
              else p1 := tai(p1.next);
          until not(assigned(p1)) or
                not(p1.typ in SkipInstr);
        end;
      { re-create the boundary markers that were removed above }
      if assigned(p1) then
        begin
          if firstRemovedWasAlloc then
            begin
              hp := tai_regalloc.Alloc(reg,nil);
              insertLLItem(start.previous,start,hp);
            end;
          if lastRemovedWasDealloc then
            begin
              hp := tai_regalloc.DeAlloc(reg,nil);
              insertLLItem(p1.previous,p1,hp);
            end;
        end;
    end;
  { Returns true if instruction hp loads reg with a new value that does not
    depend on the previous contents of reg.

    reg - register being queried
    hp  - list entry to inspect; nil or a non-instruction yields false

    Recognised are two-operand move-like instructions (mov, movzx, movsx, lea
    and the scalar/aligned SSE/AVX moves) whose destination is reg and whose
    source is a constant, another register or a reference not using reg, as
    well as a pop into reg.

    The operand count is verified before operands are accessed, because some
    of the opcodes also exist in forms with fewer operands (e.g. the string
    instruction movsd). Registers are compared by register type as well as by
    super register, since super register numbers are only meaningful within
    one register type. Answering false is always the conservative result. }
  function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      Result := false;
      if not assigned(hp) or
         (hp.typ <> ait_instruction) then
        exit;
      p := taicpu(hp);
      case p.opcode of
        A_MOV,A_MOVZX,A_MOVSX,A_LEA,
        A_VMOVSS,A_VMOVSD,A_VMOVAPD,A_VMOVAPS,A_VMOVQ,
        A_MOVSS,A_MOVSD,A_MOVQ,A_MOVAPD,A_MOVAPS:
          Result :=
            (p.ops = 2) and
            { destination must be reg ... }
            (p.oper[1]^.typ = top_reg) and
            (getregtype(p.oper[1]^.reg) = getregtype(reg)) and
            (getsupreg(p.oper[1]^.reg) = getsupreg(reg)) and
            { ... and the source must not read reg }
            ((p.oper[0]^.typ = top_const) or
             ((p.oper[0]^.typ = top_reg) and
              (getsupreg(p.oper[0]^.reg) <> getsupreg(reg))) or
             ((p.oper[0]^.typ = top_ref) and
              not RegInRef(reg,p.oper[0]^.ref^)));
        A_POP:
          { only a pop into a register can load reg }
          Result :=
            (p.ops = 1) and
            (p.oper[0]^.typ = top_reg) and
            (getregtype(p.oper[0]^.reg) = getregtype(reg)) and
            (getsupreg(p.oper[0]^.reg) = getsupreg(reg));
      end;
    end;
  { Returns true if p is the start of a procedure exit sequence, i.e. one of
      ret
      leave; ret
      mov %ebp,%esp; pop %ebp; ret }
  class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
    var
      hp2,hp3 : tai;
    begin
      result:=false;
      if p.typ<>ait_instruction then
        exit;
      case taicpu(p).opcode of
        A_RET:
          result:=true;
        A_LEAVE:
          { leave must be directly followed by ret }
          result:=
            GetNextInstruction(p,hp2) and
            MatchInstruction(hp2,A_RET,[]);
        A_MOV:
          { hand written stack frame removal }
          result:=
            (taicpu(p).oper[0]^.typ=top_reg) and
            (taicpu(p).oper[0]^.reg=NR_EBP) and
            (taicpu(p).oper[1]^.typ=top_reg) and
            (taicpu(p).oper[1]^.reg=NR_ESP) and
            GetNextInstruction(p,hp2) and
            MatchInstruction(hp2,A_POP,[]) and
            (taicpu(hp2).oper[0]^.typ=top_reg) and
            (taicpu(hp2).oper[0]^.reg=NR_EBP) and
            GetNextInstruction(hp2,hp3) and
            MatchInstruction(hp3,A_RET,[]);
      end;
    end;
  { Returns true if hp1 is an arithmetic/logic instruction whose only
    destination is the register reg:
      - add/sub/or/xor/and/shl/shr/sar with a constant or a register other
        than reg as source and reg as destination
      - inc/dec/neg/not applied to reg
    Every other opcode yields false. }
  class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
    begin
      result := false;
      case hp1.opcode of
        A_INC,A_DEC,A_NEG,A_NOT:
          { single operand forms: the operand itself has to be reg }
          result := MatchOperand(hp1.oper[0]^,reg);
        A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
          { the source is inspected first and the destination only if the
            source qualifies }
          if (hp1.oper[0]^.typ = top_const) or
             ((hp1.oper[0]^.typ = top_reg) and
              (hp1.oper[0]^.reg <> reg)) then
            result := MatchOperand(hp1.oper[1]^,reg);
      end;
    end;
  { Removes the last deallocation of the function result register(s) that
    precedes p, so that the result register stays allocated up to p.
    Which registers are handled depends on the type of the function result
    of the current procedure: EAX for the listed non-ordinal types, EAX (and
    EDX for 8 byte results) for ordinals of non-zero size. }
  procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);

    { Walks backwards from p and removes the first integer register
      deallocation of supreg it meets; the search ends at the start of the
      list or at an entry that uses the register. }
    procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
      var
        cur: tai;
        wholereg: tregister;
      begin
        wholereg := newreg(R_INTREGISTER,supreg,R_SUBWHOLE);
        cur := tai(p.previous);
        while assigned(cur) do
          begin
            if (cur.typ = ait_regalloc) and
               (tai_regalloc(cur).ratype=ra_dealloc) and
               (getregtype(tai_regalloc(cur).reg) = R_INTREGISTER) and
               (getsupreg(tai_regalloc(cur).reg) = supreg) then
              begin
                asml.remove(cur);
                cur.free;
                exit;
              end;
            { the register is used here: nothing to remove before this point }
            if regInInstruction(wholereg,cur) then
              exit;
            cur := tai(cur.previous);
          end;
      end;

    var
      resdef_is_ord: boolean;
    begin
      resdef_is_ord := false;
      case current_procinfo.procdef.returndef.typ of
        orddef:
          resdef_is_ord := true;
        arraydef,recorddef,pointerdef,
        stringdef,enumdef,procdef,objectdef,errordef,
        filedef,setdef,procvardef,
        classrefdef,forwarddef:
          DoRemoveLastDeallocForFuncRes(RS_EAX);
      end;
      if resdef_is_ord and
         (current_procinfo.procdef.returndef.size <> 0) then
        begin
          DoRemoveLastDeallocForFuncRes(RS_EAX);
          { for int64/qword }
          if current_procinfo.procdef.returndef.size = 8 then
            DoRemoveLastDeallocForFuncRes(RS_EDX);
        end;
    end;
  { First pass peephole optimizations for vmovaps/vmovapd with two register
    operands.

    p - the vmova* instruction; updated when p itself is removed

    Returns true when the instruction list was changed by the self-move
    removal or by one of the two vmova*/vmova* folds (the FMA fold leaves
    the result untouched, as before).

    Every CopyUsedRegs is paired with a ReleaseUsedRegs so that the
    temporary register usage information is freed again. }
  function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1,hp2 : tai;
    begin
      result:=false;
      if MatchOpType(taicpu(p),top_reg,top_reg) then
        begin
          { vmova* reg1,reg1
            =>
            <nop> }
          if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
            begin
              GetNextInstruction(p,hp1);
              asml.Remove(p);
              p.Free;
              p:=hp1;
              result:=true;
            end
          else if GetNextInstruction(p,hp1) then
            begin
              if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
                 MatchOpType(taicpu(hp1),top_reg,top_reg) and
                 MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
                begin
                  { vmova* reg1,reg2
                    vmova* reg2,reg3
                    dealloc reg2
                    =>
                    vmova* reg1,reg3 }
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                    begin
                      taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end
                  { special case:
                    vmova* reg1,reg2
                    vmova* reg2,reg1
                    =>
                    vmova* reg1,reg2 }
                  else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
                    begin
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end;
                  ReleaseUsedRegs(TmpUsedRegs);
                end
              else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
                 { we mix single and double operations here because we assume that the compiler
                   generates vmovapd only after double operations and vmovaps only after single operations }
                 MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
                 GetNextInstruction(hp1,hp2) and
                 MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
                 { the trailing move must copy reg2 back into reg1, otherwise
                   removing it would lose the value it moves }
                 MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
                 MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
                begin
                  { vmova*   reg1,reg2
                    vfmadd*  x,y,reg2
                    vmova*   reg2,reg1
                    dealloc  reg2
                    =>
                    vfmadd*  x,y,reg1 }
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
                    begin
                      taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
                      asml.Remove(p);
                      p.Free;
                      asml.Remove(hp2);
                      hp2.Free;
                      p:=hp1;
                    end;
                  ReleaseUsedRegs(TmpUsedRegs);
                end;
            end;
        end;
    end;
  { First pass peephole optimization for three operand AVX operations:

      v<op>    x,y,reg3
      vmova*   reg3,reg4
      dealloc  reg3
      =>
      v<op>    x,y,reg4

    p - the AVX operation (its operand 2 is the destination)

    Returns true if the following vmova* was folded into p.

    The source of the vmova* is required to be a register before its
    register field is read, and the temporary register usage information
    obtained by CopyUsedRegs is released again. }
  function TX86AsmOptimizer.OptPass1VOP(const p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1 : tai;
    begin
      result:=false;
      if GetNextInstruction(p,hp1) and
         { we mix single and double operations here because we assume that the compiler
           generates vmovapd only after double operations and vmovaps only after single operations }
         MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
         MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
         (taicpu(hp1).oper[0]^.typ=top_reg) and
         (taicpu(hp1).oper[1]^.typ=top_reg) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)) then
            begin
              taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
              asml.Remove(hp1);
              hp1.Free;
              result:=true;
            end;
          ReleaseUsedRegs(TmpUsedRegs);
        end;
    end;
  467. function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  468. var
  469. hp1, hp2: tai;
  470. TmpUsedRegs : TAllUsedRegs;
  471. GetNextIntruction_p : Boolean;
  472. begin
  473. Result:=false;
  474. GetNextIntruction_p:=GetNextInstruction(p, hp1);
  475. if GetNextIntruction_p and
  476. MatchInstruction(hp1,A_AND,[]) and
  477. (taicpu(p).oper[1]^.typ = top_reg) and
  478. MatchOpType(taicpu(hp1),top_const,top_reg) and
  479. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
  480. case taicpu(p).opsize Of
  481. S_L:
  482. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  483. begin
  484. DebugMsg('PeepHole Optimization,MovAnd2Mov',p);
  485. asml.remove(hp1);
  486. hp1.free;
  487. Result:=true;
  488. exit;
  489. end;
  490. end
  491. else if GetNextIntruction_p and
  492. MatchInstruction(hp1,A_MOV,[]) and
  493. (taicpu(p).oper[1]^.typ = top_reg) and
  494. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
  495. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  496. begin
  497. CopyUsedRegs(TmpUsedRegs);
  498. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  499. { we have
  500. mov x, %treg
  501. mov %treg, y
  502. }
  503. if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
  504. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  505. { we've got
  506. mov x, %treg
  507. mov %treg, y
  508. with %treg is not used after }
  509. case taicpu(p).oper[0]^.typ Of
  510. top_reg:
  511. begin
  512. { change
  513. mov %reg, %treg
  514. mov %treg, y
  515. to
  516. mov %reg, y
  517. }
  518. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  519. DebugMsg('PeepHole Optimization,MovMov2Mov 2',p);
  520. asml.remove(hp1);
  521. hp1.free;
  522. ReleaseUsedRegs(TmpUsedRegs);
  523. Exit;
  524. end;
  525. top_ref:
  526. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  527. begin
  528. { change
  529. mov mem, %treg
  530. mov %treg, %reg
  531. to
  532. mov mem, %reg"
  533. }
  534. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  535. DebugMsg('PeepHole Optimization,MovMov2Mov 3',p);
  536. asml.remove(hp1);
  537. hp1.free;
  538. ReleaseUsedRegs(TmpUsedRegs);
  539. Exit;
  540. end;
  541. end;
  542. ReleaseUsedRegs(TmpUsedRegs);
  543. end
  544. else
  545. { Change
  546. mov %reg1, %reg2
  547. xxx %reg2, ???
  548. to
  549. mov %reg1, %reg2
  550. xxx %reg1, ???
  551. to avoid a write/read penalty
  552. }
  553. if MatchOpType(taicpu(p),top_reg,top_reg) and
  554. GetNextInstruction(p,hp1) and
  555. (tai(hp1).typ = ait_instruction) and
  556. (taicpu(hp1).ops >= 1) and
  557. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  558. { we have
  559. mov %reg1, %reg2
  560. XXX %reg2, ???
  561. }
  562. begin
  563. if ((taicpu(hp1).opcode = A_OR) or
  564. (taicpu(hp1).opcode = A_TEST)) and
  565. (taicpu(hp1).oper[1]^.typ = top_reg) and
  566. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  567. { we have
  568. mov %reg1, %reg2
  569. test/or %reg2, %reg2
  570. }
  571. begin
  572. CopyUsedRegs(TmpUsedRegs);
  573. { reg1 will be used after the first instruction,
  574. so update the allocation info }
  575. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  576. if GetNextInstruction(hp1, hp2) and
  577. (hp2.typ = ait_instruction) and
  578. taicpu(hp2).is_jmp and
  579. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  580. { change
  581. mov %reg1, %reg2
  582. test/or %reg2, %reg2
  583. jxx
  584. to
  585. test %reg1, %reg1
  586. jxx
  587. }
  588. begin
  589. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  590. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  591. asml.remove(p);
  592. p.free;
  593. p := hp1;
  594. ReleaseUsedRegs(TmpUsedRegs);
  595. Exit;
  596. end
  597. else
  598. { change
  599. mov %reg1, %reg2
  600. test/or %reg2, %reg2
  601. to
  602. mov %reg1, %reg2
  603. test/or %reg1, %reg1
  604. }
  605. begin
  606. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  607. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  608. end;
  609. ReleaseUsedRegs(TmpUsedRegs);
  610. end
  611. end
  612. else
  613. { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  614. x >= RetOffset) as it doesn't do anything (it writes either to a
  615. parameter or to the temporary storage room for the function
  616. result)
  617. }
  618. if GetNextIntruction_p and
  619. (tai(hp1).typ = ait_instruction) then
  620. begin
  621. if IsExitCode(hp1) and
  622. MatchOpType(p,top_reg,top_ref) and
  623. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  624. not(assigned(current_procinfo.procdef.funcretsym) and
  625. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  626. (taicpu(p).oper[1]^.ref^.index = NR_NO) then
  627. begin
  628. asml.remove(p);
  629. p.free;
  630. p := hp1;
  631. DebugMsg('Peephole removed deadstore before leave/ret',p);
  632. RemoveLastDeallocForFuncRes(p);
  633. end
  634. { change
  635. mov reg1, mem1
  636. cmp x, mem1
  637. to
  638. mov reg1, mem1
  639. cmp x, reg1
  640. }
  641. else if MatchOpType(p,top_reg,top_ref) and
  642. MatchInstruction(hp1,A_CMP,[taicpu(p).opsize]) and
  643. (taicpu(hp1).oper[1]^.typ = top_ref) and
  644. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  645. begin
  646. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  647. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  648. end;
  649. end;
  650. { Next instruction is also a MOV ? }
  651. if GetNextIntruction_p and
  652. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  653. begin
  654. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  655. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  656. { mov reg1, mem1 or mov mem1, reg1
  657. mov mem2, reg2 mov reg2, mem2}
  658. begin
  659. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  660. { mov reg1, mem1 or mov mem1, reg1
  661. mov mem2, reg1 mov reg2, mem1}
  662. begin
  663. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  664. { Removes the second statement from
  665. mov reg1, mem1/reg2
  666. mov mem1/reg2, reg1 }
  667. begin
  668. if (taicpu(p).oper[0]^.typ = top_reg) then
  669. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  670. DebugMsg('PeepHole Optimization,MovMov2Mov 1',p);
  671. asml.remove(hp1);
  672. hp1.free;
  673. Result:=true;
  674. exit;
  675. end
  676. else
  677. begin
  678. CopyUsedRegs(TmpUsedRegs);
  679. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  680. if (taicpu(p).oper[1]^.typ = top_ref) and
  681. { mov reg1, mem1
  682. mov mem2, reg1 }
  683. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  684. GetNextInstruction(hp1, hp2) and
  685. MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
  686. OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  687. OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
  688. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  689. { change to
  690. mov reg1, mem1 mov reg1, mem1
  691. mov mem2, reg1 cmp reg1, mem2
  692. cmp mem1, reg1
  693. }
  694. begin
  695. asml.remove(hp2);
  696. hp2.free;
  697. taicpu(hp1).opcode := A_CMP;
  698. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  699. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  700. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  701. DebugMsg('Peephole MovMovCmp2MovCmp done',hp1);
  702. end;
  703. ReleaseUsedRegs(TmpUsedRegs);
  704. end;
  705. end
  706. else if (taicpu(p).oper[1]^.typ=top_ref) and
  707. OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  708. begin
  709. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  710. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  711. DebugMsg('PeepHole Optimization,MovMov2MovMov1',p);
  712. end
  713. else
  714. begin
  715. CopyUsedRegs(TmpUsedRegs);
  716. if GetNextInstruction(hp1, hp2) and
  717. MatchOpType(taicpu(p),top_ref,top_reg) and
  718. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  719. (taicpu(hp1).oper[1]^.typ = top_ref) and
  720. MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
  721. MatchOpType(taicpu(hp2),top_ref,top_reg) and
  722. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  723. if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
  724. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  725. { mov mem1, %reg1
  726. mov %reg1, mem2
  727. mov mem2, reg2
  728. to:
  729. mov mem1, reg2
  730. mov reg2, mem2}
  731. begin
  732. AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  733. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  734. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  735. asml.remove(hp2);
  736. hp2.free;
  737. end
  738. {$ifdef i386}
  739. { this is enabled for i386 only, as the rules to create the reg sets below
  740. are too complicated for x86-64, so this makes this code too error prone
  741. on x86-64
  742. }
  743. else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  744. not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
  745. not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
  746. { mov mem1, reg1 mov mem1, reg1
  747. mov reg1, mem2 mov reg1, mem2
  748. mov mem2, reg2 mov mem2, reg1
  749. to: to:
  750. mov mem1, reg1 mov mem1, reg1
  751. mov mem1, reg2 mov reg1, mem2
  752. mov reg1, mem2
  753. or (if mem1 depends on reg1
  754. and/or if mem2 depends on reg2)
  755. to:
  756. mov mem1, reg1
  757. mov reg1, mem2
  758. mov reg1, reg2
  759. }
  760. begin
  761. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  762. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  763. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  764. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  765. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  766. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  767. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  768. AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  769. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  770. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  771. AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  772. end
  773. else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  774. begin
  775. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  776. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  777. end
  778. else
  779. begin
  780. asml.remove(hp2);
  781. hp2.free;
  782. end
  783. {$endif i386}
  784. ;
  785. ReleaseUsedRegs(TmpUsedRegs);
  786. end;
  787. end
  788. (* { movl [mem1],reg1
  789. movl [mem1],reg2
  790. to
  791. movl [mem1],reg1
  792. movl reg1,reg2
  793. }
  794. else if (taicpu(p).oper[0]^.typ = top_ref) and
  795. (taicpu(p).oper[1]^.typ = top_reg) and
  796. (taicpu(hp1).oper[0]^.typ = top_ref) and
  797. (taicpu(hp1).oper[1]^.typ = top_reg) and
  798. (taicpu(p).opsize = taicpu(hp1).opsize) and
  799. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  800. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  801. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  802. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  803. else*)
  804. { movl const1,[mem1]
  805. movl [mem1],reg1
  806. to
  807. movl const1,reg1
  808. movl reg1,[mem1]
  809. }
  810. else if MatchOpType(Taicpu(p),top_const,top_ref) and
  811. MatchOpType(Taicpu(hp1),top_ref,top_reg) and
  812. (taicpu(p).opsize = taicpu(hp1).opsize) and
  813. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  814. not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
  815. begin
  816. allocregbetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  817. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  818. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  819. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  820. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  821. end
  822. end
  823. else if (taicpu(p).oper[1]^.typ = top_reg) and
  824. GetNextIntruction_p and
  825. (hp1.typ = ait_instruction) and
  826. GetNextInstruction(hp1, hp2) and
  827. MatchInstruction(hp2,A_MOV,[]) and
  828. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  829. OpsEqual(taicpu(hp2).oper[0]^, taicpu(p).oper[1]^) and
  830. (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
  831. ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
  832. IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
  833. ) then
  834. { change movsX/movzX reg/ref, reg2
  835. add/sub/or/... reg3/$const, reg2
  836. mov reg2 reg/ref
  837. to add/sub/or/... reg3/$const, reg/ref }
  838. begin
  839. CopyUsedRegs(TmpUsedRegs);
  840. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  841. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  842. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  843. begin
  844. { by example:
  845. movswl %si,%eax movswl %si,%eax p
  846. decl %eax addl %edx,%eax hp1
  847. movw %ax,%si movw %ax,%si hp2
  848. ->
  849. movswl %si,%eax movswl %si,%eax p
  850. decw %eax addw %edx,%eax hp1
  851. movw %ax,%si movw %ax,%si hp2
  852. }
  853. DebugMsg('PeepHole Optimization '+
  854. std_op2str[taicpu(p).opcode]+gas_opsize2str[taicpu(p).opsize]+' '+
  855. std_op2str[taicpu(hp1).opcode]+gas_opsize2str[taicpu(hp1).opsize]+' '+
  856. std_op2str[taicpu(hp2).opcode]+gas_opsize2str[taicpu(hp2).opsize],p);
  857. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  858. {
  859. ->
  860. movswl %si,%eax movswl %si,%eax p
  861. decw %si addw %dx,%si hp1
  862. movw %ax,%si movw %ax,%si hp2
  863. }
  864. case taicpu(hp1).ops of
  865. 1:
  866. begin
  867. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  868. if taicpu(hp1).oper[0]^.typ=top_reg then
  869. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  870. end;
  871. 2:
  872. begin
  873. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  874. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  875. (taicpu(hp1).opcode<>A_SHL) and
  876. (taicpu(hp1).opcode<>A_SHR) and
  877. (taicpu(hp1).opcode<>A_SAR) then
  878. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  879. end;
  880. else
  881. internalerror(2008042701);
  882. end;
  883. {
  884. ->
  885. decw %si addw %dx,%si p
  886. }
  887. asml.remove(p);
  888. asml.remove(hp2);
  889. p.Free;
  890. hp2.Free;
  891. p := hp1;
  892. end;
  893. ReleaseUsedRegs(TmpUsedRegs);
  894. end;
  895. if GetNextIntruction_p and
  896. MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  897. GetNextInstruction(hp1, hp2) and
  898. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  899. MatchOperand(Taicpu(p).oper[0]^,0) and
  900. (Taicpu(p).oper[1]^.typ = top_reg) and
  901. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  902. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  903. { mov reg1,0
  904. bts reg1,operand1 --> mov reg1,operand2
  905. or reg1,operand2 bts reg1,operand1}
  906. begin
  907. Taicpu(hp2).opcode:=A_MOV;
  908. asml.remove(hp1);
  909. insertllitem(hp2,hp2.next,hp1);
  910. asml.remove(p);
  911. p.free;
  912. p:=hp1;
  913. end;
  914. if GetNextIntruction_p and
  915. MatchInstruction(hp1,A_LEA,[S_L]) and
  916. MatchOpType(Taicpu(p),top_ref,top_reg) and
  917. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  918. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  919. ) or
  920. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  921. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  922. )
  923. ) then
  924. { mov reg1,ref
  925. lea reg2,[reg1,reg2]
  926. to
  927. add reg2,ref}
  928. begin
  929. CopyUsedRegs(TmpUsedRegs);
  930. { reg1 may not be used afterwards }
  931. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  932. begin
  933. Taicpu(hp1).opcode:=A_ADD;
  934. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  935. DebugMsg('Peephole MovLea2Add done',hp1);
  936. asml.remove(p);
  937. p.free;
  938. p:=hp1;
  939. end;
  940. ReleaseUsedRegs(TmpUsedRegs);
  941. end;
  942. end;
  { Second-pass peephole optimizations for the MOV instruction at p.

    Two patterns are recognised:

      1. mov reg1,reg2
         mov/movzx/movsx (reg2,..),reg2
         The reference of the second instruction is rewritten to use reg1
         and the first mov is removed. p is moved to the remaining load and
         the function returns true.

      2. mov (ref),reg
         <foldable arithmetic op or matching lea> on reg
         mov reg,(ref)
         If reg is not used after the final store, the three instructions
         are folded into a single operation that works directly on (ref).
         p is moved to that instruction; Result stays false in this case. }
  function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      nextIns,storeIns : tai;
      srcReg,dstReg,addend : tregister;
    begin
      Result:=false;

      { pattern 1: register copy followed by a load through the copy }
      if MatchOpType(taicpu(p),top_reg,top_reg) and
        GetNextInstruction(p, nextIns) and
        MatchInstruction(nextIns,A_MOV,A_MOVZX,A_MOVSX,[]) and
        MatchOpType(taicpu(nextIns),top_ref,top_reg) and
        ((taicpu(nextIns).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg)
         or
         (taicpu(nextIns).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)
        ) and
        (getsupreg(taicpu(nextIns).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
        begin
          srcReg:=taicpu(p).oper[0]^.reg;
          dstReg:=taicpu(p).oper[1]^.reg;
          { address through the original register instead of the copy }
          if taicpu(nextIns).oper[0]^.ref^.base = dstReg then
            taicpu(nextIns).oper[0]^.ref^.base := srcReg;
          if taicpu(nextIns).oper[0]^.ref^.index = dstReg then
            taicpu(nextIns).oper[0]^.ref^.index := srcReg;
          asml.remove(p);
          p.free;
          p := nextIns;
          Result:=true;
          exit;
        end;

      { pattern 2: load / modify / store back to the same reference }
      if not((taicpu(p).oper[0]^.typ = top_ref) and
             GetNextInstruction(p,nextIns) and
             (nextIns.typ = ait_instruction) and
             (IsFoldableArithOp(taicpu(nextIns),taicpu(p).oper[1]^.reg) or
              ((taicpu(nextIns).opcode=A_LEA) and
               (taicpu(nextIns).oper[1]^.reg = taicpu(p).oper[1]^.reg) and
               ((MatchReference(taicpu(nextIns).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
                 (taicpu(nextIns).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
                ) or
                (MatchReference(taicpu(nextIns).oper[0]^.ref^,NR_INVALID,
                   taicpu(p).oper[1]^.reg) and
                 (taicpu(nextIns).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg))
               )
              )
             ) and
             GetNextInstruction(nextIns,storeIns) and
             MatchInstruction(storeIns,A_MOV,[]) and
             MatchOperand(taicpu(p).oper[1]^,taicpu(storeIns).oper[0]^) and
             (taicpu(storeIns).oper[1]^.typ = top_ref)) then
        exit;

      CopyUsedRegs(TmpUsedRegs);
      UpdateUsedRegs(TmpUsedRegs,tai(nextIns.next));
      { the store must target the location that was loaded, and the
        temporary register must be dead after the store }
      if RefsEqual(taicpu(storeIns).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
        not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,storeIns,TmpUsedRegs)) then
        begin
          { change   mov (ref), reg
                     add/sub/or/... reg2/$const, reg
                     mov reg, (ref)
                     # release reg
            to       add/sub/or/... reg2/$const, (ref) }
          case taicpu(nextIns).opcode of
            A_LEA :
              begin
                { lea (reg,other),reg  becomes  add other,(ref) }
                taicpu(nextIns).opcode:=A_ADD;
                if taicpu(nextIns).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg then
                  addend:=taicpu(nextIns).oper[0]^.ref^.index
                else
                  addend:=taicpu(nextIns).oper[0]^.ref^.base;
                taicpu(nextIns).loadreg(0,addend);
                taicpu(nextIns).loadRef(1,taicpu(p).oper[0]^.ref^);
                DebugMsg('Peephole FoldLea done',nextIns);
              end;
            { single-operand instructions: the reference is operand 0 }
            A_INC,A_DEC,A_NOT,A_NEG :
              taicpu(nextIns).loadRef(0,taicpu(p).oper[0]^.ref^);
            else
              { two-operand instructions: the reference is the destination }
              taicpu(nextIns).loadRef(1,taicpu(p).oper[0]^.ref^);
          end;
          asml.remove(p);
          asml.remove(storeIns);
          p.free;
          storeIns.free;
          p := nextIns;
        end;
      ReleaseUsedRegs(TmpUsedRegs);
    end;
  { First-pass peephole optimizations for the AND instruction at p.

    Looks at the instruction following p and applies one of:

      - and const1,reg ; and const2,reg   ->  and (const1 and const2),reg
        (p is removed, p is moved to the second AND, Result is true)
      - and const,reg ; movzx lowpart(reg),reg  ->  and const,reg
        when const already clears every bit the zero extension would clear
      - and const,reg ; movsx lowpart(reg),reg  ->  and const,reg
        when const additionally clears the sign bit of the low part
      - and x,reg ; jcc  ->  test x,reg ; jcc
        when reg is not in UsedRegs any more

    Result is only set to true for the first transformation; the other
    ones leave p in place and return false. }
  function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
    var
      hp1 : tai;
    begin
      Result:=false;

      if not(GetNextInstruction(p, hp1)) then
        exit;

      if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_AND,[]) and
        MatchOpType(hp1,top_const,top_reg) and
        (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
        { the second register must contain the first one, so compare their subreg types }
        (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
        (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
        { change
            and const1, reg
            and const2, reg
          to
            and (const1 and const2), reg
        }
        begin
          taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
          DebugMsg('Peephole AndAnd2And done',hp1);
          asml.remove(p);
          p.Free;
          p:=hp1;
          Result:=true;
          exit;
        end
      else if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_MOVZX,[]) and
        (taicpu(hp1).oper[0]^.typ = top_reg) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
        { The movzx source has to be a part of its own destination, so the
          super registers are compared. Comparing the sub registers (as was
          done before) can never succeed, because the opsize checks below
          require source and destination to have different sizes. }
        (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
        { a high byte register (e.g. %ah) shares the super register but is
          not the low part that the AND mask was checked against }
        (getsubreg(taicpu(hp1).oper[0]^.reg)<>R_SUBH) and
        (((taicpu(p).opsize=S_W) and
          (taicpu(hp1).opsize=S_BW)) or
         ((taicpu(p).opsize=S_L) and
          (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
          or
         ((taicpu(p).opsize=S_Q) and
          (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
        ) then
        { change
            and   const, reg
            movzx lowpart(reg), reg
          to
            and   const, reg
          if const has no bit set outside of the low part
        }
        begin
          if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
             ) or
             (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
              or
             (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $ffffffff)=taicpu(p).oper[0]^.val)
             )
{$endif x86_64}
            then
            begin
              DebugMsg('Peephole AndMovzToAnd done',p);
              asml.remove(hp1);
              hp1.free;
            end;
        end
      else if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
        (taicpu(hp1).oper[0]^.typ = top_reg) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
        (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
        { see the movzx case: a high byte source is not the masked low part }
        (getsubreg(taicpu(hp1).oper[0]^.reg)<>R_SUBH) and
        (((taicpu(p).opsize=S_W) and
          (taicpu(hp1).opsize=S_BW)) or
         ((taicpu(p).opsize=S_L) and
          (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
          or
         ((taicpu(p).opsize=S_Q) and
          (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
        ) then
        { change
            and   const, reg
            movsx lowpart(reg), reg
          to
            and   const, reg
          if const also clears the sign bit of the low part, so that the
          sign extension can only produce zero bits
        }
        begin
          if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
             ) or
             (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
              or
             (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
             )
{$endif x86_64}
            then
            begin
              DebugMsg('PeepHole Optimization,AndMovsxToAnd',p);
              asml.remove(hp1);
              hp1.free;
            end;
        end
      else if (taicpu(p).oper[1]^.typ = top_reg) and
        (hp1.typ = ait_instruction) and
        (taicpu(hp1).is_jmp) and
        (taicpu(hp1).opcode<>A_JMP) and
        not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
        { change
            and x, reg
            jxx
          to
            test x, reg
            jxx
          if reg is deallocated before the
          jump, but only if it's a conditional jump (PFV)
        }
        taicpu(p).opcode := A_TEST;
    end;
  { Post-peephole rewrite of the MOV instruction at p:
    "mov $0, %reg" becomes "xor %reg, %reg".

    The rewrite is only done when the default flags register is not in
    UsedRegs. Anything else is left untouched. }
  procedure TX86AsmOptimizer.PostPeepholeOptMov(const p : tai);
    var
      mov : taicpu;
    begin
      mov:=taicpu(p);
      { source operand must be the constant 0 }
      if not MatchOperand(mov.oper[0]^,0) then
        exit;
      { destination must be a register }
      if mov.oper[1]^.typ <> Top_Reg then
        exit;
      { skip the rewrite while the flags register is still in use }
      if RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs) then
        exit;
      { change "mov $0, %reg" into "xor %reg, %reg" }
      mov.opcode := A_XOR;
      mov.loadReg(0,mov.oper[1]^.reg);
    end;
  1150. end.