aoptx86.pas 56 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
  27. type
  { Peephole optimizer shared by the x86 targets; extends TAsmOptimizer with
    x86-specific register-dependency checks and per-instruction optimization
    passes. }
  28. TX86AsmOptimizer = class(TAsmOptimizer)
  { returns true if instruction hp gives reg a completely new value that does
    not depend on reg's previous contents (MOV-like loads, POP, 3-operand IMUL) }
  29. function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
  30. protected
  31. { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
  32. function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
  33. { checks whether reading the value in reg1 depends on the value of reg2. This
  34. is very similar to SuperRegisterEquals, except it takes into account that
  35. R_SUBH and R_SUBL are independent (e.g. reading from AL does not
  36. depend on the value in AH). }
  37. function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
  { per-instruction optimization handlers; PostPeepholeOptMov, OptPass1AND,
    OptPass2MOV and OptPass2Imul are implemented outside the part of the unit
    reviewed here - NOTE(review): consult their implementations for details }
  38. procedure PostPeepholeOptMov(const p : tai);
  39. function OptPass1AND(var p : tai) : boolean;
  { folds redundant vmovapd/vmovaps register moves starting at p; p may be
    replaced by the following instruction }
  40. function OptPass1VMOVAP(var p : tai) : boolean;
  { folds a vmovapd/vmovaps that copies the result (operand 2) of p into
    another register directly into p }
  41. function OptPass1VOP(const p : tai) : boolean;
  42. function OptPass1MOV(var p : tai) : boolean;
  43. function OptPass2MOV(var p : tai) : boolean;
  44. function OptPass2Imul(var p : tai) : boolean;
  { inserts s as an assembler comment before p when DEBUG_AOPTCPU is defined,
    otherwise does nothing }
  45. procedure DebugMsg(const s : string; p : tai);inline;
  { marks reg as allocated over the range p1..p2 by rewriting the regalloc
    entries in between; initialusedregs is updated when an alloc is inserted }
  46. procedure AllocRegBetween(reg : tregister; p1,p2 : tai;var initialusedregs : TAllUsedRegs);
  { true if p is RET, LEAVE followed by RET, or "mov ebp,esp; pop ebp; ret" }
  47. class function IsExitCode(p : tai) : boolean;
  { true if hp1 is an arithmetic/logic/shift instruction whose destination is reg
    and whose source (if any) is a constant or a register other than reg }
  48. class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean;
  { removes the closest preceding dealloc of the function result register(s)
    (EAX, and EDX for 8 byte ordinals) found when walking backwards from p }
  49. procedure RemoveLastDeallocForFuncRes(p : tai);
  50. end;
  { MatchInstruction: true if instr is an instruction (ait_instruction) whose
    opcode is one of the given opcodes and whose opsize is in opsize; an empty
    opsize set matches every operand size }
  51. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  52. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  53. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  54. function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;
  { MatchOperand: true if oper is the register reg, is the constant a, or (third
    overload) if both operands have the same type and the same value; the third
    overload raises an internal error for operand types other than constant,
    register or reference }
  55. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  56. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  57. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  { true if offset, segment, base, index, scalefactor, symbol, relsymbol and
    refaddr of both references are equal }
  58. function RefsEqual(const r1, r2: treference): boolean;
  { returns true, if ref has offset 0, a scale factor of 0 or 1, no segment and no
    symbols, and uses the registers passed as base and index; passing NR_INVALID
    for base or index skips the check of that register }
  59. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  60. { returns true, if ref is a reference using only the registers passed as base and index
  61. and having an offset }
  62. function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  { true if instr has exactly two operands, of type ot0 and ot1 respectively;
    instr is cast to taicpu without checking its typ }
  63. function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
  64. implementation
  65. uses
  66. cutils,
  67. verbose,
  68. procinfo,
  69. symconst,symsym,
  70. itcpugas;
  { Tests whether instr is a real instruction with opcode op.
    opsize restricts the accepted operand sizes; the empty set accepts all. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
    begin
      result:=false;
      { anything that is not an instruction can never match }
      if instr.typ<>ait_instruction then
        exit;
      if taicpu(instr).opcode<>op then
        exit;
      result:=(opsize=[]) or (taicpu(instr).opsize in opsize);
    end;
  { Tests whether instr is a real instruction whose opcode is op1 or op2.
    opsize restricts the accepted operand sizes; the empty set accepts all. }
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      { neither of the two candidate opcodes? }
      if (taicpu(instr).opcode<>op1) and
         (taicpu(instr).opcode<>op2) then
        exit;
      result:=(opsize=[]) or (taicpu(instr).opsize in opsize);
    end;
  { Tests whether instr is a real instruction whose opcode is op1, op2 or op3.
    opsize restricts the accepted operand sizes; the empty set accepts all. }
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      { none of the three candidate opcodes? }
      if (taicpu(instr).opcode<>op1) and
         (taicpu(instr).opcode<>op2) and
         (taicpu(instr).opcode<>op3) then
        exit;
      result:=(opsize=[]) or (taicpu(instr).opsize in opsize);
    end;
  { Tests whether instr is a real instruction whose opcode occurs in ops.
    opsize restricts the accepted operand sizes; the empty set accepts all.
    An empty ops array never matches. }
  function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
    const opsize : topsizes) : boolean;
    var
      idx : integer;
    begin
      result:=false;
      { these two conditions do not depend on the candidate opcode, so they are
        evaluated once up front }
      if instr.typ<>ait_instruction then
        exit;
      if (opsize<>[]) and not(taicpu(instr).opsize in opsize) then
        exit;
      for idx:=low(ops) to high(ops) do
        if taicpu(instr).opcode=ops[idx] then
          begin
            result:=true;
            exit;
          end;
    end;
  { True if oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ<>top_reg then
        result:=false
      else
        result:=oper.reg=reg;
    end;
  { True if oper is a constant operand with value a. }
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
    begin
      if oper.typ<>top_const then
        result:=false
      else
        result:=oper.val=a;
    end;
  { True if both operands have the same type and the same contents.
    Only constant, register and reference operands can be compared; any other
    operand type shared by both operands raises internal error 2013102801. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
    begin
      result:=false;
      { operands of different kinds never match }
      if oper1.typ<>oper2.typ then
        exit;
      result:=true;
      case oper1.typ of
        top_reg:
          result:=(oper1.reg=oper2.reg);
        top_const:
          result:=(oper1.val=oper2.val);
        top_ref:
          result:=RefsEqual(oper1.ref^,oper2.ref^);
        else
          internalerror(2013102801);
      end;
    end;
  { Field-wise comparison of two references: registers, scale factor, offset,
    symbols and address kind must all be equal. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      Result:=false;
      { register part }
      if (r1.segment<>r2.segment) or
         (r1.base<>r2.base) or
         (r1.index<>r2.index) or
         (r1.scalefactor<>r2.scalefactor) then
        exit;
      { displacement and symbolic part }
      Result:=(r1.offset=r2.offset) and
        (r1.symbol=r2.symbol) and
        (r1.relsymbol=r2.relsymbol) and
        (r1.refaddr=r2.refaddr);
    end;
  { True if ref is a plain register reference: offset 0, scale factor 0 or 1,
    no segment register and no symbols. base and index are compared against the
    reference unless NR_INVALID is passed, which accepts any register there. }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
    begin
      Result:=false;
      { the reference must not carry an offset, scaling, segment or symbol }
      if (ref.offset<>0) or
         not(ref.scalefactor in [0,1]) or
         (ref.segment<>NR_NO) or
         (ref.symbol<>nil) or
         (ref.relsymbol<>nil) then
        exit;
      if (base<>NR_INVALID) and (ref.base<>base) then
        exit;
      if (index<>NR_INVALID) and (ref.index<>index) then
        exit;
      Result:=true;
    end;
  { Like MatchReference, but the offset of ref is not inspected: scale factor
    must be 0 or 1, and there must be no segment register and no symbols.
    base and index are compared against the reference unless NR_INVALID is
    passed, which accepts any register there. }
  function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
    begin
      Result:=false;
      if not(ref.scalefactor in [0,1]) or
         (ref.segment<>NR_NO) or
         (ref.symbol<>nil) or
         (ref.relsymbol<>nil) then
        exit;
      if (base<>NR_INVALID) and (ref.base<>base) then
        exit;
      if (index<>NR_INVALID) and (ref.index<>index) then
        exit;
      Result:=true;
    end;
  { True if instr has exactly two operands whose types are ot0 and ot1.
    instr is treated as a taicpu without checking its typ, so the caller has to
    pass an instruction. }
  function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
    begin
      Result:=false;
      if taicpu(instr).ops<>2 then
        exit;
      if taicpu(instr).oper[0]^.typ<>ot0 then
        exit;
      Result:=(taicpu(instr).oper[1]^.typ=ot1);
    end;
  {$ifndef DEBUG_AOPTCPU}
  { regular build: debug messages are dropped, the empty body can be inlined away }
  procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
    end;
  {$else DEBUG_AOPTCPU}
  { debug build: emit s as an assembler comment in front of p }
  procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
    begin
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
    end;
  {$endif DEBUG_AOPTCPU}
  { Returns true if writing reg1 replaces every bit of reg2.
    Registers with different super registers never overlap (false); for
    non-integer registers sharing a super register the answer is always true.
    For integer registers the sub register of reg1 decides. }
  function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
    begin
      Result:=false;
      if not SuperRegistersEqual(reg1,reg2) then
        exit;
      if getregtype(reg1)<>R_INTREGISTER then
        begin
          { same super register, and no partial writes to consider }
          Result:=true;
          exit;
        end;
      case getsubreg(reg1) of
        { an 8 bit write only replaces the very same 8 bit sub register: a write
          to the low byte leaves the high byte and all upper bits alone, e.g.
            reg2 := (reg2 and $ffffff00) or byte(reg1)
          and a write to the high byte leaves the low byte and upper bits alone:
            reg2 := (reg2 and $ffff00ff) or (reg1 and $ff00) }
        R_SUBL,
        R_SUBH:
          Result:=(getsubreg(reg2)=getsubreg(reg1));
        { a 16 bit write keeps the upper 16 bits of anything wider:
            reg2 := (reg2 and $ffff0000) or word(reg1) }
        R_SUBW:
          Result:=getsubreg(reg2) in [R_SUBL,R_SUBH,R_SUBW];
        { a 32 bit write zeroes the upper half of the 64 bit register on
          x86_64, so it replaces every sub register, just like a 64 bit write }
        R_SUBD,
        R_SUBQ:
          Result:=true;
        else
          internalerror(2017042801);
      end;
    end;
  { Returns true if the value read from reg1 is influenced by the contents of
    reg2. Registers with different super registers are independent; non-integer
    registers sharing a super register always depend on each other. Among
    integer sub registers only the low byte and the high byte are independent. }
  function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
    begin
      Result:=false;
      if not SuperRegistersEqual(reg1,reg2) then
        exit;
      if getregtype(reg1)<>R_INTREGISTER then
        begin
          Result:=true;
          exit;
        end;
      case getsubreg(reg1) of
        { the low byte does not see the high byte ... }
        R_SUBL:
          Result:=(getsubreg(reg2)<>R_SUBH);
        { ... and vice versa }
        R_SUBH:
          Result:=(getsubreg(reg2)<>R_SUBL);
        { 16 bit and wider reads cover both byte registers }
        R_SUBW,
        R_SUBD,
        R_SUBQ:
          Result:=true;
        else
          internalerror(2017042802);
      end;
    end;
  236. { allocates register reg between (and including) instructions p1 and p2
  237. the type of p1 and p2 must not be in SkipInstr
  238. note that this routine is both called from the peephole optimizer
  239. (where optinfo is not yet initialised) and from the cse (where it is)

         Steps performed:
           - nothing is done for NR_ESP, the frame pointer or when p1 is nil
           - reg is widened to its whole integer/mm super register
           - if reg is not yet in initialusedregs, an alloc entry is inserted
             before p1 and reg is added to initialusedregs
           - every regalloc entry for reg between p1 and the instruction after
             p2 is removed from the list
           - if the first removed entry was an alloc, an alloc is re-inserted
             before the original p1; if the last removed entry was a dealloc, a
             dealloc is re-inserted before the instruction the scan stopped at
         Raises internal error 2014022301 if an instruction in the range has
         optinfo assigned, and 2016101501 on an unexpected sub register
         combination in an existing regalloc entry. }
  240. procedure TX86AsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
  241. var
  242. hp, start: tai;
  243. removedsomething,
  244. firstRemovedWasAlloc,
  245. lastRemovedWasDealloc: boolean;
  246. begin
  247. {$ifdef EXTDEBUG}
  248. { if assigned(p1.optinfo) and
  249. (ptaiprop(p1.optinfo)^.usedregs <> initialusedregs) then
  250. internalerror(2004101010); }
  251. {$endif EXTDEBUG}
         { remember the original start, p1 is advanced by the scan below }
  252. start := p1;
  253. if (reg = NR_ESP) or
  254. (reg = current_procinfo.framepointer) or
  255. not(assigned(p1)) then
  256. { this happens with registers which are loaded implicitly, outside the }
  257. { current block (e.g. esi with self) }
  258. exit;
  259. { make sure we allocate it for this instruction }
         { p2 now points at the instruction after the range and serves as the
           stop marker of the scan; the function result is not checked }
  260. getnextinstruction(p2,p2);
  261. lastRemovedWasDealloc := false;
  262. removedSomething := false;
  263. firstRemovedWasAlloc := false;
         { NOTE(review): the allocregdebug block below uses "supreg", which is
           neither a parameter nor a local of this routine, and calls insertllitem
           with a different argument list than the calls further down - it
           presumably no longer compiles when allocregdebug is defined; confirm }
  264. {$ifdef allocregdebug}
  265. hp := tai_comment.Create(strpnew('allocating '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
  266. ' from here...'));
  267. insertllitem(asml,p1.previous,p1,hp);
  268. hp := tai_comment.Create(strpnew('allocated '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
  269. ' till here...'));
  270. insertllitem(asml,p2,p2.next,hp);
  271. {$endif allocregdebug}
  272. { do it the safe way: always allocate the full super register,
  273. as we do no register re-allocation in the peephole optimizer,
  274. this does not hurt
  275. }
  276. case getregtype(reg) of
  277. R_MMREGISTER:
  278. reg:=newreg(R_MMREGISTER,getsupreg(reg),R_SUBMMWHOLE);
  279. R_INTREGISTER:
  280. reg:=newreg(R_INTREGISTER,getsupreg(reg),R_SUBWHOLE);
  281. end;
         { register not live yet at p1: start its live range right before p1 }
  282. if not(RegInUsedRegs(reg,initialusedregs)) then
  283. begin
  284. hp := tai_regalloc.alloc(reg,nil);
  285. insertllItem(p1.previous,p1,hp);
  286. IncludeRegInUsedRegs(reg,initialusedregs);
  287. end;
         { walk from p1 to the stop marker, one instruction per outer iteration }
  288. while assigned(p1) and
  289. (p1 <> p2) do
  290. begin
         { updating per-instruction optinfo is not supported here }
  291. if assigned(p1.optinfo) then
  292. internalerror(2014022301); // IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs);
  293. p1 := tai(p1.next);
         { process the non-instruction entries that follow, up to the next
           entry whose type is not in SkipInstr }
  294. repeat
         { skip everything in SkipInstr except regalloc entries }
  295. while assigned(p1) and
  296. (p1.typ in (SkipInstr-[ait_regalloc])) Do
  297. p1 := tai(p1.next);
  298. { remove all allocation/deallocation info about the register in between }
  299. if assigned(p1) and
  300. (p1.typ = ait_regalloc) then
  301. begin
  302. { same super register, different sub register? }
  303. if SuperRegistersEqual(reg,tai_regalloc(p1).reg) and (tai_regalloc(p1).reg<>reg) then
  304. begin
         { only an entry with a sub register not greater than the one of reg is
           accepted; it is rewritten to refer to reg itself }
  305. if (getsubreg(tai_regalloc(p1).reg)>getsubreg(reg)) or (getsubreg(reg)=R_SUBH) then
  306. internalerror(2016101501);
  307. tai_regalloc(p1).reg:=reg;
  308. end;
  309. if tai_regalloc(p1).reg=reg then
  310. begin
         { record what kind of entry was removed first and last, this decides
           which entries are re-inserted after the loop }
  311. if not removedSomething then
  312. begin
  313. firstRemovedWasAlloc := tai_regalloc(p1).ratype=ra_alloc;
  314. removedSomething := true;
  315. end;
  316. lastRemovedWasDealloc := (tai_regalloc(p1).ratype=ra_dealloc);
         { unlink and free the entry, continue with its successor }
  317. hp := tai(p1.Next);
  318. asml.Remove(p1);
  319. p1.free;
  320. p1 := hp;
  321. end
  322. else
  323. p1 := tai(p1.next);
  324. end;
  325. until not(assigned(p1)) or
  326. not(p1.typ in SkipInstr);
  327. end;
         { p1 is nil if the end of the list was reached, nothing is re-inserted then }
  328. if assigned(p1) then
  329. begin
  330. if firstRemovedWasAlloc then
  331. begin
  332. hp := tai_regalloc.Alloc(reg,nil);
  333. insertLLItem(start.previous,start,hp);
  334. end;
  335. if lastRemovedWasDealloc then
  336. begin
  337. hp := tai_regalloc.DeAlloc(reg,nil);
  338. insertLLItem(p1.previous,p1,hp);
  339. end;
  340. end;
  341. end;
  { Returns true if instruction hp loads reg with a completely new value, i.e.
    hp overwrites reg entirely and the value written does not depend on the
    previous contents of reg. Returns false if hp is nil or not an instruction.

    Recognised instructions:
      - two operand MOV-like loads (mov, movzx, movsx, lea and the scalar/aligned
        SSE/AVX moves) with a register destination
      - pop into a register
      - three operand imul (imul const, reg/mem, reg)

    The operand type is checked before the register field of an operand is
    read: for a memory operand that field does not hold a register, and the
    question is whether reg is used in the address calculation instead. }
  function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      Result := false;
      if not assigned(hp) or
         (hp.typ <> ait_instruction) then
        exit;
      p := taicpu(hp);
      case p.opcode of
        A_MOV,
        A_MOVZX,
        A_MOVSX,
        A_LEA,
        A_VMOVSS,
        A_VMOVSD,
        A_VMOVAPD,
        A_VMOVAPS,
        A_VMOVQ,
        A_MOVSS,
        A_MOVSD,
        A_MOVQ,
        A_MOVAPD,
        A_MOVAPS:
          Result :=
            (p.ops=2) and { A_MOVSD can have zero operands, so this check is needed }
            (p.oper[1]^.typ = top_reg) and
            (Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)) and
            ((p.oper[0]^.typ = top_const) or
             ((p.oper[0]^.typ = top_reg) and
              not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
             ((p.oper[0]^.typ = top_ref) and
              not RegInRef(reg,p.oper[0]^.ref^)));
        A_POP:
          { "pop mem" does not load a register at all }
          Result :=
            (p.ops=1) and
            (p.oper[0]^.typ = top_reg) and
            (Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg));
        A_IMUL:
          { imul const, src, dest: src may be a register or a memory location }
          Result :=
            (p.ops=3) and
            (p.oper[2]^.typ = top_reg) and
            (Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg,reg)) and
            (((p.oper[1]^.typ = top_reg) and
              not(Reg1ReadDependsOnReg2(p.oper[1]^.reg,reg))) or
             ((p.oper[1]^.typ = top_ref) and
              not RegInRef(reg,p.oper[1]^.ref^)));
      end;
    end;
  { Returns true if p starts a procedure exit sequence, i.e. if p is
      ret
    or
      leave
      ret
    or
      mov %ebp,%esp
      pop %ebp
      ret }
  class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
    var
      nextinstr,lastinstr : tai;
    begin
      result:=false;
      if p.typ<>ait_instruction then
        exit;
      case taicpu(p).opcode of
        A_RET:
          result:=true;
        A_LEAVE:
          result:=
            GetNextInstruction(p,nextinstr) and
            (nextinstr.typ=ait_instruction) and
            (taicpu(nextinstr).opcode=A_RET);
        A_MOV:
          result:=
            { mov %ebp,%esp }
            (taicpu(p).oper[0]^.typ=top_reg) and
            (taicpu(p).oper[0]^.reg=NR_EBP) and
            (taicpu(p).oper[1]^.typ=top_reg) and
            (taicpu(p).oper[1]^.reg=NR_ESP) and
            { pop %ebp }
            GetNextInstruction(p,nextinstr) and
            (nextinstr.typ=ait_instruction) and
            (taicpu(nextinstr).opcode=A_POP) and
            (taicpu(nextinstr).oper[0]^.typ=top_reg) and
            (taicpu(nextinstr).oper[0]^.reg=NR_EBP) and
            { ret }
            GetNextInstruction(nextinstr,lastinstr) and
            (lastinstr.typ=ait_instruction) and
            (taicpu(lastinstr).opcode=A_RET);
      end;
    end;
  { Returns true if hp1 is an arithmetic, logic or shift instruction that
    modifies reg in place:
      - add/sub/or/xor/and/shl/shr/sar with destination reg and a source that is
        a constant or a register different from reg
      - inc/dec/neg/not applied to reg
    Every other opcode yields false. }
  class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
    begin
      Result := False;
      case hp1.opcode of
        A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
          begin
            { the source must not be a memory location and must not be reg itself }
            if not((hp1.oper[0]^.typ = top_const) or
                   ((hp1.oper[0]^.typ = top_reg) and
                    (hp1.oper[0]^.reg <> reg))) then
              exit;
            Result := MatchOperand(hp1.oper[1]^,reg);
          end;
        A_INC,A_DEC,A_NEG,A_NOT:
          Result := MatchOperand(hp1.oper[0]^,reg);
      end;
    end;
  { Removes the deallocation of the function result register(s) that precedes p.
    Depending on the type of the function result this is done for EAX only or,
    for ordinal results of 8 bytes, for EAX and EDX. Result types not listed in
    the case statement are left untouched. }
  procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);

    { Walks backwards from p and deletes the first dealloc entry of the integer
      register supreg; the search is given up at the start of the list or at the
      first entry for which regInInstruction reports a use of the register. }
    procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
      var
        cur: tai;
      begin
        cur := tai(p.previous);
        while assigned(cur) do
          begin
            if (cur.typ = ait_regalloc) and
               (tai_regalloc(cur).ratype=ra_dealloc) and
               (getregtype(tai_regalloc(cur).reg) = R_INTREGISTER) and
               (getsupreg(tai_regalloc(cur).reg) = supreg) then
              begin
                asml.remove(cur);
                cur.free;
                exit;
              end;
            if regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),cur) then
              exit;
            cur := tai(cur.previous);
          end;
      end;

    begin
      case current_procinfo.procdef.returndef.typ of
        orddef:
          { an ordinal of size 0 occupies no register }
          if current_procinfo.procdef.returndef.size <> 0 then
            begin
              DoRemoveLastDeallocForFuncRes(RS_EAX);
              { for int64/qword }
              if current_procinfo.procdef.returndef.size = 8 then
                DoRemoveLastDeallocForFuncRes(RS_EDX);
            end;
        arraydef,recorddef,pointerdef,
        stringdef,enumdef,procdef,objectdef,errordef,
        filedef,setdef,procvardef,
        classrefdef,forwarddef:
          DoRemoveLastDeallocForFuncRes(RS_EAX);
      end;
    end;
  { Pass 1 peephole optimizations for a register to register vmovapd/vmovaps
    in p. Nothing is done unless both operands of p are registers.

    p is a var parameter: when p itself is removed it is replaced by the
    instruction that follows it. The result is true after the "reg1,reg1"
    removal and after one of the two mov/mov folds; the FMA fold changes p but
    leaves the result false.

    Every CopyUsedRegs is paired with a ReleaseUsedRegs so that the temporary
    register state is not leaked. }
  function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1,hp2 : tai;
    begin
      result:=false;
      if MatchOpType(taicpu(p),top_reg,top_reg) then
        begin
          { vmova* reg1,reg1
            =>
            <nop> }
          if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
            begin
              GetNextInstruction(p,hp1);
              asml.Remove(p);
              p.Free;
              p:=hp1;
              result:=true;
            end
          else if GetNextInstruction(p,hp1) then
            begin
              if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
                MatchOpType(taicpu(hp1),top_reg,top_reg) and
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
                begin
                  { vmova* reg1,reg2
                    vmova* reg2,reg3
                    dealloc reg2
                    =>
                    vmova* reg1,reg3 }
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                    begin
                      taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end
                  { special case:
                    vmova* reg1,reg2
                    vmova* reg2,reg1
                    =>
                    vmova* reg1,reg2 }
                  else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
                    begin
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end;
                  ReleaseUsedRegs(TmpUsedRegs);
                end
              else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
                { we mix single and double operations here because we assume that the compiler
                  generates vmovapd only after double operations and vmovaps only after single operations }
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
                GetNextInstruction(hp1,hp2) and
                MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
                { the trailing move has to copy reg2 back: its source must be the
                  register written by p, otherwise it is an unrelated load of reg1
                  that must not be dropped }
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
                MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
                begin
                  { vmova* reg1,reg2
                    vfma*  ...,reg2
                    vmova* reg2,reg1
                    dealloc reg2
                    =>
                    vfma*  ...,reg1 }
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
                    begin
                      taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
                      asml.Remove(p);
                      p.Free;
                      asml.Remove(hp2);
                      hp2.Free;
                      p:=hp1;
                    end;
                  ReleaseUsedRegs(TmpUsedRegs);
                end;
            end;
        end;
    end;
  { Pass 1 peephole optimization for an instruction p whose result is written
    to operand 2:

      vop    ...,reg1
      vmova* reg1,reg2
      dealloc reg1
      =>
      vop    ...,reg2

    The following vmovapd/vmovaps is removed and its destination becomes the
    destination of p, provided reg1 is not used afterwards. Returns true if the
    move was folded.

    The caller has to pass an instruction with at least three operands, operand
    2 is accessed without a check.

    The temporary register state obtained with CopyUsedRegs is handed back with
    ReleaseUsedRegs so that it is not leaked. }
  function TX86AsmOptimizer.OptPass1VOP(const p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1 : tai;
    begin
      result:=false;
      if GetNextInstruction(p,hp1) and
        { we mix single and double operations here because we assume that the compiler
          generates vmovapd only after double operations and vmovaps only after single operations }
        MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
        MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
        (taicpu(hp1).oper[1]^.typ=top_reg) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)) then
            begin
              taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
              asml.Remove(hp1);
              hp1.Free;
              result:=true;
            end;
          ReleaseUsedRegs(TmpUsedRegs);
        end;
    end;
  565. function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  566. var
  567. hp1, hp2: tai;
  568. TmpUsedRegs : TAllUsedRegs;
  569. GetNextIntruction_p : Boolean;
  570. begin
  571. Result:=false;
  572. GetNextIntruction_p:=GetNextInstruction(p, hp1);
  573. if GetNextIntruction_p and
  574. MatchInstruction(hp1,A_AND,[]) and
  575. (taicpu(p).oper[1]^.typ = top_reg) and
  576. MatchOpType(taicpu(hp1),top_const,top_reg) and
  577. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
  578. case taicpu(p).opsize Of
  579. S_L:
  580. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  581. begin
  582. DebugMsg('PeepHole Optimization,MovAnd2Mov',p);
  583. asml.remove(hp1);
  584. hp1.free;
  585. Result:=true;
  586. exit;
  587. end;
  588. end
  589. else if GetNextIntruction_p and
  590. MatchInstruction(hp1,A_MOV,[]) and
  591. (taicpu(p).oper[1]^.typ = top_reg) and
  592. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
  593. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  594. begin
  595. CopyUsedRegs(TmpUsedRegs);
  596. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  597. { we have
  598. mov x, %treg
  599. mov %treg, y
  600. }
  601. if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
  602. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  603. { we've got
  604. mov x, %treg
  605. mov %treg, y
  606. with %treg is not used after }
  607. case taicpu(p).oper[0]^.typ Of
  608. top_reg:
  609. begin
  610. { change
  611. mov %reg, %treg
  612. mov %treg, y
  613. to
  614. mov %reg, y
  615. }
  616. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  617. DebugMsg('PeepHole Optimization,MovMov2Mov 2',p);
  618. asml.remove(hp1);
  619. hp1.free;
  620. ReleaseUsedRegs(TmpUsedRegs);
  621. Exit;
  622. end;
  623. top_ref:
  624. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  625. begin
  626. { change
  627. mov mem, %treg
  628. mov %treg, %reg
  629. to
  630. mov mem, %reg"
  631. }
  632. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  633. DebugMsg('PeepHole Optimization,MovMov2Mov 3',p);
  634. asml.remove(hp1);
  635. hp1.free;
  636. ReleaseUsedRegs(TmpUsedRegs);
  637. Exit;
  638. end;
  639. end;
  640. ReleaseUsedRegs(TmpUsedRegs);
  641. end
  642. else
  643. { Change
  644. mov %reg1, %reg2
  645. xxx %reg2, ???
  646. to
  647. mov %reg1, %reg2
  648. xxx %reg1, ???
  649. to avoid a write/read penalty
  650. }
  651. if MatchOpType(taicpu(p),top_reg,top_reg) and
  652. GetNextInstruction(p,hp1) and
  653. (tai(hp1).typ = ait_instruction) and
  654. (taicpu(hp1).ops >= 1) and
  655. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  656. { we have
  657. mov %reg1, %reg2
  658. XXX %reg2, ???
  659. }
  660. begin
  661. if ((taicpu(hp1).opcode = A_OR) or
  662. (taicpu(hp1).opcode = A_TEST)) and
  663. (taicpu(hp1).oper[1]^.typ = top_reg) and
  664. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  665. { we have
  666. mov %reg1, %reg2
  667. test/or %reg2, %reg2
  668. }
  669. begin
  670. CopyUsedRegs(TmpUsedRegs);
  671. { reg1 will be used after the first instruction,
  672. so update the allocation info }
  673. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  674. if GetNextInstruction(hp1, hp2) and
  675. (hp2.typ = ait_instruction) and
  676. taicpu(hp2).is_jmp and
  677. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  678. { change
  679. mov %reg1, %reg2
  680. test/or %reg2, %reg2
  681. jxx
  682. to
  683. test %reg1, %reg1
  684. jxx
  685. }
  686. begin
  687. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  688. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  689. asml.remove(p);
  690. p.free;
  691. p := hp1;
  692. ReleaseUsedRegs(TmpUsedRegs);
  693. Exit;
  694. end
  695. else
  696. { change
  697. mov %reg1, %reg2
  698. test/or %reg2, %reg2
  699. to
  700. mov %reg1, %reg2
  701. test/or %reg1, %reg1
  702. }
  703. begin
  704. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  705. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  706. end;
  707. ReleaseUsedRegs(TmpUsedRegs);
  708. end
  709. end
  710. else
  711. { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  712. x >= RetOffset) as it doesn't do anything (it writes either to a
  713. parameter or to the temporary storage room for the function
  714. result)
  715. }
  716. if GetNextIntruction_p and
  717. (tai(hp1).typ = ait_instruction) then
  718. begin
  719. if IsExitCode(hp1) and
  720. MatchOpType(p,top_reg,top_ref) and
  721. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  722. not(assigned(current_procinfo.procdef.funcretsym) and
  723. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  724. (taicpu(p).oper[1]^.ref^.index = NR_NO) then
  725. begin
  726. asml.remove(p);
  727. p.free;
  728. p:=hp1;
  729. DebugMsg('Peephole removed deadstore before leave/ret',p);
  730. RemoveLastDeallocForFuncRes(p);
  731. exit;
  732. end
  733. { change
  734. mov reg1, mem1
  735. cmp x, mem1
  736. to
  737. mov reg1, mem1
  738. cmp x, reg1
  739. }
  740. else if MatchOpType(p,top_reg,top_ref) and
  741. MatchInstruction(hp1,A_CMP,[taicpu(p).opsize]) and
  742. (taicpu(hp1).oper[1]^.typ = top_ref) and
  743. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  744. begin
  745. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  746. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  747. end;
  748. end;
  749. { Next instruction is also a MOV ? }
  750. if GetNextIntruction_p and
  751. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  752. begin
  753. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  754. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  755. { mov reg1, mem1 or mov mem1, reg1
  756. mov mem2, reg2 mov reg2, mem2}
  757. begin
  758. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  759. { mov reg1, mem1 or mov mem1, reg1
  760. mov mem2, reg1 mov reg2, mem1}
  761. begin
  762. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  763. { Removes the second statement from
  764. mov reg1, mem1/reg2
  765. mov mem1/reg2, reg1 }
  766. begin
  767. if taicpu(p).oper[0]^.typ=top_reg then
  768. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  769. DebugMsg('PeepHole Optimization,MovMov2Mov 1',p);
  770. asml.remove(hp1);
  771. hp1.free;
  772. Result:=true;
  773. exit;
  774. end
  775. else
  776. begin
  777. CopyUsedRegs(TmpUsedRegs);
  778. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  779. if (taicpu(p).oper[1]^.typ = top_ref) and
  780. { mov reg1, mem1
  781. mov mem2, reg1 }
  782. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  783. GetNextInstruction(hp1, hp2) and
  784. MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
  785. OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  786. OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
  787. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  788. { change to
  789. mov reg1, mem1 mov reg1, mem1
  790. mov mem2, reg1 cmp reg1, mem2
  791. cmp mem1, reg1
  792. }
  793. begin
  794. asml.remove(hp2);
  795. hp2.free;
  796. taicpu(hp1).opcode := A_CMP;
  797. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  798. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  799. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  800. DebugMsg('Peephole MovMovCmp2MovCmp done',hp1);
  801. end;
  802. ReleaseUsedRegs(TmpUsedRegs);
  803. end;
  804. end
  805. else if (taicpu(p).oper[1]^.typ=top_ref) and
  806. OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  807. begin
  808. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  809. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  810. DebugMsg('PeepHole Optimization,MovMov2MovMov1',p);
  811. end
  812. else
  813. begin
  814. CopyUsedRegs(TmpUsedRegs);
  815. if GetNextInstruction(hp1, hp2) and
  816. MatchOpType(taicpu(p),top_ref,top_reg) and
  817. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  818. (taicpu(hp1).oper[1]^.typ = top_ref) and
  819. MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
  820. MatchOpType(taicpu(hp2),top_ref,top_reg) and
  821. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  822. if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
  823. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  824. { mov mem1, %reg1
  825. mov %reg1, mem2
  826. mov mem2, reg2
  827. to:
  828. mov mem1, reg2
  829. mov reg2, mem2}
  830. begin
  831. AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  832. DebugMsg('PeepHole Optimization,MovMovMov2MovMov 1',p);
  833. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  834. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  835. asml.remove(hp2);
  836. hp2.free;
  837. end
  838. {$ifdef i386}
  839. { this is enabled for i386 only, as the rules to create the reg sets below
  840. are too complicated for x86-64, so this makes this code too error prone
  841. on x86-64
  842. }
  843. else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  844. not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
  845. not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
  846. { mov mem1, reg1 mov mem1, reg1
  847. mov reg1, mem2 mov reg1, mem2
  848. mov mem2, reg2 mov mem2, reg1
  849. to: to:
  850. mov mem1, reg1 mov mem1, reg1
  851. mov mem1, reg2 mov reg1, mem2
  852. mov reg1, mem2
  853. or (if mem1 depends on reg1
  854. and/or if mem2 depends on reg2)
  855. to:
  856. mov mem1, reg1
  857. mov reg1, mem2
  858. mov reg1, reg2
  859. }
  860. begin
  861. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  862. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  863. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  864. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  865. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  866. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  867. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  868. AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  869. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  870. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  871. AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  872. end
  873. else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  874. begin
  875. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  876. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  877. end
  878. else
  879. begin
  880. asml.remove(hp2);
  881. hp2.free;
  882. end
  883. {$endif i386}
  884. ;
  885. ReleaseUsedRegs(TmpUsedRegs);
  886. end;
  887. end
  888. (* { movl [mem1],reg1
  889. movl [mem1],reg2
  890. to
  891. movl [mem1],reg1
  892. movl reg1,reg2
  893. }
  894. else if (taicpu(p).oper[0]^.typ = top_ref) and
  895. (taicpu(p).oper[1]^.typ = top_reg) and
  896. (taicpu(hp1).oper[0]^.typ = top_ref) and
  897. (taicpu(hp1).oper[1]^.typ = top_reg) and
  898. (taicpu(p).opsize = taicpu(hp1).opsize) and
  899. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  900. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  901. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  902. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  903. else*)
  904. { movl const1,[mem1]
  905. movl [mem1],reg1
  906. to
  907. movl const1,reg1
  908. movl reg1,[mem1]
  909. }
  910. else if MatchOpType(Taicpu(p),top_const,top_ref) and
  911. MatchOpType(Taicpu(hp1),top_ref,top_reg) and
  912. (taicpu(p).opsize = taicpu(hp1).opsize) and
  913. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  914. not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
  915. begin
  916. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  917. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  918. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  919. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  920. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  921. DebugMsg('PeepHole Optimization,MovMov2MovMov 1',p);
  922. end
  923. end
  924. else if (taicpu(p).oper[1]^.typ = top_reg) and
  925. GetNextIntruction_p and
  926. (hp1.typ = ait_instruction) and
  927. GetNextInstruction(hp1, hp2) and
  928. MatchInstruction(hp2,A_MOV,[]) and
  929. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  930. (taicpu(hp2).oper[0]^.typ=top_reg) and
  931. (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
  932. (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
  933. ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
  934. IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
  935. ) then
  936. { change movsX/movzX reg/ref, reg2
  937. add/sub/or/... reg3/$const, reg2
  938. mov reg2 reg/ref
  939. to add/sub/or/... reg3/$const, reg/ref }
  940. begin
  941. CopyUsedRegs(TmpUsedRegs);
  942. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  943. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  944. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  945. begin
  946. { by example:
  947. movswl %si,%eax movswl %si,%eax p
  948. decl %eax addl %edx,%eax hp1
  949. movw %ax,%si movw %ax,%si hp2
  950. ->
  951. movswl %si,%eax movswl %si,%eax p
  952. decw %eax addw %edx,%eax hp1
  953. movw %ax,%si movw %ax,%si hp2
  954. }
  955. DebugMsg('Peephole Optimization MovOpMov2Op ('+
  956. std_op2str[taicpu(p).opcode]+gas_opsize2str[taicpu(p).opsize]+' '+
  957. std_op2str[taicpu(hp1).opcode]+gas_opsize2str[taicpu(hp1).opsize]+' '+
  958. std_op2str[taicpu(hp2).opcode]+gas_opsize2str[taicpu(hp2).opsize],p);
  959. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  960. {
  961. ->
  962. movswl %si,%eax movswl %si,%eax p
  963. decw %si addw %dx,%si hp1
  964. movw %ax,%si movw %ax,%si hp2
  965. }
  966. case taicpu(hp1).ops of
  967. 1:
  968. begin
  969. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  970. if taicpu(hp1).oper[0]^.typ=top_reg then
  971. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  972. end;
  973. 2:
  974. begin
  975. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  976. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  977. (taicpu(hp1).opcode<>A_SHL) and
  978. (taicpu(hp1).opcode<>A_SHR) and
  979. (taicpu(hp1).opcode<>A_SAR) then
  980. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  981. end;
  982. else
  983. internalerror(2008042701);
  984. end;
  985. {
  986. ->
  987. decw %si addw %dx,%si p
  988. }
  989. asml.remove(p);
  990. asml.remove(hp2);
  991. p.Free;
  992. hp2.Free;
  993. p := hp1;
  994. end;
  995. ReleaseUsedRegs(TmpUsedRegs);
  996. end
  997. else if GetNextIntruction_p and
  998. MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  999. GetNextInstruction(hp1, hp2) and
  1000. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  1001. MatchOperand(Taicpu(p).oper[0]^,0) and
  1002. (Taicpu(p).oper[1]^.typ = top_reg) and
  1003. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  1004. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  1005. { mov reg1,0
  1006. bts reg1,operand1 --> mov reg1,operand2
  1007. or reg1,operand2 bts reg1,operand1}
  1008. begin
  1009. Taicpu(hp2).opcode:=A_MOV;
  1010. asml.remove(hp1);
  1011. insertllitem(hp2,hp2.next,hp1);
  1012. asml.remove(p);
  1013. p.free;
  1014. p:=hp1;
  1015. end
  1016. else if GetNextIntruction_p and
  1017. MatchInstruction(hp1,A_LEA,[S_L]) and
  1018. MatchOpType(Taicpu(p),top_ref,top_reg) and
  1019. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  1020. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  1021. ) or
  1022. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  1023. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  1024. )
  1025. ) then
  1026. { mov reg1,ref
  1027. lea reg2,[reg1,reg2]
  1028. to
  1029. add reg2,ref}
  1030. begin
  1031. CopyUsedRegs(TmpUsedRegs);
  1032. { reg1 may not be used afterwards }
  1033. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1034. begin
  1035. Taicpu(hp1).opcode:=A_ADD;
  1036. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  1037. DebugMsg('Peephole MovLea2Add done',hp1);
  1038. asml.remove(p);
  1039. p.free;
  1040. p:=hp1;
  1041. end;
  1042. ReleaseUsedRegs(TmpUsedRegs);
  1043. end;
  1044. end;
  { Second-pass peephole optimisation for the MOV instruction at p.

    Two rewrites are attempted, in this order:

      1. mov reg1,reg2
         mov/movzx/movsx (reg2,..),reg2   ->   mov/movzx/movsx (reg1,..),reg2

         p is removed and freed, p is advanced to the following instruction
         and the function returns True.

      2. mov (ref),reg
         <op accepted by IsFoldableArithOp, or a matching lea> ..,reg
         mov reg,(ref)                    ->   <op> ..,(ref)

         p and the trailing mov are removed and freed and p is advanced to
         the middle instruction.  Result is left at False in this case. }
  function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
    var
      ScratchRegs : TAllUsedRegs;
      NextInsn, ThirdInsn : tai;
    begin
      Result:=false;

      { --- rewrite 1: forward the source register into the address --- }
      if MatchOpType(taicpu(p),top_reg,top_reg) and
         GetNextInstruction(p, NextInsn) and
         MatchInstruction(NextInsn,A_MOV,A_MOVZX,A_MOVSX,[]) and
         MatchOpType(taicpu(NextInsn),top_ref,top_reg) and
         ((taicpu(NextInsn).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) or
          (taicpu(NextInsn).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)) and
         (getsupreg(taicpu(NextInsn).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
        begin
          { replace every use of reg2 inside the reference by reg1 }
          if (taicpu(NextInsn).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
            taicpu(NextInsn).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
          if (taicpu(NextInsn).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
            taicpu(NextInsn).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
          asml.remove(p);
          p.free;
          p := NextInsn;
          Result:=true;
          exit;
        end;

      { --- rewrite 2: fold load / modify / store into one memory op --- }
      if (taicpu(p).oper[0]^.typ = top_ref) and
         GetNextInstruction(p,NextInsn) and
         (NextInsn.typ = ait_instruction) and
         { the GetNextInstruction(NextInsn,ThirdInsn) call is repeated in both
           alternatives so that the cheap tests run first }
         (
           (
             IsFoldableArithOp(taicpu(NextInsn),taicpu(p).oper[1]^.reg) and
             GetNextInstruction(NextInsn,ThirdInsn) and
             MatchInstruction(ThirdInsn,A_MOV,[])
           ) or
           (
             (taicpu(NextInsn).opcode=A_LEA) and
             GetNextInstruction(NextInsn,ThirdInsn) and
             MatchInstruction(ThirdInsn,A_MOV,[]) and
             (
               (MatchReference(taicpu(NextInsn).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
                (taicpu(NextInsn).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
               ) or
               (MatchReference(taicpu(NextInsn).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) and
                (taicpu(NextInsn).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg)
               ) or
               (MatchReferenceWithOffset(taicpu(NextInsn).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_NO)) or
               (MatchReferenceWithOffset(taicpu(NextInsn).oper[0]^.ref^,NR_NO,taicpu(p).oper[1]^.reg))
             ) and
             (
               (MatchOperand(taicpu(p).oper[1]^,taicpu(ThirdInsn).oper[0]^)) or
               not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,NextInsn,UsedRegs))
             )
           )
         ) and
         { the last operand of the middle instruction must be what is stored back }
         MatchOperand(taicpu(NextInsn).oper[taicpu(NextInsn).ops-1]^,taicpu(ThirdInsn).oper[0]^) and
         (taicpu(ThirdInsn).oper[1]^.typ = top_ref) then
        begin
          CopyUsedRegs(ScratchRegs);
          UpdateUsedRegs(ScratchRegs,tai(NextInsn.next));
          { the store must target the location that was loaded, and the
            register must not be needed after the store }
          if (RefsEqual(taicpu(ThirdInsn).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
              not(RegUsedAfterInstruction(taicpu(ThirdInsn).oper[0]^.reg,ThirdInsn,ScratchRegs))) then
            begin
              if (taicpu(NextInsn).opcode=A_INC) or
                 (taicpu(NextInsn).opcode=A_DEC) or
                 (taicpu(NextInsn).opcode=A_NOT) or
                 (taicpu(NextInsn).opcode=A_NEG) then
                { single-operand instructions: operand 0 becomes the reference }
                taicpu(NextInsn).loadRef(0,taicpu(p).oper[0]^.ref^)
              else if taicpu(NextInsn).opcode=A_LEA then
                begin
                  { lea turns into an add whose source is the other register of
                    the address, or its offset when there is no other register }
                  taicpu(NextInsn).opcode:=A_ADD;
                  if (taicpu(NextInsn).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg) and
                     (taicpu(NextInsn).oper[0]^.ref^.index<>NR_NO) then
                    taicpu(NextInsn).loadreg(0,taicpu(NextInsn).oper[0]^.ref^.index)
                  else if (taicpu(NextInsn).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg) and
                          (taicpu(NextInsn).oper[0]^.ref^.base<>NR_NO) then
                    taicpu(NextInsn).loadreg(0,taicpu(NextInsn).oper[0]^.ref^.base)
                  else
                    taicpu(NextInsn).loadconst(0,taicpu(NextInsn).oper[0]^.ref^.offset);
                  taicpu(NextInsn).loadRef(1,taicpu(p).oper[0]^.ref^);
                  DebugMsg('Peephole FoldLea done',NextInsn);
                end
              else
                { two-operand instructions: the destination becomes the reference }
                taicpu(NextInsn).loadRef(1,taicpu(p).oper[0]^.ref^);

              { drop the load and the store, continue at the folded instruction }
              asml.remove(p);
              p.free;
              asml.remove(ThirdInsn);
              ThirdInsn.free;
              p := NextInsn;
            end;
          ReleaseUsedRegs(ScratchRegs);
        end;
    end;
  { Second-pass peephole optimisation for the IMUL instruction at p.

    When p multiplies a register by a constant (or by a reference whose
    refaddr is addr_full) and the previous instruction is a reg-to-reg MOV
    into that same register, the pair

        mov  reg1,reg2
        imul y,reg2

    is rewritten to the three-operand form

        imul y,reg1,reg2

    and the MOV is removed and freed.  The rewrite is applied only when
    RegUsedAfterInstruction reports False for the IMUL register.  Returns
    True when the rewrite happened, False otherwise. }
  function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
    var
      ScratchRegs : TAllUsedRegs;
      PrevInsn : tai;
    begin
      Result:=false;

      { bail out unless p and its predecessor have the expected shape }
      if not(
           (taicpu(p).ops >= 2) and
           ((taicpu(p).oper[0]^.typ = top_const) or
            ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
           (taicpu(p).oper[1]^.typ = top_reg) and
           { either the two-operand form, or a three-operand form whose
             second and third operands are the same register }
           ((taicpu(p).ops = 2) or
            ((taicpu(p).oper[2]^.typ = top_reg) and
             (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
           GetLastInstruction(p,PrevInsn) and
           MatchInstruction(PrevInsn,A_MOV,[]) and
           MatchOpType(PrevInsn,top_reg,top_reg) and
           { the MOV must write the register the IMUL works on; a 32 bit MOV
             feeding a 64 bit IMUL on the same super register is accepted too }
           ((taicpu(PrevInsn).oper[1]^.reg = taicpu(p).oper[1]^.reg) or
            ((taicpu(PrevInsn).opsize=S_L) and (taicpu(p).opsize=S_Q) and
             SuperRegistersEqual(taicpu(PrevInsn).oper[1]^.reg,taicpu(p).oper[1]^.reg)))
         ) then
        exit;

      CopyUsedRegs(ScratchRegs);
      if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,p,ScratchRegs)) then
        begin
          { switch to the three-operand encoding: source is the MOV's source,
            destination is the MOV's destination }
          taicpu(p).ops := 3;
          taicpu(p).loadreg(1,taicpu(PrevInsn).oper[0]^.reg);
          taicpu(p).loadreg(2,taicpu(PrevInsn).oper[1]^.reg);
          DebugMsg('Peephole MovImul2Imul done',p);
          asml.remove(PrevInsn);
          PrevInsn.free;
          result:=true;
        end;
      ReleaseUsedRegs(ScratchRegs);
    end;
  { First-pass peephole optimisations for the AND instruction at p.

    The instruction following p is inspected and the first matching rewrite
    of the list below is applied:

      AndAnd2And     and c1,reg ; and c2,reg      ->  and (c1 and c2),reg
      AndMovzToAnd   and c,reg  ; movzx sub,reg   ->  and c,reg
      AndMovsxToAnd  and c,reg  ; movsx sub,reg   ->  and c,reg
      and -> test    and x,reg  ; jcc             ->  test x,reg ; jcc

    "sub" is a narrower part of the same super register as "reg".  The
    extension is only dropped when the mask c already guarantees the bits
    the extension would produce: c must fit into the source width for movzx
    and must additionally have the sign bit of the source width clear for
    movsx.

    Returns True only for AndAnd2And (p then points at the merged AND).
    The other rewrites modify the instruction stream but leave Result at
    False.  Nothing is done when p has no following instruction. }
  function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
    var
      hp1 : tai;
    begin
      Result:=false;

      if not(GetNextInstruction(p, hp1)) then
        exit;

      if MatchOpType(p,top_const,top_reg) and
         MatchInstruction(hp1,A_AND,[]) and
         MatchOpType(hp1,top_const,top_reg) and
         (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
         { the second register must contain the first one, so compare their subreg types }
         (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
         (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
        { change
            and const1, reg
            and const2, reg
          to
            and (const1 and const2), reg
        }
        begin
          taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
          DebugMsg('Peephole AndAnd2And done',hp1);
          asml.remove(p);
          p.Free;
          p:=hp1;
          Result:=true;
          exit;
        end
      else if MatchOpType(p,top_const,top_reg) and
         MatchInstruction(hp1,A_MOVZX,[]) and
         (taicpu(hp1).oper[0]^.typ = top_reg) and
         MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
         { Source and destination of the movzx must be parts of the same
           super register.  Comparing the sub registers here can never
           succeed, because a zero extension always has a source that is
           narrower than its destination. }
         (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
         { A high byte source (e.g. %ah) is not the byte kept by the mask,
           so the extension must stay in that case. }
         (getsubreg(taicpu(hp1).oper[0]^.reg)<>R_SUBH) and
         (((taicpu(p).opsize=S_W) and
           (taicpu(hp1).opsize=S_BW)) or
          ((taicpu(p).opsize=S_L) and
           (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
          or
          ((taicpu(p).opsize=S_Q) and
           (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
         ) then
        begin
          { the mask must not keep any bit above the movzx source width }
          if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
             ) or
             (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
             or
             (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $ffffffff)=taicpu(p).oper[0]^.val)
             )
{$endif x86_64}
             then
            begin
              DebugMsg('Peephole AndMovzToAnd done',p);
              asml.remove(hp1);
              hp1.free;
            end;
        end
      else if MatchOpType(p,top_const,top_reg) and
         MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
         (taicpu(hp1).oper[0]^.typ = top_reg) and
         MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
         (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
         { same reasoning as for movzx: a high byte source is not covered by
           the mask applied to the low bits }
         (getsubreg(taicpu(hp1).oper[0]^.reg)<>R_SUBH) and
         (((taicpu(p).opsize=S_W) and
           (taicpu(hp1).opsize=S_BW)) or
          ((taicpu(p).opsize=S_L) and
           (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
          or
          ((taicpu(p).opsize=S_Q) and
           (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
         ) then
        begin
          { the mask must clear the sign bit of the movsx source, otherwise
            the sign extension could still set upper bits }
          if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
             ) or
             (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
             or
             (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
             )
{$endif x86_64}
             then
            begin
              DebugMsg('PeepHole Optimization,AndMovsxToAnd',p);
              asml.remove(hp1);
              hp1.free;
            end;
        end
      else if (taicpu(p).oper[1]^.typ = top_reg) and
         (hp1.typ = ait_instruction) and
         (taicpu(hp1).is_jmp) and
         (taicpu(hp1).opcode<>A_JMP) and
         not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
        { change
            and x, reg
            jxx
          to
            test x, reg
            jxx
          if reg is deallocated before the
          jump, but only if it's a conditional jump (PFV)
        }
        taicpu(p).opcode := A_TEST;
    end;
  { Post-peephole rewrite of the MOV instruction at p:

        mov $0,%reg   ->   xor %reg,%reg

    The rewrite is applied only when the source operand matches the
    constant 0, the destination is a register and NR_DEFAULTFLAGS is not
    reported as in use by RegInUsedRegs. }
  procedure TX86AsmOptimizer.PostPeepholeOptMov(const p : tai);
    begin
      { source must be the constant zero }
      if not MatchOperand(taicpu(p).oper[0]^,0) then
        exit;
      { destination must be a register }
      if taicpu(p).oper[1]^.typ <> Top_Reg then
        exit;
      { the flags register must be free }
      if RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs) then
        exit;

      taicpu(p).opcode := A_XOR;
      { make the source operand the same register as the destination }
      taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
    end;
  1298. end.