aoptx86.pas 58 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
  27. type
  { Peephole optimizer for x86 targets, built on top of TAsmOptimizer. }
  TX86AsmOptimizer = class(TAsmOptimizer)
    { true if instruction hp gives reg a completely new value that does not
      depend on the previous contents of reg }
    function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
  protected
    { ---- relations between (sub)registers ---- }

    { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
    function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
    { checks whether reading the value in reg1 depends on the value of reg2. This
      is very similar to SuperRegisterEquals, except it takes into account that
      R_SUBH and R_SUBL are independent (e.g. reading from AL does not
      depend on the value in AH). }
    function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;

    { ---- per-instruction optimizations, one routine per pass and opcode ---- }

    function OptPass1AND(var p : tai) : boolean;
    function OptPass1MOV(var p : tai) : boolean;
    function OptPass1VMOVAP(var p : tai) : boolean;
    function OptPass1VOP(const p : tai) : boolean;

    function OptPass2Imul(var p : tai) : boolean;
    function OptPass2MOV(var p : tai) : boolean;

    procedure PostPeepholeOptMov(const p : tai);

    { ---- helpers ---- }

    { inserts s as an assembler comment before p when DEBUG_AOPTCPU is
      defined, does nothing otherwise }
    procedure DebugMsg(const s : string; p : tai);inline;
    { allocates reg between (and including) the instructions p1 and p2 }
    procedure AllocRegBetween(reg : tregister; p1,p2 : tai;var initialusedregs : TAllUsedRegs);
    { removes the last deallocation of the function result register(s)
      found before p }
    procedure RemoveLastDeallocForFuncRes(p : tai);

    { true if p starts one of the recognised exit sequences:
      ret, leave/ret or mov/pop/ret }
    class function IsExitCode(p : tai) : boolean;
    { true if hp1 is a simple arithmetic/logic instruction that has reg as
      its destination }
    class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean;
  end;
  { MatchInstruction: true if instr is an instruction with one of the given
    opcodes and an operand size contained in opsize; an empty opsize set
    accepts any operand size. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const op1, op2: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const op1, op2, op3: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;

  { MatchOperand: true if the operand is exactly the given register, the given
    constant, or equal to a second operand respectively. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;

  { returns true, if both references agree in all their address components }
  function RefsEqual(const r1, r2: treference): boolean;

  { returns true, if ref is a plain reference without offset, segment, symbols
    or scaling that uses the registers passed as base and index; NR_INVALID
    for base or index accepts any register in that position }
  function MatchReference(const ref: treference; base, index: TRegister): Boolean;

  { returns true, if ref is a reference using only the registers passed as base and index
    and possibly having an offset; NR_INVALID for base or index accepts any
    register in that position }
  function MatchReferenceWithOffset(const ref: treference; base, index: TRegister): Boolean;

  { returns true, if instr has exactly two operands of the types ot0 and ot1 }
  function MatchOpType(const instr: tai; ot0, ot1: toptype): Boolean;
  64. implementation
  65. uses
  66. cutils,
  67. verbose,
  68. procinfo,
  69. symconst,symsym,
  70. itcpugas;
  { Returns true if instr is an instruction with opcode op whose operand size
    is a member of opsize. An empty opsize set matches every operand size. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
    begin
      { only real instructions carry an opcode }
      if instr.typ <> ait_instruction then
        exit(false);
      if taicpu(instr).opcode <> op then
        exit(false);
      result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { Returns true if instr is an instruction whose opcode is op1 or op2 and
    whose operand size is a member of opsize. An empty opsize set matches
    every operand size. }
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
    begin
      result := false;
      if instr.typ <> ait_instruction then
        exit;
      if (taicpu(instr).opcode <> op1) and
         (taicpu(instr).opcode <> op2) then
        exit;
      result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { Returns true if instr is an instruction whose opcode is op1, op2 or op3
    and whose operand size is a member of opsize. An empty opsize set matches
    every operand size. }
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
    begin
      result := false;
      if instr.typ <> ait_instruction then
        exit;
      if (taicpu(instr).opcode <> op1) and
         (taicpu(instr).opcode <> op2) and
         (taicpu(instr).opcode <> op3) then
        exit;
      result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { Returns true if instr is an instruction whose opcode occurs in ops and
    whose operand size is a member of opsize. An empty opsize set matches
    every operand size; an empty ops array never matches. }
  function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
    const opsize : topsizes) : boolean;
    var
      op : TAsmOp;
    begin
      result:=false;
      { the instruction kind and the operand size do not depend on the opcode
        being tried, so test them once instead of once per array element }
      if (instr.typ <> ait_instruction) or
         not((opsize = []) or (taicpu(instr).opsize in opsize)) then
        exit;
      for op in ops do
        if taicpu(instr).opcode = op then
          begin
            result:=true;
            exit;
          end;
    end;
  { Returns true if oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ = top_reg then
        result := oper.reg = reg
      else
        result := false;
    end;
  { Returns true if oper is a constant operand with the value a. }
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
    begin
      if oper.typ = top_const then
        result := oper.val = a
      else
        result := false;
    end;
  { Returns true if both operands have the same type and the same contents.
    Only constant, register and reference operands can be compared; two
    operands that share any other type trigger an internal error. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
    begin
      { operands of different types can never be equal }
      if oper1.typ <> oper2.typ then
        exit(false);
      Result := true;
      case oper1.typ of
        top_reg:
          Result := oper1.reg = oper2.reg;
        top_const:
          Result := oper1.val = oper2.val;
        top_ref:
          Result := RefsEqual(oper1.ref^, oper2.ref^);
        else
          internalerror(2013102801);
      end;
    end;
  { Returns true if r1 and r2 are the same reference: offset, segment, base,
    index, scale factor, symbol, relative symbol and address type all have
    to be equal. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      Result := false;
      { numeric part }
      if (r1.offset <> r2.offset) or
         (r1.scalefactor <> r2.scalefactor) then
        exit;
      { registers }
      if (r1.segment <> r2.segment) or
         (r1.base <> r2.base) or
         (r1.index <> r2.index) then
        exit;
      { symbolic part }
      if (r1.symbol <> r2.symbol) or
         (r1.relsymbol <> r2.relsymbol) or
         (r1.refaddr <> r2.refaddr) then
        exit;
      Result := true;
    end;
  { Returns true if ref has a zero offset, no segment override, no symbols,
    a scale factor of 0 or 1, and uses the given base and index registers.
    Passing NR_INVALID for base or index accepts any register there. }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
    begin
      { apart from the offset test the conditions are exactly those of
        MatchReferenceWithOffset }
      if ref.offset<>0 then
        Result:=false
      else
        Result:=MatchReferenceWithOffset(ref,base,index);
    end;
  { Returns true if ref has no segment override, no symbols, a scale factor
    of 0 or 1, and uses the given base and index registers; the offset is
    not looked at. Passing NR_INVALID for base or index accepts any register
    there. }
  function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
    begin
      Result:=false;
      if not(ref.scalefactor in [0,1]) then
        exit;
      if ref.segment<>NR_NO then
        exit;
      if (ref.symbol<>nil) or (ref.relsymbol<>nil) then
        exit;
      { NR_INVALID acts as a wildcard for the register in question }
      if (base<>NR_INVALID) and (ref.base<>base) then
        exit;
      Result:=(index=NR_INVALID) or (ref.index=index);
    end;
  { Returns true if instr has exactly two operands and their types are ot0
    and ot1, in that order. instr is cast to taicpu without checking its
    type, so the caller has to pass an instruction. }
  function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
    var
      insn : taicpu;
    begin
      insn:=taicpu(instr);
      if insn.ops<>2 then
        Result:=false
      else
        Result:=(insn.oper[0]^.typ=ot0) and
          (insn.oper[1]^.typ=ot1);
    end;
  { Inserts s as an assembler comment in front of p when the unit is compiled
    with DEBUG_AOPTCPU; without that define the routine is empty. }
  procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
{$ifdef DEBUG_AOPTCPU}
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
{$endif DEBUG_AOPTCPU}
    end;
  { Returns true if writing a new value to reg1 replaces every bit of reg2.
    Registers with different super registers never overwrite each other;
    non-integer registers sharing a super register always do. For integer
    registers the answer depends on the sub register written. }
  function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
    begin
      result:=false;
      if SuperRegistersEqual(reg1,reg2) then
        begin
          if getregtype(reg1)<>R_INTREGISTER then
            { same super register and no sub register subtleties }
            result:=true
          else
            case getsubreg(reg1) of
              { A write to the low or the high byte leaves the other byte and
                all higher bits untouched, e.g. for a write to R_SUBL
                  reg2 := (reg2 and $ffffff00) or byte(reg1);
                so reg2 is only overwritten entirely if it is that very
                same byte register. }
              R_SUBL,
              R_SUBH:
                result:=getsubreg(reg2)=getsubreg(reg1);
              { A write to R_SUBW preserves the high 16 bits of anything
                larger:
                  reg2 := (reg2 and $ffff0000) or word(reg1); }
              R_SUBW:
                result:=getsubreg(reg2) in [R_SUBL,R_SUBH,R_SUBW];
              { a write to R_SUBD always overwrites every other subregister,
                because it clears the high 32 bits of R_SUBQ on x86_64 }
              R_SUBD,
              R_SUBQ:
                result:=true;
              else
                internalerror(2017042801);
            end;
        end;
    end;
  { Returns true if the value read from reg1 depends on the contents of reg2.
    This is the case when both share a super register, with one exception:
    the low and the high byte sub registers are independent of each other. }
  function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
    begin
      if not SuperRegistersEqual(reg1,reg2) then
        exit(false);
      { same super register: dependent unless shown otherwise below }
      result:=true;
      if getregtype(reg1)=R_INTREGISTER then
        case getsubreg(reg1) of
          R_SUBL:
            result:=getsubreg(reg2)<>R_SUBH;
          R_SUBH:
            result:=getsubreg(reg2)<>R_SUBL;
          R_SUBW,
          R_SUBD,
          R_SUBQ:
            { these overlap with every sub register }
            ;
          else
            internalerror(2017042802);
        end;
    end;
  { allocates register reg between (and including) instructions p1 and p2
    the type of p1 and p2 must not be in SkipInstr
    note that this routine is both called from the peephole optimizer
    (where optinfo is not yet initialised) and from the cse (where it is)

    reg             : register to keep allocated; integer and mm registers are
                      widened to their whole super register
    p1, p2          : first and last instruction of the range
    initialusedregs : used register state; reg is added to it when it was not
                      marked as used yet

    Nothing is done for the stack pointer, the frame pointer or when p1 is
    not assigned. All allocation/deallocation markers for reg inside the
    range are removed; an allocation before p1 and a deallocation at the end
    of the range are re-created where needed. }
  procedure TX86AsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
    var
      hp, start: tai;
      removedsomething,
      firstRemovedWasAlloc,
      lastRemovedWasDealloc: boolean;
    begin
{$ifdef EXTDEBUG}
{      if assigned(p1.optinfo) and
         (ptaiprop(p1.optinfo)^.usedregs <> initialusedregs) then
        internalerror(2004101010); }
{$endif EXTDEBUG}
      start := p1;
      if (reg = NR_ESP) or
         (reg = current_procinfo.framepointer) or
         not(assigned(p1)) then
        { this happens with registers which are loaded implicitly, outside the }
        { current block (e.g. esi with self)                                   }
        exit;
      { make sure we allocate it for this instruction }
      getnextinstruction(p2,p2);
      lastRemovedWasDealloc := false;
      removedSomething := false;
      firstRemovedWasAlloc := false;
      { do it the safe way: always allocate the full super register,
        as we do no register re-allocation in the peephole optimizer,
        this does not hurt
      }
      case getregtype(reg) of
        R_MMREGISTER:
          reg:=newreg(R_MMREGISTER,getsupreg(reg),R_SUBMMWHOLE);
        R_INTREGISTER:
          reg:=newreg(R_INTREGISTER,getsupreg(reg),R_SUBWHOLE);
      end;
{$ifdef allocregdebug}
      { mark the range with comments; this is done after reg has been widened
        so the name printed is the register that really gets allocated }
      hp := tai_comment.Create(strpnew('allocating '+std_regname(reg)+
        ' from here...'));
      insertllitem(p1.previous,p1,hp);
      { p2 is not assigned if the range extends to the end of the list }
      if assigned(p2) then
        begin
          hp := tai_comment.Create(strpnew('allocated '+std_regname(reg)+
            ' till here...'));
          insertllitem(p2,p2.next,hp);
        end;
{$endif allocregdebug}
      if not(RegInUsedRegs(reg,initialusedregs)) then
        begin
          hp := tai_regalloc.alloc(reg,nil);
          insertllItem(p1.previous,p1,hp);
          IncludeRegInUsedRegs(reg,initialusedregs);
        end;
      while assigned(p1) and
            (p1 <> p2) do
        begin
          if assigned(p1.optinfo) then
            internalerror(2014022301); // IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs);
          p1 := tai(p1.next);
          repeat
            { skip everything that is neither an instruction nor a register
              (de)allocation marker }
            while assigned(p1) and
                  (p1.typ in (SkipInstr-[ait_regalloc])) Do
              p1 := tai(p1.next);
            { remove all allocation/deallocation info about the register in between }
            if assigned(p1) and
               (p1.typ = ait_regalloc) then
              begin
                { same super register, different sub register? }
                if SuperRegistersEqual(reg,tai_regalloc(p1).reg) and (tai_regalloc(p1).reg<>reg) then
                  begin
                    if (getsubreg(tai_regalloc(p1).reg)>getsubreg(reg)) or (getsubreg(reg)=R_SUBH) then
                      internalerror(2016101501);
                    tai_regalloc(p1).reg:=reg;
                  end;
                if tai_regalloc(p1).reg=reg then
                  begin
                    { remember the kind of the first and of the last marker
                      removed, they decide what is re-inserted below }
                    if not removedSomething then
                      begin
                        firstRemovedWasAlloc := tai_regalloc(p1).ratype=ra_alloc;
                        removedSomething := true;
                      end;
                    lastRemovedWasDealloc := (tai_regalloc(p1).ratype=ra_dealloc);
                    hp := tai(p1.Next);
                    asml.Remove(p1);
                    p1.free;
                    p1 := hp;
                  end
                else
                  p1 := tai(p1.next);
              end;
          until not(assigned(p1)) or
                not(p1.typ in SkipInstr);
        end;
      if assigned(p1) then
        begin
          if firstRemovedWasAlloc then
            begin
              hp := tai_regalloc.Alloc(reg,nil);
              insertLLItem(start.previous,start,hp);
            end;
          if lastRemovedWasDealloc then
            begin
              hp := tai_regalloc.DeAlloc(reg,nil);
              insertLLItem(p1.previous,p1,hp);
            end;
        end;
    end;
  { Returns true if instruction hp loads reg with a new value that does not
    depend on the previous contents of reg, i.e. hp overwrites reg entirely
    and does not read it as a source or inside a memory reference.
    Returns false if hp is not assigned or is not an instruction.

    Recognised are the move/load style instructions listed below, pop,
    three and one operand (i)mul, the sign extension instructions, the
    segment load instructions, aam, lahf, lods*, setcc and f(n)stsw. }
  function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      if not assigned(hp) or
         (hp.typ <> ait_instruction) then
        begin
          Result := false;
          exit;
        end;
      p := taicpu(hp);
      Result :=
        { two operand moves/loads with a register destination }
        (((p.opcode = A_MOV) or
          (p.opcode = A_MOVZX) or
          (p.opcode = A_MOVSX) or
          (p.opcode = A_LEA) or
          (p.opcode = A_VMOVSS) or
          (p.opcode = A_VMOVSD) or
          (p.opcode = A_VMOVAPD) or
          (p.opcode = A_VMOVAPS) or
          (p.opcode = A_VMOVQ) or
          (p.opcode = A_MOVSS) or
          (p.opcode = A_MOVSD) or
          (p.opcode = A_MOVQ) or
          (p.opcode = A_MOVAPD) or
          (p.opcode = A_MOVAPS) or
{$ifndef x86_64}
          (p.opcode = A_LDS) or
          (p.opcode = A_LES) or
{$endif not x86_64}
          (p.opcode = A_LFS) or
          (p.opcode = A_LGS) or
          (p.opcode = A_LSS)) and
         (p.ops=2) and { A_MOVSD can have zero operands, so this check is needed }
         (p.oper[1]^.typ = top_reg) and
         (Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)) and
         ((p.oper[0]^.typ = top_const) or
          ((p.oper[0]^.typ = top_reg) and
           not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
          ((p.oper[0]^.typ = top_ref) and
           not RegInRef(reg,p.oper[0]^.ref^)))) or
        { pop can also have a memory destination, in which case the reg field
          of the operand is not valid and must not be looked at }
        ((p.opcode = A_POP) and
         (p.oper[0]^.typ = top_reg) and
         (Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg))) or
        { imul const,src,dest }
        ((p.opcode = A_IMUL) and
         (p.ops=3) and
         (Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg,reg)) and
         (((p.oper[1]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[1]^.reg,reg))) or
          ((p.oper[1]^.typ=top_ref) and not(RegInRef(reg,p.oper[1]^.ref^))))) or
        { one operand (i)mul: the register tested depends on the operand size }
        ((((p.opcode = A_IMUL) or
           (p.opcode = A_MUL)) and
          (p.ops=1)) and
         (((p.oper[0]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
          ((p.oper[0]^.typ=top_ref) and not(RegInRef(reg,p.oper[0]^.ref^)))) and
         (((p.opsize=S_B) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
          ((p.opsize=S_W) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
          ((p.opsize=S_L) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg))
{$ifdef x86_64}
          or ((p.opsize=S_Q) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg))
{$endif x86_64}
         )) or
        { sign extensions into (e/r)dx resp. ax }
        ((p.opcode = A_CWD) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
        ((p.opcode = A_CDQ) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg)) or
{$ifdef x86_64}
        ((p.opcode = A_CQO) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg)) or
{$endif x86_64}
        ((p.opcode = A_CBW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
        { segment register loads }
{$ifndef x86_64}
        ((p.opcode = A_LDS) and (reg=NR_DS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
        ((p.opcode = A_LES) and (reg=NR_ES) and not(RegInRef(reg,p.oper[0]^.ref^))) or
{$endif not x86_64}
        ((p.opcode = A_LFS) and (reg=NR_FS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
        ((p.opcode = A_LGS) and (reg=NR_GS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
        ((p.opcode = A_LSS) and (reg=NR_SS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
        { instructions with an implicit destination register }
{$ifndef x86_64}
        ((p.opcode = A_AAM) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
{$endif not x86_64}
        ((p.opcode = A_LAHF) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
        ((p.opcode = A_LODSB) and Reg1WriteOverwritesReg2Entirely(NR_AL,reg)) or
        ((p.opcode = A_LODSW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg)) or
        ((p.opcode = A_LODSD) and Reg1WriteOverwritesReg2Entirely(NR_EAX,reg)) or
{$ifdef x86_64}
        ((p.opcode = A_LODSQ) and Reg1WriteOverwritesReg2Entirely(NR_RAX,reg)) or
{$endif x86_64}
        ((p.opcode = A_SETcc) and (p.oper[0]^.typ=top_reg) and Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
        (((p.opcode = A_FSTSW) or
          (p.opcode = A_FNSTSW)) and
         (p.oper[0]^.typ=top_reg) and
         Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg));
    end;
  { Returns true if p is the first instruction of one of these sequences:
      ret
      leave
      ret
    or
      mov %ebp,%esp
      pop %ebp
      ret }
  class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
    var
      second,third : tai;
    begin
      result:=false;
      if p.typ<>ait_instruction then
        exit;
      case taicpu(p).opcode of
        A_RET:
          result:=true;
        A_LEAVE:
          { leave has to be followed directly by ret }
          result:=
            GetNextInstruction(p,second) and
            (second.typ=ait_instruction) and
            (taicpu(second).opcode=A_RET);
        A_MOV:
          { the frame is torn down by hand: mov %ebp,%esp; pop %ebp; ret }
          result:=
            (taicpu(p).oper[0]^.typ=top_reg) and
            (taicpu(p).oper[0]^.reg=NR_EBP) and
            (taicpu(p).oper[1]^.typ=top_reg) and
            (taicpu(p).oper[1]^.reg=NR_ESP) and
            GetNextInstruction(p,second) and
            (second.typ=ait_instruction) and
            (taicpu(second).opcode=A_POP) and
            (taicpu(second).oper[0]^.typ=top_reg) and
            (taicpu(second).oper[0]^.reg=NR_EBP) and
            GetNextInstruction(second,third) and
            (third.typ=ait_instruction) and
            (taicpu(third).opcode=A_RET);
        else
          result:=false;
      end;
    end;
  { Returns true if hp1 is an arithmetic/logic instruction that modifies reg
    in place: either a two operand add/sub/or/xor/and/shl/shr/sar with reg as
    destination and a constant or a register other than reg as source, or a
    one operand inc/dec/neg/not working on reg. }
  class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
    begin
      Result := False;
      case hp1.opcode of
        A_INC,A_DEC,A_NEG,A_NOT:
          Result :=
            (hp1.oper[0]^.typ = top_reg) and
            (hp1.oper[0]^.reg = reg);
        A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
          { the source is examined before the destination; keep it that way,
            the destination operand is only looked at for a suitable source }
          Result :=
            ((hp1.oper[0]^.typ = top_const) or
             ((hp1.oper[0]^.typ = top_reg) and
              (hp1.oper[0]^.reg <> reg))) and
            (hp1.oper[1]^.typ = top_reg) and
            (hp1.oper[1]^.reg = reg);
      end;
    end;
  { Removes the last deallocation marker of the function result register(s)
    in front of p. Which registers are treated depends on the type of the
    result of the current procedure: eax for the types listed below and for
    ordinals of non-zero size, additionally edx for ordinals of size 8. }
  procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);

    { Walks backwards from p and removes the first deallocation of the integer
      register supreg it meets; the search ends at the start of the list or
      at the first item for which regInInstruction reports the register. }
    procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
      var
        cur: tai;
      begin
        cur := tai(p.previous);
        while assigned(cur) do
          begin
            if (cur.typ = ait_regalloc) and
               (tai_regalloc(cur).ratype=ra_dealloc) and
               (getregtype(tai_regalloc(cur).reg) = R_INTREGISTER) and
               (getsupreg(tai_regalloc(cur).reg) = supreg) then
              begin
                asml.remove(cur);
                cur.free;
                exit;
              end;
            if regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),cur) then
              exit;
            cur := tai(cur.previous);
          end;
      end;

    begin
      case current_procinfo.procdef.returndef.typ of
        orddef:
          begin
            if current_procinfo.procdef.returndef.size = 0 then
              exit;
            DoRemoveLastDeallocForFuncRes(RS_EAX);
            { for int64/qword }
            if current_procinfo.procdef.returndef.size = 8 then
              DoRemoveLastDeallocForFuncRes(RS_EDX);
          end;
        arraydef,recorddef,pointerdef,
        stringdef,enumdef,procdef,objectdef,errordef,
        filedef,setdef,procvardef,
        classrefdef,forwarddef:
          DoRemoveLastDeallocForFuncRes(RS_EAX);
      end;
    end;
  { Pass 1 optimizations for a register to register vmovapd/vmovaps at p:
    removal of self moves, merging of two consecutive moves and folding of
    the moves placed around a fused multiply-add.

    p is updated if the instruction it points to gets removed. The result is
    true for the first two kinds of optimization; the fused multiply-add
    folding leaves it false. }
  function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1,hp2 : tai;
    begin
      result:=false;
      if MatchOpType(taicpu(p),top_reg,top_reg) then
        begin
          { vmova* reg1,reg1
            =>
            <nop> }
          if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
            begin
              GetNextInstruction(p,hp1);
              asml.Remove(p);
              p.Free;
              p:=hp1;
              result:=true;
            end
          else if GetNextInstruction(p,hp1) then
            begin
              if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
                MatchOpType(taicpu(hp1),top_reg,top_reg) and
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
                begin
                  { vmova* reg1,reg2
                    vmova* reg2,reg3
                    dealloc reg2
                    =>
                    vmova* reg1,reg3 }
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                    begin
                      taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end
                  { special case:
                    vmova* reg1,reg2
                    vmova* reg2,reg1
                    =>
                    vmova* reg1,reg2 }
                  else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
                    begin
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end;
                  { every CopyUsedRegs has to be paired with a ReleaseUsedRegs }
                  ReleaseUsedRegs(TmpUsedRegs);
                end
              else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
                { we mix single and double operations here because we assume that the compiler
                  generates vmovapd only after double operations and vmovaps only after single operations }
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
                GetNextInstruction(hp1,hp2) and
                MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
                { the trailing move has to copy reg2 back into reg1; a move
                  from any other source into reg1 must not be removed }
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
                MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
                begin
                  { vmova* reg1,reg2
                    vfma*  x,y,reg2
                    vmova* reg2,reg1
                    dealloc reg2
                    =>
                    vfma*  x,y,reg1 }
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
                    begin
                      taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
                      asml.Remove(p);
                      p.Free;
                      asml.Remove(hp2);
                      hp2.Free;
                      p:=hp1;
                    end;
                  ReleaseUsedRegs(TmpUsedRegs);
                end;
            end;
        end;
    end;
  { Pass 1 optimization for a three operand vector instruction at p that is
    followed by a vmovapd/vmovaps copying its result into another register:

      vop    x,y,reg1
      vmova* reg1,reg2
      dealloc reg1
      =>
      vop    x,y,reg2

    Operand 2 of p is accessed without a check, so p has to be an instruction
    with three operands. Returns true if the move was folded into p. }
  function TX86AsmOptimizer.OptPass1VOP(const p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1 : tai;
    begin
      result:=false;
      if GetNextInstruction(p,hp1) and
        { we mix single and double operations here because we assume that the compiler
          generates vmovapd only after double operations and vmovaps only after single operations }
        MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
        MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
        (taicpu(hp1).oper[1]^.typ=top_reg) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)) then
            begin
              taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
              asml.Remove(hp1);
              hp1.Free;
              result:=true;
            end;
          { every CopyUsedRegs has to be paired with a ReleaseUsedRegs }
          ReleaseUsedRegs(TmpUsedRegs);
        end;
    end;
  613. function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  614. var
  615. hp1, hp2: tai;
  616. TmpUsedRegs : TAllUsedRegs;
  617. GetNextIntruction_p : Boolean;
  618. begin
  619. Result:=false;
  620. GetNextIntruction_p:=GetNextInstruction(p, hp1);
  621. if GetNextIntruction_p and
  622. MatchInstruction(hp1,A_AND,[]) and
  623. (taicpu(p).oper[1]^.typ = top_reg) and
  624. MatchOpType(taicpu(hp1),top_const,top_reg) and
  625. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
  626. case taicpu(p).opsize Of
  627. S_L:
  628. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  629. begin
  630. DebugMsg('PeepHole Optimization,MovAnd2Mov',p);
  631. asml.remove(hp1);
  632. hp1.free;
  633. Result:=true;
  634. exit;
  635. end;
  636. end
  637. else if GetNextIntruction_p and
  638. MatchInstruction(hp1,A_MOV,[]) and
  639. (taicpu(p).oper[1]^.typ = top_reg) and
  640. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
  641. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  642. begin
  643. CopyUsedRegs(TmpUsedRegs);
  644. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  645. { we have
  646. mov x, %treg
  647. mov %treg, y
  648. }
  649. if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
  650. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  651. { we've got
  652. mov x, %treg
  653. mov %treg, y
  654. with %treg is not used after }
  655. case taicpu(p).oper[0]^.typ Of
  656. top_reg:
  657. begin
  658. { change
  659. mov %reg, %treg
  660. mov %treg, y
  661. to
  662. mov %reg, y
  663. }
  664. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  665. DebugMsg('PeepHole Optimization,MovMov2Mov 2',p);
  666. asml.remove(hp1);
  667. hp1.free;
  668. ReleaseUsedRegs(TmpUsedRegs);
  669. Exit;
  670. end;
  671. top_ref:
  672. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  673. begin
  674. { change
  675. mov mem, %treg
  676. mov %treg, %reg
  677. to
  678. mov mem, %reg"
  679. }
  680. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  681. DebugMsg('PeepHole Optimization,MovMov2Mov 3',p);
  682. asml.remove(hp1);
  683. hp1.free;
  684. ReleaseUsedRegs(TmpUsedRegs);
  685. Exit;
  686. end;
  687. end;
  688. ReleaseUsedRegs(TmpUsedRegs);
  689. end
  690. else
  691. { Change
  692. mov %reg1, %reg2
  693. xxx %reg2, ???
  694. to
  695. mov %reg1, %reg2
  696. xxx %reg1, ???
  697. to avoid a write/read penalty
  698. }
  699. if MatchOpType(taicpu(p),top_reg,top_reg) and
  700. GetNextInstruction(p,hp1) and
  701. (tai(hp1).typ = ait_instruction) and
  702. (taicpu(hp1).ops >= 1) and
  703. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  704. { we have
  705. mov %reg1, %reg2
  706. XXX %reg2, ???
  707. }
  708. begin
  709. if ((taicpu(hp1).opcode = A_OR) or
  710. (taicpu(hp1).opcode = A_TEST)) and
  711. (taicpu(hp1).oper[1]^.typ = top_reg) and
  712. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  713. { we have
  714. mov %reg1, %reg2
  715. test/or %reg2, %reg2
  716. }
  717. begin
  718. CopyUsedRegs(TmpUsedRegs);
  719. { reg1 will be used after the first instruction,
  720. so update the allocation info }
  721. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  722. if GetNextInstruction(hp1, hp2) and
  723. (hp2.typ = ait_instruction) and
  724. taicpu(hp2).is_jmp and
  725. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  726. { change
  727. mov %reg1, %reg2
  728. test/or %reg2, %reg2
  729. jxx
  730. to
  731. test %reg1, %reg1
  732. jxx
  733. }
  734. begin
  735. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  736. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  737. asml.remove(p);
  738. p.free;
  739. p := hp1;
  740. ReleaseUsedRegs(TmpUsedRegs);
  741. Exit;
  742. end
  743. else
  744. { change
  745. mov %reg1, %reg2
  746. test/or %reg2, %reg2
  747. to
  748. mov %reg1, %reg2
  749. test/or %reg1, %reg1
  750. }
  751. begin
  752. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  753. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  754. end;
  755. ReleaseUsedRegs(TmpUsedRegs);
  756. end
  757. end
  758. else
  759. { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  760. x >= RetOffset) as it doesn't do anything (it writes either to a
  761. parameter or to the temporary storage room for the function
  762. result)
  763. }
  764. if GetNextIntruction_p and
  765. (tai(hp1).typ = ait_instruction) then
  766. begin
  767. if IsExitCode(hp1) and
  768. MatchOpType(p,top_reg,top_ref) and
  769. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  770. not(assigned(current_procinfo.procdef.funcretsym) and
  771. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  772. (taicpu(p).oper[1]^.ref^.index = NR_NO) then
  773. begin
  774. asml.remove(p);
  775. p.free;
  776. p:=hp1;
  777. DebugMsg('Peephole removed deadstore before leave/ret',p);
  778. RemoveLastDeallocForFuncRes(p);
  779. exit;
  780. end
  781. { change
  782. mov reg1, mem1
  783. cmp x, mem1
  784. to
  785. mov reg1, mem1
  786. cmp x, reg1
  787. }
  788. else if MatchOpType(p,top_reg,top_ref) and
  789. MatchInstruction(hp1,A_CMP,[taicpu(p).opsize]) and
  790. (taicpu(hp1).oper[1]^.typ = top_ref) and
  791. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  792. begin
  793. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  794. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  795. end;
  796. end;
  797. { Next instruction is also a MOV ? }
  798. if GetNextIntruction_p and
  799. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  800. begin
  801. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  802. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  803. { mov reg1, mem1 or mov mem1, reg1
  804. mov mem2, reg2 mov reg2, mem2}
  805. begin
  806. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  807. { mov reg1, mem1 or mov mem1, reg1
  808. mov mem2, reg1 mov reg2, mem1}
  809. begin
  810. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  811. { Removes the second statement from
  812. mov reg1, mem1/reg2
  813. mov mem1/reg2, reg1 }
  814. begin
  815. if taicpu(p).oper[0]^.typ=top_reg then
  816. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  817. DebugMsg('PeepHole Optimization,MovMov2Mov 1',p);
  818. asml.remove(hp1);
  819. hp1.free;
  820. Result:=true;
  821. exit;
  822. end
  823. else
  824. begin
  825. CopyUsedRegs(TmpUsedRegs);
  826. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  827. if (taicpu(p).oper[1]^.typ = top_ref) and
  828. { mov reg1, mem1
  829. mov mem2, reg1 }
  830. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  831. GetNextInstruction(hp1, hp2) and
  832. MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
  833. OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  834. OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
  835. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  836. { change to
  837. mov reg1, mem1 mov reg1, mem1
  838. mov mem2, reg1 cmp reg1, mem2
  839. cmp mem1, reg1
  840. }
  841. begin
  842. asml.remove(hp2);
  843. hp2.free;
  844. taicpu(hp1).opcode := A_CMP;
  845. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  846. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  847. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  848. DebugMsg('Peephole MovMovCmp2MovCmp done',hp1);
  849. end;
  850. ReleaseUsedRegs(TmpUsedRegs);
  851. end;
  852. end
  853. else if (taicpu(p).oper[1]^.typ=top_ref) and
  854. OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  855. begin
  856. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  857. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  858. DebugMsg('PeepHole Optimization,MovMov2MovMov1',p);
  859. end
  860. else
  861. begin
  862. CopyUsedRegs(TmpUsedRegs);
  863. if GetNextInstruction(hp1, hp2) and
  864. MatchOpType(taicpu(p),top_ref,top_reg) and
  865. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  866. (taicpu(hp1).oper[1]^.typ = top_ref) and
  867. MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
  868. MatchOpType(taicpu(hp2),top_ref,top_reg) and
  869. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  870. if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
  871. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  872. { mov mem1, %reg1
  873. mov %reg1, mem2
  874. mov mem2, reg2
  875. to:
  876. mov mem1, reg2
  877. mov reg2, mem2}
  878. begin
  879. AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  880. DebugMsg('PeepHole Optimization,MovMovMov2MovMov 1',p);
  881. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  882. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  883. asml.remove(hp2);
  884. hp2.free;
  885. end
  886. {$ifdef i386}
  887. { this is enabled for i386 only, as the rules to create the reg sets below
  888. are too complicated for x86-64, so this makes this code too error prone
  889. on x86-64
  890. }
  891. else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  892. not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
  893. not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
  894. { mov mem1, reg1 mov mem1, reg1
  895. mov reg1, mem2 mov reg1, mem2
  896. mov mem2, reg2 mov mem2, reg1
  897. to: to:
  898. mov mem1, reg1 mov mem1, reg1
  899. mov mem1, reg2 mov reg1, mem2
  900. mov reg1, mem2
  901. or (if mem1 depends on reg1
  902. and/or if mem2 depends on reg2)
  903. to:
  904. mov mem1, reg1
  905. mov reg1, mem2
  906. mov reg1, reg2
  907. }
  908. begin
  909. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  910. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  911. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  912. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  913. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  914. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  915. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  916. AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  917. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  918. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  919. AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  920. end
  921. else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  922. begin
  923. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  924. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  925. end
  926. else
  927. begin
  928. asml.remove(hp2);
  929. hp2.free;
  930. end
  931. {$endif i386}
  932. ;
  933. ReleaseUsedRegs(TmpUsedRegs);
  934. end;
  935. end
  936. (* { movl [mem1],reg1
  937. movl [mem1],reg2
  938. to
  939. movl [mem1],reg1
  940. movl reg1,reg2
  941. }
  942. else if (taicpu(p).oper[0]^.typ = top_ref) and
  943. (taicpu(p).oper[1]^.typ = top_reg) and
  944. (taicpu(hp1).oper[0]^.typ = top_ref) and
  945. (taicpu(hp1).oper[1]^.typ = top_reg) and
  946. (taicpu(p).opsize = taicpu(hp1).opsize) and
  947. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  948. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  949. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  950. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  951. else*)
  952. { movl const1,[mem1]
  953. movl [mem1],reg1
  954. to
  955. movl const1,reg1
  956. movl reg1,[mem1]
  957. }
  958. else if MatchOpType(Taicpu(p),top_const,top_ref) and
  959. MatchOpType(Taicpu(hp1),top_ref,top_reg) and
  960. (taicpu(p).opsize = taicpu(hp1).opsize) and
  961. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  962. not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
  963. begin
  964. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  965. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  966. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  967. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  968. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  969. DebugMsg('PeepHole Optimization,MovMov2MovMov 1',p);
  970. end
  971. end
  972. else if (taicpu(p).oper[1]^.typ = top_reg) and
  973. GetNextIntruction_p and
  974. (hp1.typ = ait_instruction) and
  975. GetNextInstruction(hp1, hp2) and
  976. MatchInstruction(hp2,A_MOV,[]) and
  977. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  978. (taicpu(hp2).oper[0]^.typ=top_reg) and
  979. (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
  980. (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
  981. ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
  982. IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
  983. ) then
  984. { change movsX/movzX reg/ref, reg2
  985. add/sub/or/... reg3/$const, reg2
  986. mov reg2 reg/ref
  987. to add/sub/or/... reg3/$const, reg/ref }
  988. begin
  989. CopyUsedRegs(TmpUsedRegs);
  990. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  991. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  992. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  993. begin
  994. { by example:
  995. movswl %si,%eax movswl %si,%eax p
  996. decl %eax addl %edx,%eax hp1
  997. movw %ax,%si movw %ax,%si hp2
  998. ->
  999. movswl %si,%eax movswl %si,%eax p
  1000. decw %eax addw %edx,%eax hp1
  1001. movw %ax,%si movw %ax,%si hp2
  1002. }
  1003. DebugMsg('Peephole Optimization MovOpMov2Op ('+
  1004. std_op2str[taicpu(p).opcode]+gas_opsize2str[taicpu(p).opsize]+' '+
  1005. std_op2str[taicpu(hp1).opcode]+gas_opsize2str[taicpu(hp1).opsize]+' '+
  1006. std_op2str[taicpu(hp2).opcode]+gas_opsize2str[taicpu(hp2).opsize],p);
  1007. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1008. {
  1009. ->
  1010. movswl %si,%eax movswl %si,%eax p
  1011. decw %si addw %dx,%si hp1
  1012. movw %ax,%si movw %ax,%si hp2
  1013. }
  1014. case taicpu(hp1).ops of
  1015. 1:
  1016. begin
  1017. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  1018. if taicpu(hp1).oper[0]^.typ=top_reg then
  1019. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1020. end;
  1021. 2:
  1022. begin
  1023. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  1024. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  1025. (taicpu(hp1).opcode<>A_SHL) and
  1026. (taicpu(hp1).opcode<>A_SHR) and
  1027. (taicpu(hp1).opcode<>A_SAR) then
  1028. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1029. end;
  1030. else
  1031. internalerror(2008042701);
  1032. end;
  1033. {
  1034. ->
  1035. decw %si addw %dx,%si p
  1036. }
  1037. asml.remove(p);
  1038. asml.remove(hp2);
  1039. p.Free;
  1040. hp2.Free;
  1041. p := hp1;
  1042. end;
  1043. ReleaseUsedRegs(TmpUsedRegs);
  1044. end
  1045. else if GetNextIntruction_p and
  1046. MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  1047. GetNextInstruction(hp1, hp2) and
  1048. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  1049. MatchOperand(Taicpu(p).oper[0]^,0) and
  1050. (Taicpu(p).oper[1]^.typ = top_reg) and
  1051. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  1052. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  1053. { mov reg1,0
  1054. bts reg1,operand1 --> mov reg1,operand2
  1055. or reg1,operand2 bts reg1,operand1}
  1056. begin
  1057. Taicpu(hp2).opcode:=A_MOV;
  1058. asml.remove(hp1);
  1059. insertllitem(hp2,hp2.next,hp1);
  1060. asml.remove(p);
  1061. p.free;
  1062. p:=hp1;
  1063. end
  1064. else if GetNextIntruction_p and
  1065. MatchInstruction(hp1,A_LEA,[S_L]) and
  1066. MatchOpType(Taicpu(p),top_ref,top_reg) and
  1067. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  1068. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  1069. ) or
  1070. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  1071. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  1072. )
  1073. ) then
  1074. { mov reg1,ref
  1075. lea reg2,[reg1,reg2]
  1076. to
  1077. add reg2,ref}
  1078. begin
  1079. CopyUsedRegs(TmpUsedRegs);
  1080. { reg1 may not be used afterwards }
  1081. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1082. begin
  1083. Taicpu(hp1).opcode:=A_ADD;
  1084. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  1085. DebugMsg('Peephole MovLea2Add done',hp1);
  1086. asml.remove(p);
  1087. p.free;
  1088. p:=hp1;
  1089. end;
  1090. ReleaseUsedRegs(TmpUsedRegs);
  1091. end;
  1092. end;
  { Second-pass peephole handling for a MOV instruction p.

    Two rewrites are attempted, in this order:

      1) mov reg1, reg2
         mov/zx/sx (reg2, ..), reg2
           ->
         mov/zx/sx (reg1, ..), reg2

         The first MOV is removed, p is moved to the load and the
         function returns true.

      2) mov (ref), reg
         add/sub/or/lea/... reg2/$const, reg
         mov reg, (ref)
           ->
         add/sub/or/... reg2/$const, (ref)

         Only done when reg is not used after the final store. Both MOVs
         are removed and p is moved to the arithmetic instruction; Result
         is left false on this path.

    p is a var parameter: on a successful rewrite it points at the
    instruction that replaced the sequence. }
  function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      nextInstr,storeInstr : tai;
      foldable : boolean;
    begin
      Result:=false;

      { --- rewrite 1: register copy feeding the address of the next load --- }
      if MatchOpType(taicpu(p),top_reg,top_reg) and
         GetNextInstruction(p,nextInstr) and
         MatchInstruction(nextInstr,A_MOV,A_MOVZX,A_MOVSX,[]) and
         MatchOpType(taicpu(nextInstr),top_ref,top_reg) and
         ((taicpu(nextInstr).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) or
          (taicpu(nextInstr).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)) and
         (getsupreg(taicpu(nextInstr).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
        begin
          { address the memory through reg1 instead of its copy reg2;
            base and index are patched independently }
          if taicpu(nextInstr).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg then
            taicpu(nextInstr).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
          if taicpu(nextInstr).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg then
            taicpu(nextInstr).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
          asml.remove(p);
          p.free;
          p := nextInstr;
          Result:=true;
          exit;
        end;

      { --- rewrite 2: load / modify / store back to the same location --- }
      if taicpu(p).oper[0]^.typ <> top_ref then
        exit;
      if not GetNextInstruction(p,nextInstr) then
        exit;
      if nextInstr.typ <> ait_instruction then
        exit;

      { The middle instruction is either a foldable arithmetic operation on
        the loaded register, or an LEA that only adds something to it.
        The look-ahead for the trailing MOV is repeated in both alternatives
        so that the cheap opcode checks run first. }
      foldable :=
        IsFoldableArithOp(taicpu(nextInstr),taicpu(p).oper[1]^.reg) and
        GetNextInstruction(nextInstr,storeInstr) and
        MatchInstruction(storeInstr,A_MOV,[]);
      if not foldable then
        { NOTE(review): the liveness query below is handed UsedRegs itself,
          not a scratch copy - kept exactly as in the original code }
        foldable :=
          (taicpu(nextInstr).opcode=A_LEA) and
          GetNextInstruction(nextInstr,storeInstr) and
          MatchInstruction(storeInstr,A_MOV,[]) and
          ((MatchReference(taicpu(nextInstr).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
            (taicpu(nextInstr).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
           ) or
           (MatchReference(taicpu(nextInstr).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) and
            (taicpu(nextInstr).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg)
           ) or
           MatchReferenceWithOffset(taicpu(nextInstr).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_NO) or
           MatchReferenceWithOffset(taicpu(nextInstr).oper[0]^.ref^,NR_NO,taicpu(p).oper[1]^.reg)
          ) and
          (MatchOperand(taicpu(p).oper[1]^,taicpu(storeInstr).oper[0]^) or
           not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,nextInstr,UsedRegs)));
      if not foldable then
        exit;

      { the trailing MOV must store the result operand of the middle
        instruction to memory }
      if not MatchOperand(taicpu(nextInstr).oper[taicpu(nextInstr).ops-1]^,taicpu(storeInstr).oper[0]^) then
        exit;
      if taicpu(storeInstr).oper[1]^.typ <> top_ref then
        exit;

      CopyUsedRegs(TmpUsedRegs);
      UpdateUsedRegs(TmpUsedRegs,tai(nextInstr.next));
      { the store has to hit the location that was loaded, and the
        register must not be used after the store }
      if RefsEqual(taicpu(storeInstr).oper[1]^.ref^,taicpu(p).oper[0]^.ref^) and
         not(RegUsedAfterInstruction(taicpu(storeInstr).oper[0]^.reg,storeInstr,TmpUsedRegs)) then
        begin
          case taicpu(nextInstr).opcode of
            A_INC,A_DEC,A_NOT,A_NEG :
              { these take the memory reference as operand 0 }
              taicpu(nextInstr).loadRef(0,taicpu(p).oper[0]^.ref^);
            A_LEA :
              begin
                { the LEA becomes an ADD; its source is whichever component
                  of the address is not the loaded register: index, else
                  base, else the constant offset }
                taicpu(nextInstr).opcode:=A_ADD;
                if (taicpu(nextInstr).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg) and
                   (taicpu(nextInstr).oper[0]^.ref^.index<>NR_NO) then
                  taicpu(nextInstr).loadreg(0,taicpu(nextInstr).oper[0]^.ref^.index)
                else if (taicpu(nextInstr).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg) and
                        (taicpu(nextInstr).oper[0]^.ref^.base<>NR_NO) then
                  taicpu(nextInstr).loadreg(0,taicpu(nextInstr).oper[0]^.ref^.base)
                else
                  taicpu(nextInstr).loadconst(0,taicpu(nextInstr).oper[0]^.ref^.offset);
                taicpu(nextInstr).loadRef(1,taicpu(p).oper[0]^.ref^);
                DebugMsg('Peephole FoldLea done',nextInstr);
              end
            else
              { every other opcode gets the memory reference as operand 1 }
              taicpu(nextInstr).loadRef(1,taicpu(p).oper[0]^.ref^);
          end;
          { drop the load and the store, continue at the folded instruction }
          asml.remove(p);
          asml.remove(storeInstr);
          p.free;
          storeInstr.free;
          p := nextInstr
        end;
      ReleaseUsedRegs(TmpUsedRegs);
    end;
  { Second-pass peephole handling for an IMUL instruction p.

    Folds a preceding register copy into a three-operand IMUL:

        mov  reg1,reg2
        imul y,reg2
          ->
        imul y,reg1,reg2

    y must be a constant or a reference whose refaddr is addr_full.
    Returns true when the MOV was removed and p was rewritten. }
  function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      prevInstr : tai;
    begin
      Result:=false;

      { need at least "imul src,dst" }
      if taicpu(p).ops < 2 then
        exit;

      { source operand: constant, or a reference with refaddr=addr_full }
      if taicpu(p).oper[0]^.typ = top_ref then
        begin
          if taicpu(p).oper[0]^.ref^.refaddr <> addr_full then
            exit;
        end
      else if taicpu(p).oper[0]^.typ <> top_const then
        exit;

      if taicpu(p).oper[1]^.typ <> top_reg then
        exit;

      { when the instruction is not the two-operand form, operand 2 must be
        the very same register as operand 1 }
      if taicpu(p).ops <> 2 then
        if (taicpu(p).oper[2]^.typ <> top_reg) or
           (taicpu(p).oper[2]^.reg <> taicpu(p).oper[1]^.reg) then
          exit;

      { the instruction before must be "mov reg,reg" ... }
      if not GetLastInstruction(p,prevInstr) then
        exit;
      if not MatchInstruction(prevInstr,A_MOV,[]) then
        exit;
      if not MatchOpType(prevInstr,top_reg,top_reg) then
        exit;

      { ... writing the register the IMUL works on: either exactly that
        register, or an S_L mov feeding an S_Q imul on the same
        super register }
      if taicpu(prevInstr).oper[1]^.reg <> taicpu(p).oper[1]^.reg then
        if not((taicpu(prevInstr).opsize=S_L) and
               (taicpu(p).opsize=S_Q) and
               SuperRegistersEqual(taicpu(prevInstr).oper[1]^.reg,taicpu(p).oper[1]^.reg)) then
          exit;

      CopyUsedRegs(TmpUsedRegs);
      if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,p,TmpUsedRegs)) then
        begin
          { switch to the three-operand form: operand 1 becomes the MOV
            source, operand 2 the MOV destination }
          taicpu(p).ops := 3;
          taicpu(p).loadreg(1,taicpu(prevInstr).oper[0]^.reg);
          taicpu(p).loadreg(2,taicpu(prevInstr).oper[1]^.reg);
          DebugMsg('Peephole MovImul2Imul done',p);
          asml.remove(prevInstr);
          prevInstr.free;
          result:=true;
        end;
      ReleaseUsedRegs(TmpUsedRegs);
    end;
  { First-pass peephole handling for an AND instruction p.

    Looks at the instruction following p and tries, in this order:

      AndAnd2And     and const1,reg ; and const2,reg
                       -> and (const1 and const2),reg
                     p is removed, p is moved to the second AND and the
                     function returns true.

      AndMovzToAnd   and const,reg ; movzx subreg,reg
                       -> and const,reg
                     when const already fits into the MOVZX source width.

      AndMovsxToAnd  and const,reg ; movsx subreg,reg
                       -> and const,reg
                     when const fits below the sign bit of the MOVSX
                     source width.

      And2Test       and x,reg ; jxx   (conditional jump only)
                       -> test x,reg ; jxx
                     when reg is not in UsedRegs.

    Result is only set to true for AndAnd2And; the other rewrites leave it
    false. Returns false immediately when there is no next instruction. }
  function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
    var
      hp1 : tai;
    begin
      Result:=false;

      if not(GetNextInstruction(p, hp1)) then
        exit;

      if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_AND,[]) and
        MatchOpType(hp1,top_const,top_reg) and
        (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
        { the second register must contain the first one, so compare their subreg types }
        (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
        (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
        { change
            and const1, reg
            and const2, reg
          to
            and (const1 and const2), reg
        }
        begin
          taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
          DebugMsg('Peephole AndAnd2And done',hp1);
          asml.remove(p);
          p.Free;
          p:=hp1;
          Result:=true;
          exit;
        end
      else if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_MOVZX,[]) and
        (taicpu(hp1).oper[0]^.typ = top_reg) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
        { movzx always widens, so its source and destination never share a
          sub-register size; what has to match is the super register,
          exactly as in the movsx branch below (the former getsubreg
          comparison made this branch unreachable) }
        (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
        (((taicpu(p).opsize=S_W) and
          (taicpu(hp1).opsize=S_BW)) or
         ((taicpu(p).opsize=S_L) and
          (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
          or
         ((taicpu(p).opsize=S_Q) and
          (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
        ) then
        { change
            and const, reg
            movzx subreg, reg
          to
            and const, reg
          if const has no bits set above the movzx source width
        }
        begin
          if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
               ) or
             (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
            or
             (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $ffffffff)=taicpu(p).oper[0]^.val)
             )
{$endif x86_64}
            then
            begin
              DebugMsg('Peephole AndMovzToAnd done',p);
              asml.remove(hp1);
              hp1.free;
            end;
        end
      else if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
        (taicpu(hp1).oper[0]^.typ = top_reg) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
        (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
        (((taicpu(p).opsize=S_W) and
          (taicpu(hp1).opsize=S_BW)) or
         ((taicpu(p).opsize=S_L) and
          (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
          or
         ((taicpu(p).opsize=S_Q) and
          (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
        ) then
        { change
            and const, reg
            movsx subreg, reg
          to
            and const, reg
          if const leaves the sign bit of the movsx source width clear
        }
        begin
          if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
               ) or
             (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
            or
             (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
             )
{$endif x86_64}
            then
            begin
              DebugMsg('PeepHole Optimization,AndMovsxToAnd',p);
              asml.remove(hp1);
              hp1.free;
            end;
        end
      else if (taicpu(p).oper[1]^.typ = top_reg) and
        (hp1.typ = ait_instruction) and
        (taicpu(hp1).is_jmp) and
        (taicpu(hp1).opcode<>A_JMP) and
        not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
        { change
            and x, reg
            jxx
          to
            test x, reg
            jxx
          if reg is deallocated before the
          jump, but only if it's a conditional jump (PFV)
        }
        taicpu(p).opcode := A_TEST;
    end;
  { Post-peephole handling for a MOV instruction p:
    turns "mov $0, %reg" into "xor %reg, %reg".

    The rewrite is skipped while NR_DEFAULTFLAGS is reported as in use by
    UsedRegs. p itself is not replaced, only its opcode and first operand
    are changed. }
  procedure TX86AsmOptimizer.PostPeepholeOptMov(const p : tai);
    var
      instr : taicpu;
    begin
      instr:=taicpu(p);

      { source must be the constant 0 }
      if not MatchOperand(instr.oper[0]^,0) then
        exit;
      { destination must be a register }
      if instr.oper[1]^.typ <> Top_Reg then
        exit;
      { leave the MOV alone while the flags register is in use }
      if RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs) then
        exit;

      instr.opcode := A_XOR;
      instr.loadReg(0,instr.oper[1]^.reg);
    end;
  1346. end.