aoptx86.pas 58 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
  27. type
  28. TX86AsmOptimizer = class(TAsmOptimizer)
  29. function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
  30. protected
  31. { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
  32. function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
  33. { checks whether reading the value in reg1 depends on the value of reg2. This
  34. is very similar to SuperRegisterEquals, except it takes into account that
  35. R_SUBH and R_SUBL are independent (e.g. reading from AL does not
  36. depend on the value in AH). }
  37. function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
  38. procedure PostPeepholeOptMov(const p : tai);
  39. function OptPass1AND(var p : tai) : boolean;
  40. function OptPass1VMOVAP(var p : tai) : boolean;
  41. function OptPass1VOP(const p : tai) : boolean;
  42. function OptPass1MOV(var p : tai) : boolean;
  43. function OptPass2MOV(var p : tai) : boolean;
  44. function OptPass2Imul(var p : tai) : boolean;
  45. procedure DebugMsg(const s : string; p : tai);inline;
  46. procedure AllocRegBetween(reg : tregister; p1,p2 : tai;var initialusedregs : TAllUsedRegs);
  47. class function IsExitCode(p : tai) : boolean;
  48. class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean;
  49. procedure RemoveLastDeallocForFuncRes(p : tai);
  50. end;
  51. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  52. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  53. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  54. function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;
  55. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  56. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  57. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  58. function RefsEqual(const r1, r2: treference): boolean;
  59. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  60. { returns true, if ref is a reference using only the registers passed as base and index;
  61. unlike MatchReference, the offset is not checked and may have any value }
  62. function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  63. function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
  64. implementation
  65. uses
  66. cutils,
  67. verbose,
  68. procinfo,
  69. symconst,symsym,
  70. itcpugas;
  { Returns true if instr is an instruction with opcode op whose operand size
    is contained in opsize. An empty opsize set accepts every operand size. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
    begin
      { anything that is not an instruction can never match }
      if instr.typ <> ait_instruction then
        exit(false);
      if taicpu(instr).opcode <> op then
        exit(false);
      result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { Returns true if instr is an instruction whose opcode is op1 or op2 and
    whose operand size is contained in opsize. An empty opsize set accepts
    every operand size. }
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
    begin
      if instr.typ <> ait_instruction then
        exit(false);
      { neither of the two accepted opcodes }
      if (taicpu(instr).opcode <> op1) and
         (taicpu(instr).opcode <> op2) then
        exit(false);
      result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { Returns true if instr is an instruction whose opcode is op1, op2 or op3
    and whose operand size is contained in opsize. An empty opsize set accepts
    every operand size. }
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
    begin
      if instr.typ <> ait_instruction then
        exit(false);
      { none of the three accepted opcodes }
      if (taicpu(instr).opcode <> op1) and
         (taicpu(instr).opcode <> op2) and
         (taicpu(instr).opcode <> op3) then
        exit(false);
      result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { Returns true if instr is an instruction whose opcode occurs in ops and
    whose operand size is contained in opsize. An empty opsize set accepts
    every operand size; an empty ops array never matches. }
  function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
    const opsize : topsizes) : boolean;
    var
      candidate : TAsmOp;
    begin
      result:=false;
      { these two conditions do not depend on the opcode being tried, so they
        are tested once up front }
      if instr.typ <> ait_instruction then
        exit;
      if (opsize <> []) and not(taicpu(instr).opsize in opsize) then
        exit;
      for candidate in ops do
        if taicpu(instr).opcode = candidate then
          exit(true);
    end;
  { Returns true if oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ = top_reg then
        result := (oper.reg = reg)
      else
        result := false;
    end;
  { Returns true if oper is a constant operand with value a. }
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
    begin
      if oper.typ = top_const then
        result := (oper.val = a)
      else
        result := false;
    end;
  { Returns true if both operands have the same type and the same content.
    Constants are compared by value, registers by register number and
    references with RefsEqual. Two operands of any other (equal) type raise
    internalerror 2013102801. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
    begin
      { operands of different kinds are never equal }
      if oper1.typ <> oper2.typ then
        exit(false);
      result := true;
      case oper1.typ of
        top_const:
          result := (oper1.val = oper2.val);
        top_reg:
          result := (oper1.reg = oper2.reg);
        top_ref:
          result := RefsEqual(oper1.ref^, oper2.ref^);
        else
          internalerror(2013102801);
      end;
    end;
  { Returns true if r1 and r2 agree in offset, segment, base, index,
    scalefactor, symbol, relsymbol and refaddr. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      Result :=
        { registers making up the address }
        (r1.segment = r2.segment) and
        (r1.base = r2.base) and
        (r1.index = r2.index) and
        (r1.scalefactor = r2.scalefactor) and
        { displacement and symbolic parts }
        (r1.offset = r2.offset) and
        (r1.symbol = r2.symbol) and
        (r1.relsymbol = r2.relsymbol) and
        (r1.refaddr = r2.refaddr);
    end;
  { Returns true if ref is a plain register reference: zero offset, scale
    factor 0 or 1, no segment, no symbol and no relsymbol, with the given base
    and index registers. Passing NR_INVALID for base or index accepts any
    register in that position. }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
    begin
      { everything except the offset test is exactly what
        MatchReferenceWithOffset checks }
      Result:=(ref.offset=0) and
        MatchReferenceWithOffset(ref,base,index);
    end;
  { Returns true if ref has scale factor 0 or 1, no segment, no symbol and no
    relsymbol, and uses the given base and index registers. The offset is not
    inspected. Passing NR_INVALID for base or index accepts any register in
    that position. }
  function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
    begin
      Result:=false;
      if not(ref.scalefactor in [0,1]) then
        exit;
      if (ref.segment<>NR_NO) or
         (ref.symbol<>nil) or
         (ref.relsymbol<>nil) then
        exit;
      { NR_INVALID acts as a wildcard for the respective register }
      if (base<>NR_INVALID) and (ref.base<>base) then
        exit;
      if (index<>NR_INVALID) and (ref.index<>index) then
        exit;
      Result:=true;
    end;
  { Returns true if instr has exactly two operands, the first of type ot0 and
    the second of type ot1. instr is cast to taicpu without checking its
    typ, so the caller has to pass an instruction. }
  function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
    var
      cpuinstr : taicpu;
    begin
      cpuinstr:=taicpu(instr);
      Result:=false;
      if cpuinstr.ops<>2 then
        exit;
      Result:=(cpuinstr.oper[0]^.typ=ot0) and
        (cpuinstr.oper[1]^.typ=ot1);
    end;
  { With DEBUG_AOPTCPU defined, inserts s as an assembler comment in front of
    p; otherwise this is an empty inline routine. }
  procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);{$ifndef DEBUG_AOPTCPU}inline;{$endif not DEBUG_AOPTCPU}
    begin
{$ifdef DEBUG_AOPTCPU}
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
{$endif DEBUG_AOPTCPU}
    end;
  { Returns true if loading a new value into reg1 replaces every bit of reg2,
    i.e. nothing of reg2's previous value survives the write. }
  function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
    begin
      { different super registers: the write does not touch reg2 at all }
      Result:=SuperRegistersEqual(reg1,reg2);
      if not Result then
        exit;
      { for non-integer registers an equal super register is sufficient,
        so Result stays true }
      if getregtype(reg1)<>R_INTREGISTER then
        exit;
      case getsubreg(reg1) of
        { A write to R_SUBL doesn't change R_SUBH and if reg2 is R_SUBW or
          higher, it preserves the high bits, so the new value depends on
          reg2's previous value. In other words, it is equivalent to doing:
          reg2 := (reg2 and $ffffff00) or byte(reg1); }
        R_SUBL:
          Result:=(getsubreg(reg2)=R_SUBL);
        { A write to R_SUBH doesn't change R_SUBL and if reg2 is R_SUBW or
          higher, it actually does a:
          reg2 := (reg2 and $ffff00ff) or (reg1 and $ff00); }
        R_SUBH:
          Result:=(getsubreg(reg2)=R_SUBH);
        { If reg2 is R_SUBD or larger, a write to R_SUBW preserves the high 16
          bits of reg2:
          reg2 := (reg2 and $ffff0000) or word(reg1); }
        R_SUBW:
          Result:=(getsubreg(reg2) in [R_SUBL,R_SUBH,R_SUBW]);
        { a write to R_SUBD always overwrites every other subregister,
          because it clears the high 32 bits of R_SUBQ on x86_64 }
        R_SUBD,
        R_SUBQ:
          Result:=true;
        else
          internalerror(2017042801);
      end;
    end;
  { Returns true if the value read from reg1 depends on the value stored in
    reg2. Registers with different super registers are independent; within
    one integer super register only the low byte (R_SUBL) and the high byte
    (R_SUBH) are independent of each other. }
  function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
    begin
      Result:=SuperRegistersEqual(reg1,reg2);
      if not Result then
        exit;
      { for non-integer registers an equal super register is sufficient,
        so Result stays true }
      if getregtype(reg1)<>R_INTREGISTER then
        exit;
      case getsubreg(reg1) of
        { the low byte only ignores the high byte ... }
        R_SUBL:
          Result:=(getsubreg(reg2)<>R_SUBH);
        { ... and vice versa }
        R_SUBH:
          Result:=(getsubreg(reg2)<>R_SUBL);
        { every wider sub register contains all smaller ones }
        R_SUBW,
        R_SUBD,
        R_SUBQ:
          Result:=true;
        else
          internalerror(2017042802);
      end;
    end;
  { Allocates register reg between (and including) instructions p1 and p2.
    The type of p1 and p2 must not be in SkipInstr.
    Note that this routine is called both from the peephole optimizer (where
    optinfo is not yet initialised) and from the cse (where it is).

    The whole super register is allocated. Every alloc/dealloc marker for it
    found between p1 and the instruction following p2 is removed; if the
    first removed marker was an alloc, a new alloc is placed before p1, and
    if the last removed marker was a dealloc, a new dealloc is placed before
    the instruction following p2. initialusedregs is updated when the
    register was not yet marked as used. }
  procedure TX86AsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
    var
      marker, rangeStart: tai;
      curAlloc: tai_regalloc;
      anyRemoved,
      firstWasAlloc,
      lastWasDealloc: boolean;
    begin
{$ifdef EXTDEBUG}
{      if assigned(p1.optinfo) and
         (ptaiprop(p1.optinfo)^.usedregs <> initialusedregs) then
        internalerror(2004101010); }
{$endif EXTDEBUG}
      rangeStart := p1;
      if (reg = NR_ESP) or
         (reg = current_procinfo.framepointer) or
         not(assigned(p1)) then
        { this happens with registers which are loaded implicitly, outside the }
        { current block (e.g. esi with self)                                   }
        exit;
      { make sure we allocate it for this instruction }
      getnextinstruction(p2,p2);
      anyRemoved := false;
      firstWasAlloc := false;
      lastWasDealloc := false;
{$ifdef allocregdebug}
      marker := tai_comment.Create(strpnew('allocating '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
        ' from here...'));
      insertllitem(asml,p1.previous,p1,marker);
      marker := tai_comment.Create(strpnew('allocated '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
        ' till here...'));
      insertllitem(asml,p2,p2.next,marker);
{$endif allocregdebug}
      { do it the safe way: always allocate the full super register,
        as we do no register re-allocation in the peephole optimizer,
        this does not hurt
      }
      if getregtype(reg)=R_MMREGISTER then
        reg:=newreg(R_MMREGISTER,getsupreg(reg),R_SUBMMWHOLE)
      else if getregtype(reg)=R_INTREGISTER then
        reg:=newreg(R_INTREGISTER,getsupreg(reg),R_SUBWHOLE);
      { register not live at the start yet: emit an alloc in front of p1 }
      if not(RegInUsedRegs(reg,initialusedregs)) then
        begin
          marker := tai_regalloc.alloc(reg,nil);
          insertllItem(p1.previous,p1,marker);
          IncludeRegInUsedRegs(reg,initialusedregs);
        end;
      while assigned(p1) and
            (p1 <> p2) do
        begin
          if assigned(p1.optinfo) then
            internalerror(2014022301); // IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs);
          p1 := tai(p1.next);
          repeat
            { skip everything skippable except register allocation markers }
            while assigned(p1) and
                  (p1.typ in (SkipInstr-[ait_regalloc])) do
              p1 := tai(p1.next);
            { remove all allocation/deallocation info about the register in between }
            if assigned(p1) and
               (p1.typ = ait_regalloc) then
              begin
                curAlloc := tai_regalloc(p1);
                { same super register, different sub register? }
                if SuperRegistersEqual(reg,curAlloc.reg) and (curAlloc.reg<>reg) then
                  begin
                    if (getsubreg(curAlloc.reg)>getsubreg(reg)) or (getsubreg(reg)=R_SUBH) then
                      internalerror(2016101501);
                    curAlloc.reg:=reg;
                  end;
                if curAlloc.reg=reg then
                  begin
                    { remember the kind of the first and of the last marker removed }
                    if not anyRemoved then
                      begin
                        firstWasAlloc := (curAlloc.ratype=ra_alloc);
                        anyRemoved := true;
                      end;
                    lastWasDealloc := (curAlloc.ratype=ra_dealloc);
                    marker := tai(p1.Next);
                    asml.Remove(p1);
                    p1.free;
                    p1 := marker;
                  end
                else
                  p1 := tai(p1.next);
              end;
          until not(assigned(p1)) or
                not(p1.typ in SkipInstr);
        end;
      if assigned(p1) then
        begin
          if firstWasAlloc then
            begin
              marker := tai_regalloc.Alloc(reg,nil);
              insertLLItem(rangeStart.previous,rangeStart,marker);
            end;
          if lastWasDealloc then
            begin
              marker := tai_regalloc.DeAlloc(reg,nil);
              insertLLItem(p1.previous,p1,marker);
            end;
        end;
    end;
  { Returns true if instruction hp loads reg with a completely new value,
    i.e. it overwrites reg entirely (see Reg1WriteOverwritesReg2Entirely) and
    does not read reg's previous value while doing so. Returns false if hp is
    nil or not an instruction.

    Recognised instructions:
      - two operand moves/loads (MOV, MOVZX, MOVSX, LEA, the scalar and
        aligned (V)MOV* variants listed below, L*S) with a register
        destination,
      - POP with a register destination,
      - three operand IMUL,
      - one operand IMUL/MUL (implicit AX/DX/EDX/RDX result),
      - CWD, CDQ, CQO, CBW,
      - L*S regarding the segment register they load.

    The order of the sub-conditions matters: operands are only dereferenced
    after the operand count/type has been established by an earlier term. }
  function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      if not assigned(hp) or
         (hp.typ <> ait_instruction) then
        begin
          Result := false;
          exit;
        end;
      p := taicpu(hp);
      Result :=
        (((p.opcode = A_MOV) or
          (p.opcode = A_MOVZX) or
          (p.opcode = A_MOVSX) or
          (p.opcode = A_LEA) or
          (p.opcode = A_VMOVSS) or
          (p.opcode = A_VMOVSD) or
          (p.opcode = A_VMOVAPD) or
          (p.opcode = A_VMOVAPS) or
          (p.opcode = A_VMOVQ) or
          (p.opcode = A_MOVSS) or
          (p.opcode = A_MOVSD) or
          (p.opcode = A_MOVQ) or
          (p.opcode = A_MOVAPD) or
          (p.opcode = A_MOVAPS) or
{$ifndef x86_64}
          (p.opcode = A_LDS) or
          (p.opcode = A_LES) or
{$endif not x86_64}
          (p.opcode = A_LFS) or
          (p.opcode = A_LGS) or
          (p.opcode = A_LSS)) and
         (p.ops=2) and { A_MOVSD can have zero operands, so this check is needed }
         (p.oper[1]^.typ = top_reg) and
         (Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)) and
         ((p.oper[0]^.typ = top_const) or
          ((p.oper[0]^.typ = top_reg) and
           not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
          ((p.oper[0]^.typ = top_ref) and
           not RegInRef(reg,p.oper[0]^.ref^)))) or
        { POP can also have a memory destination; only look at the register
          field if the operand really is a register }
        ((p.opcode = A_POP) and
         (p.ops=1) and
         (p.oper[0]^.typ = top_reg) and
         (Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg))) or
        { imul const,reg/mem,reg }
        ((p.opcode = A_IMUL) and
         (p.ops=3) and
         (Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg,reg)) and
         (((p.oper[1]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[1]^.reg,reg))) or
          ((p.oper[1]^.typ=top_ref) and not(RegInRef(reg,p.oper[1]^.ref^))))) or
        { one operand imul/mul: the result register is implied by the operand size }
        ((((p.opcode = A_IMUL) or
           (p.opcode = A_MUL)) and
          (p.ops=1)) and
         (((p.oper[0]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
          ((p.oper[0]^.typ=top_ref) and not(RegInRef(reg,p.oper[0]^.ref^)))) and
         (((p.opsize=S_B) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
          ((p.opsize=S_W) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
          ((p.opsize=S_L) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg))
{$ifdef x86_64}
          or ((p.opsize=S_Q) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg))
{$endif x86_64}
         )) or
        ((p.opcode = A_CWD) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
        ((p.opcode = A_CDQ) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg)) or
{$ifdef x86_64}
        ((p.opcode = A_CQO) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg)) or
{$endif x86_64}
        ((p.opcode = A_CBW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
{$ifndef x86_64}
        ((p.opcode = A_LDS) and (reg=NR_DS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
        ((p.opcode = A_LES) and (reg=NR_ES) and not(RegInRef(reg,p.oper[0]^.ref^))) or
{$endif not x86_64}
        ((p.opcode = A_LFS) and (reg=NR_FS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
        ((p.opcode = A_LGS) and (reg=NR_GS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
        ((p.opcode = A_LSS) and (reg=NR_SS) and not(RegInRef(reg,p.oper[0]^.ref^)));
    end;
  { Returns true if p starts one of the following instruction sequences:
      ret
      leave; ret
      mov %ebp,%esp; pop %ebp; ret
    The instructions following p are fetched with GetNextInstruction. }
  class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
    var
      second,third : tai;
    begin
      result:=false;
      if p.typ<>ait_instruction then
        exit;
      case taicpu(p).opcode of
        A_RET:
          result:=true;
        A_LEAVE:
          result:=
            GetNextInstruction(p,second) and
            (second.typ=ait_instruction) and
            (taicpu(second).opcode=A_RET);
        A_MOV:
          result:=
            { mov %ebp,%esp }
            (taicpu(p).oper[0]^.typ=top_reg) and
            (taicpu(p).oper[0]^.reg=NR_EBP) and
            (taicpu(p).oper[1]^.typ=top_reg) and
            (taicpu(p).oper[1]^.reg=NR_ESP) and
            { pop %ebp }
            GetNextInstruction(p,second) and
            (second.typ=ait_instruction) and
            (taicpu(second).opcode=A_POP) and
            (taicpu(second).oper[0]^.typ=top_reg) and
            (taicpu(second).oper[0]^.reg=NR_EBP) and
            { ret }
            GetNextInstruction(second,third) and
            (third.typ=ait_instruction) and
            (taicpu(third).opcode=A_RET);
      end;
    end;
  { Returns true if hp1 is an arithmetic/logic instruction that modifies reg
    in place without using reg as its source:
      - add/sub/or/xor/and/shl/shr/sar with a constant or a register other
        than reg as first operand and reg as second operand,
      - inc/dec/neg/not with reg as only operand.
    Any other opcode yields false. }
  class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
    begin
      Result := False;
      case hp1.opcode of
        A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
          { the first operand is examined before the second one is touched,
            exactly as in the order written here }
          Result :=
            ((hp1.oper[0]^.typ = top_const) or
             ((hp1.oper[0]^.typ = top_reg) and
              (hp1.oper[0]^.reg <> reg))) and
            (hp1.oper[1]^.typ = top_reg) and
            (hp1.oper[1]^.reg = reg);
        A_INC,A_DEC,A_NEG,A_NOT:
          Result :=
            (hp1.oper[0]^.typ = top_reg) and
            (hp1.oper[0]^.reg = reg);
      end;
    end;
  { Removes the last deallocation marker of the function result register(s)
    located before p. Which registers are handled depends on the type of the
    current procedure's result: EAX for the listed non-ordinal types, EAX
    (plus EDX for 8 byte results) for ordinals of non-zero size. }
  procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);

    { Walks backwards from p and removes the first dealloc marker found for
      integer super register supreg. The walk stops at the start of the list
      or at the first item for which regInInstruction reports a use of the
      whole register. }
    procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
      var
        cur: tai;
        wholeReg: tregister;
      begin
        wholeReg := newreg(R_INTREGISTER,supreg,R_SUBWHOLE);
        cur := tai(p.previous);
        while assigned(cur) do
          begin
            if (cur.typ = ait_regalloc) and
               (tai_regalloc(cur).ratype=ra_dealloc) and
               (getregtype(tai_regalloc(cur).reg) = R_INTREGISTER) and
               (getsupreg(tai_regalloc(cur).reg) = supreg) then
              begin
                asml.remove(cur);
                cur.free;
                exit;
              end;
            if regInInstruction(wholeReg,cur) then
              break;
            cur := tai(cur.previous);
          end;
      end;

    begin
      case current_procinfo.procdef.returndef.typ of
        arraydef,recorddef,pointerdef,
        stringdef,enumdef,procdef,objectdef,errordef,
        filedef,setdef,procvardef,
        classrefdef,forwarddef:
          DoRemoveLastDeallocForFuncRes(RS_EAX);
        orddef:
          if current_procinfo.procdef.returndef.size <> 0 then
            begin
              DoRemoveLastDeallocForFuncRes(RS_EAX);
              { for int64/qword }
              if current_procinfo.procdef.returndef.size = 8 then
                DoRemoveLastDeallocForFuncRes(RS_EDX);
            end;
      end;
    end;
  { Pass 1 peephole optimizations for a vmovapd/vmovaps instruction p with
    two register operands:
      - removes a move of a register onto itself,
      - folds two consecutive moves through a temporary register,
      - removes a move back into the register it was just copied from,
      - folds "move; fma; move back" into a single fma.
    p is updated when the instruction it pointed to has been removed.
    Returns true for the first three transformations; the fma folding
    leaves the result false.

    Every CopyUsedRegs is paired with a ReleaseUsedRegs so that the temporary
    register sets are freed again. }
  function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1,hp2 : tai;
    begin
      result:=false;
      if MatchOpType(taicpu(p),top_reg,top_reg) then
        begin
          { vmova* reg1,reg1
            =>
            <nop> }
          if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
            begin
              GetNextInstruction(p,hp1);
              asml.Remove(p);
              p.Free;
              p:=hp1;
              result:=true;
            end
          else if GetNextInstruction(p,hp1) then
            begin
              if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
                 MatchOpType(taicpu(hp1),top_reg,top_reg) and
                 MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
                begin
                  { vmova* reg1,reg2
                    vmova* reg2,reg3
                    dealloc reg2
                    =>
                    vmova* reg1,reg3 }
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                    begin
                      taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end
                  { special case:
                    vmova* reg1,reg2
                    vmova* reg2,reg1
                    =>
                    vmova* reg1,reg2 }
                  else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
                    begin
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end;
                  ReleaseUsedRegs(TmpUsedRegs);
                end
              else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
                 { we mix single and double operations here because we assume that the compiler
                   generates vmovapd only after double operations and vmovaps only after single operations }
                 MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
                 GetNextInstruction(hp1,hp2) and
                 MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
                 { the trailing move has to copy the temporary back: its source
                   must be reg2 and its destination reg1, otherwise removing it
                   would drop an unrelated load of reg1 }
                 MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
                 MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
                begin
                  { vmova* reg1,reg2
                    vfma*  x,y,reg2
                    vmova* reg2,reg1
                    dealloc reg2
                    =>
                    vfma*  x,y,reg1 }
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
                    begin
                      taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
                      asml.Remove(p);
                      p.Free;
                      asml.Remove(hp2);
                      hp2.Free;
                      p:=hp1;
                    end;
                  ReleaseUsedRegs(TmpUsedRegs);
                end;
            end;
        end;
    end;
  { Pass 1 peephole optimization for a three operand instruction p that is
    followed by a vmovapd/vmovaps copying p's third operand into another
    register:

      v<op>   x,y,reg3
      vmova*  reg3,reg4
      dealloc reg3
      =>
      v<op>   x,y,reg4

    Returns true if the move has been folded into p. The temporary register
    sets obtained with CopyUsedRegs are released again before returning. }
  function TX86AsmOptimizer.OptPass1VOP(const p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1 : tai;
    begin
      result:=false;
      if GetNextInstruction(p,hp1) and
         { we mix single and double operations here because we assume that the compiler
           generates vmovapd only after double operations and vmovaps only after single operations }
         MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
         MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
         (taicpu(hp1).oper[1]^.typ=top_reg) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          { the intermediate register may only be dropped if nothing reads it
            after the move }
          if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)) then
            begin
              taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
              asml.Remove(hp1);
              hp1.Free;
              result:=true;
            end;
          ReleaseUsedRegs(TmpUsedRegs);
        end;
    end;
  598. function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  599. var
  600. hp1, hp2: tai;
  601. TmpUsedRegs : TAllUsedRegs;
  602. GetNextIntruction_p : Boolean;
  603. begin
  604. Result:=false;
  605. GetNextIntruction_p:=GetNextInstruction(p, hp1);
  606. if GetNextIntruction_p and
  607. MatchInstruction(hp1,A_AND,[]) and
  608. (taicpu(p).oper[1]^.typ = top_reg) and
  609. MatchOpType(taicpu(hp1),top_const,top_reg) and
  610. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
  611. case taicpu(p).opsize Of
  612. S_L:
  613. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  614. begin
  615. DebugMsg('PeepHole Optimization,MovAnd2Mov',p);
  616. asml.remove(hp1);
  617. hp1.free;
  618. Result:=true;
  619. exit;
  620. end;
  621. end
  622. else if GetNextIntruction_p and
  623. MatchInstruction(hp1,A_MOV,[]) and
  624. (taicpu(p).oper[1]^.typ = top_reg) and
  625. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
  626. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  627. begin
  628. CopyUsedRegs(TmpUsedRegs);
  629. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  630. { we have
  631. mov x, %treg
  632. mov %treg, y
  633. }
  634. if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
  635. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  636. { we've got
  637. mov x, %treg
  638. mov %treg, y
  639. with %treg is not used after }
  640. case taicpu(p).oper[0]^.typ Of
  641. top_reg:
  642. begin
  643. { change
  644. mov %reg, %treg
  645. mov %treg, y
  646. to
  647. mov %reg, y
  648. }
  649. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  650. DebugMsg('PeepHole Optimization,MovMov2Mov 2',p);
  651. asml.remove(hp1);
  652. hp1.free;
  653. ReleaseUsedRegs(TmpUsedRegs);
  654. Exit;
  655. end;
  656. top_ref:
  657. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  658. begin
  659. { change
  660. mov mem, %treg
  661. mov %treg, %reg
  662. to
  663. mov mem, %reg"
  664. }
  665. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  666. DebugMsg('PeepHole Optimization,MovMov2Mov 3',p);
  667. asml.remove(hp1);
  668. hp1.free;
  669. ReleaseUsedRegs(TmpUsedRegs);
  670. Exit;
  671. end;
  672. end;
  673. ReleaseUsedRegs(TmpUsedRegs);
  674. end
  675. else
  676. { Change
  677. mov %reg1, %reg2
  678. xxx %reg2, ???
  679. to
  680. mov %reg1, %reg2
  681. xxx %reg1, ???
  682. to avoid a write/read penalty
  683. }
  684. if MatchOpType(taicpu(p),top_reg,top_reg) and
  685. GetNextInstruction(p,hp1) and
  686. (tai(hp1).typ = ait_instruction) and
  687. (taicpu(hp1).ops >= 1) and
  688. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  689. { we have
  690. mov %reg1, %reg2
  691. XXX %reg2, ???
  692. }
  693. begin
  694. if ((taicpu(hp1).opcode = A_OR) or
  695. (taicpu(hp1).opcode = A_TEST)) and
  696. (taicpu(hp1).oper[1]^.typ = top_reg) and
  697. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  698. { we have
  699. mov %reg1, %reg2
  700. test/or %reg2, %reg2
  701. }
  702. begin
  703. CopyUsedRegs(TmpUsedRegs);
  704. { reg1 will be used after the first instruction,
  705. so update the allocation info }
  706. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  707. if GetNextInstruction(hp1, hp2) and
  708. (hp2.typ = ait_instruction) and
  709. taicpu(hp2).is_jmp and
  710. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  711. { change
  712. mov %reg1, %reg2
  713. test/or %reg2, %reg2
  714. jxx
  715. to
  716. test %reg1, %reg1
  717. jxx
  718. }
  719. begin
  720. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  721. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  722. asml.remove(p);
  723. p.free;
  724. p := hp1;
  725. ReleaseUsedRegs(TmpUsedRegs);
  726. Exit;
  727. end
  728. else
  729. { change
  730. mov %reg1, %reg2
  731. test/or %reg2, %reg2
  732. to
  733. mov %reg1, %reg2
  734. test/or %reg1, %reg1
  735. }
  736. begin
  737. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  738. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  739. end;
  740. ReleaseUsedRegs(TmpUsedRegs);
  741. end
  742. end
  743. else
  744. { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  745. x >= RetOffset) as it doesn't do anything (it writes either to a
  746. parameter or to the temporary storage room for the function
  747. result)
  748. }
  749. if GetNextIntruction_p and
  750. (tai(hp1).typ = ait_instruction) then
  751. begin
  752. if IsExitCode(hp1) and
  753. MatchOpType(p,top_reg,top_ref) and
  754. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  755. not(assigned(current_procinfo.procdef.funcretsym) and
  756. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  757. (taicpu(p).oper[1]^.ref^.index = NR_NO) then
  758. begin
  759. asml.remove(p);
  760. p.free;
  761. p:=hp1;
  762. DebugMsg('Peephole removed deadstore before leave/ret',p);
  763. RemoveLastDeallocForFuncRes(p);
  764. exit;
  765. end
  766. { change
  767. mov reg1, mem1
  768. cmp x, mem1
  769. to
  770. mov reg1, mem1
  771. cmp x, reg1
  772. }
  773. else if MatchOpType(p,top_reg,top_ref) and
  774. MatchInstruction(hp1,A_CMP,[taicpu(p).opsize]) and
  775. (taicpu(hp1).oper[1]^.typ = top_ref) and
  776. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  777. begin
  778. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  779. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  780. end;
  781. end;
  782. { Next instruction is also a MOV ? }
  783. if GetNextIntruction_p and
  784. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  785. begin
  786. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  787. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  788. { mov reg1, mem1 or mov mem1, reg1
  789. mov mem2, reg2 mov reg2, mem2}
  790. begin
  791. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  792. { mov reg1, mem1 or mov mem1, reg1
  793. mov mem2, reg1 mov reg2, mem1}
  794. begin
  795. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  796. { Removes the second statement from
  797. mov reg1, mem1/reg2
  798. mov mem1/reg2, reg1 }
  799. begin
  800. if taicpu(p).oper[0]^.typ=top_reg then
  801. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  802. DebugMsg('PeepHole Optimization,MovMov2Mov 1',p);
  803. asml.remove(hp1);
  804. hp1.free;
  805. Result:=true;
  806. exit;
  807. end
  808. else
  809. begin
  810. CopyUsedRegs(TmpUsedRegs);
  811. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  812. if (taicpu(p).oper[1]^.typ = top_ref) and
  813. { mov reg1, mem1
  814. mov mem2, reg1 }
  815. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  816. GetNextInstruction(hp1, hp2) and
  817. MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
  818. OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  819. OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
  820. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  821. { change to
  822. mov reg1, mem1 mov reg1, mem1
  823. mov mem2, reg1 cmp reg1, mem2
  824. cmp mem1, reg1
  825. }
  826. begin
  827. asml.remove(hp2);
  828. hp2.free;
  829. taicpu(hp1).opcode := A_CMP;
  830. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  831. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  832. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  833. DebugMsg('Peephole MovMovCmp2MovCmp done',hp1);
  834. end;
  835. ReleaseUsedRegs(TmpUsedRegs);
  836. end;
  837. end
  838. else if (taicpu(p).oper[1]^.typ=top_ref) and
  839. OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  840. begin
  841. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  842. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  843. DebugMsg('PeepHole Optimization,MovMov2MovMov1',p);
  844. end
  845. else
  846. begin
  847. CopyUsedRegs(TmpUsedRegs);
  848. if GetNextInstruction(hp1, hp2) and
  849. MatchOpType(taicpu(p),top_ref,top_reg) and
  850. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  851. (taicpu(hp1).oper[1]^.typ = top_ref) and
  852. MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
  853. MatchOpType(taicpu(hp2),top_ref,top_reg) and
  854. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  855. if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
  856. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  857. { mov mem1, %reg1
  858. mov %reg1, mem2
  859. mov mem2, reg2
  860. to:
  861. mov mem1, reg2
  862. mov reg2, mem2}
  863. begin
  864. AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  865. DebugMsg('PeepHole Optimization,MovMovMov2MovMov 1',p);
  866. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  867. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  868. asml.remove(hp2);
  869. hp2.free;
  870. end
  871. {$ifdef i386}
  872. { this is enabled for i386 only, as the rules to create the reg sets below
  873. are too complicated for x86-64, so this makes this code too error prone
  874. on x86-64
  875. }
  876. else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  877. not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
  878. not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
  879. { mov mem1, reg1 mov mem1, reg1
  880. mov reg1, mem2 mov reg1, mem2
  881. mov mem2, reg2 mov mem2, reg1
  882. to: to:
  883. mov mem1, reg1 mov mem1, reg1
  884. mov mem1, reg2 mov reg1, mem2
  885. mov reg1, mem2
  886. or (if mem1 depends on reg1
  887. and/or if mem2 depends on reg2)
  888. to:
  889. mov mem1, reg1
  890. mov reg1, mem2
  891. mov reg1, reg2
  892. }
  893. begin
  894. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  895. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  896. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  897. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  898. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  899. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  900. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  901. AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  902. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  903. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  904. AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  905. end
  906. else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  907. begin
  908. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  909. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  910. end
  911. else
  912. begin
  913. asml.remove(hp2);
  914. hp2.free;
  915. end
  916. {$endif i386}
  917. ;
  918. ReleaseUsedRegs(TmpUsedRegs);
  919. end;
  920. end
  921. (* { movl [mem1],reg1
  922. movl [mem1],reg2
  923. to
  924. movl [mem1],reg1
  925. movl reg1,reg2
  926. }
  927. else if (taicpu(p).oper[0]^.typ = top_ref) and
  928. (taicpu(p).oper[1]^.typ = top_reg) and
  929. (taicpu(hp1).oper[0]^.typ = top_ref) and
  930. (taicpu(hp1).oper[1]^.typ = top_reg) and
  931. (taicpu(p).opsize = taicpu(hp1).opsize) and
  932. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  933. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  934. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  935. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  936. else*)
  937. { movl const1,[mem1]
  938. movl [mem1],reg1
  939. to
  940. movl const1,reg1
  941. movl reg1,[mem1]
  942. }
  943. else if MatchOpType(Taicpu(p),top_const,top_ref) and
  944. MatchOpType(Taicpu(hp1),top_ref,top_reg) and
  945. (taicpu(p).opsize = taicpu(hp1).opsize) and
  946. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  947. not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
  948. begin
  949. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  950. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  951. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  952. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  953. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  954. DebugMsg('PeepHole Optimization,MovMov2MovMov 1',p);
  955. end
  956. end
  957. else if (taicpu(p).oper[1]^.typ = top_reg) and
  958. GetNextIntruction_p and
  959. (hp1.typ = ait_instruction) and
  960. GetNextInstruction(hp1, hp2) and
  961. MatchInstruction(hp2,A_MOV,[]) and
  962. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  963. (taicpu(hp2).oper[0]^.typ=top_reg) and
  964. (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
  965. (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
  966. ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
  967. IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
  968. ) then
  969. { change movsX/movzX reg/ref, reg2
  970. add/sub/or/... reg3/$const, reg2
  971. mov reg2 reg/ref
  972. to add/sub/or/... reg3/$const, reg/ref }
  973. begin
  974. CopyUsedRegs(TmpUsedRegs);
  975. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  976. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  977. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  978. begin
  979. { by example:
  980. movswl %si,%eax movswl %si,%eax p
  981. decl %eax addl %edx,%eax hp1
  982. movw %ax,%si movw %ax,%si hp2
  983. ->
  984. movswl %si,%eax movswl %si,%eax p
  985. decw %eax addw %edx,%eax hp1
  986. movw %ax,%si movw %ax,%si hp2
  987. }
  988. DebugMsg('Peephole Optimization MovOpMov2Op ('+
  989. std_op2str[taicpu(p).opcode]+gas_opsize2str[taicpu(p).opsize]+' '+
  990. std_op2str[taicpu(hp1).opcode]+gas_opsize2str[taicpu(hp1).opsize]+' '+
  991. std_op2str[taicpu(hp2).opcode]+gas_opsize2str[taicpu(hp2).opsize],p);
  992. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  993. {
  994. ->
  995. movswl %si,%eax movswl %si,%eax p
  996. decw %si addw %dx,%si hp1
  997. movw %ax,%si movw %ax,%si hp2
  998. }
  999. case taicpu(hp1).ops of
  1000. 1:
  1001. begin
  1002. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  1003. if taicpu(hp1).oper[0]^.typ=top_reg then
  1004. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1005. end;
  1006. 2:
  1007. begin
  1008. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  1009. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  1010. (taicpu(hp1).opcode<>A_SHL) and
  1011. (taicpu(hp1).opcode<>A_SHR) and
  1012. (taicpu(hp1).opcode<>A_SAR) then
  1013. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1014. end;
  1015. else
  1016. internalerror(2008042701);
  1017. end;
  1018. {
  1019. ->
  1020. decw %si addw %dx,%si p
  1021. }
  1022. asml.remove(p);
  1023. asml.remove(hp2);
  1024. p.Free;
  1025. hp2.Free;
  1026. p := hp1;
  1027. end;
  1028. ReleaseUsedRegs(TmpUsedRegs);
  1029. end
  1030. else if GetNextIntruction_p and
  1031. MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  1032. GetNextInstruction(hp1, hp2) and
  1033. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  1034. MatchOperand(Taicpu(p).oper[0]^,0) and
  1035. (Taicpu(p).oper[1]^.typ = top_reg) and
  1036. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  1037. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  1038. { mov reg1,0
  1039. bts reg1,operand1 --> mov reg1,operand2
  1040. or reg1,operand2 bts reg1,operand1}
  1041. begin
  1042. Taicpu(hp2).opcode:=A_MOV;
  1043. asml.remove(hp1);
  1044. insertllitem(hp2,hp2.next,hp1);
  1045. asml.remove(p);
  1046. p.free;
  1047. p:=hp1;
  1048. end
  1049. else if GetNextIntruction_p and
  1050. MatchInstruction(hp1,A_LEA,[S_L]) and
  1051. MatchOpType(Taicpu(p),top_ref,top_reg) and
  1052. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  1053. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  1054. ) or
  1055. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  1056. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  1057. )
  1058. ) then
  1059. { mov reg1,ref
  1060. lea reg2,[reg1,reg2]
  1061. to
  1062. add reg2,ref}
  1063. begin
  1064. CopyUsedRegs(TmpUsedRegs);
  1065. { reg1 may not be used afterwards }
  1066. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1067. begin
  1068. Taicpu(hp1).opcode:=A_ADD;
  1069. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  1070. DebugMsg('Peephole MovLea2Add done',hp1);
  1071. asml.remove(p);
  1072. p.free;
  1073. p:=hp1;
  1074. end;
  1075. ReleaseUsedRegs(TmpUsedRegs);
  1076. end;
  1077. end;
  { Second-pass peephole optimizations that start at a MOV instruction.

    p is the instruction being examined; the body casts it to taicpu
    without checking, so the caller is expected to pass a MOV taicpu
    (NOTE(review): confirm against the dispatcher).  On a successful rewrite
    p is redirected to the instruction that replaces the matched sequence.

    Two rewrites are attempted:

      1. mov reg1, reg2
         mov/zx/sx (reg2, ..), reg2     ->   mov/zx/sx (reg1, ..), reg2

      2. mov (ref), reg
         add/sub/or/lea/... x, reg
         mov reg, (ref)                 ->   add/sub/or/... x, (ref)

    Result is set to true only by rewrite 1; rewrite 2 updates p but leaves
    Result at false. }
  function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1,hp2: tai;
    begin
      Result:=false;
      if MatchOpType(taicpu(p),top_reg,top_reg) and
        GetNextInstruction(p, hp1) and
        MatchInstruction(hp1,A_MOV,A_MOVZX,A_MOVSX,[]) and
        MatchOpType(taicpu(hp1),top_ref,top_reg) and
        ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg)
         or
         (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)
        ) and
        (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) and
        { the load has to overwrite reg2 completely: an 8 or 16 bit
          destination keeps the upper bits that the first mov copied from
          reg1, so that mov could not be dropped.  A 32 bit write is either
          the whole register (i386) or zero-extends into it (x86-64) }
        (getsubreg(taicpu(hp1).oper[1]^.reg) in [R_SUBD,R_SUBQ]) then
        { mov reg1, reg2
          mov/zx/sx (reg2, ..), reg2      to   mov/zx/sx (reg1, ..), reg2 }
        begin
          { address through reg1 instead of the copy in reg2 }
          if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
            taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
          if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
            taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
          asml.remove(p);
          p.free;
          p := hp1;
          Result:=true;
          exit;
        end
      else if (taicpu(p).oper[0]^.typ = top_ref) and
        GetNextInstruction(p,hp1) and
        (hp1.typ = ait_instruction) and
        { while the GetNextInstruction(hp1,hp2) call could be factored out,
          doing it separately in both branches allows to do the cheap checks
          with low probability earlier }
        ((IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
          GetNextInstruction(hp1,hp2) and
          MatchInstruction(hp2,A_MOV,[])
         ) or
         ((taicpu(hp1).opcode=A_LEA) and
          GetNextInstruction(hp1,hp2) and
          MatchInstruction(hp2,A_MOV,[]) and
          ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
            (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
           ) or
           (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,
            taicpu(p).oper[1]^.reg) and
            (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg)) or
           (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_NO)) or
           (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,NR_NO,taicpu(p).oper[1]^.reg))
          ) and
          { NOTE(review): this query is handed UsedRegs itself, whereas the
            checks below work on the TmpUsedRegs copy; confirm that
            RegUsedAfterInstruction leaving its mark on UsedRegs is intended }
          ((MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^)) or not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)))
         )
        ) and
        { the value produced by hp1 (its last operand) is what hp2 stores }
        MatchOperand(taicpu(hp1).oper[taicpu(hp1).ops-1]^,taicpu(hp2).oper[0]^) and
        (taicpu(hp2).oper[1]^.typ = top_ref) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
          { the store must go back to the location that was loaded, and the
            register must be dead after the store }
          if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
            not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2, TmpUsedRegs))) then
            { change   mov            (ref), reg
                       add/sub/or/... reg2/$const, reg
                       mov            reg, (ref)
                       # release reg
              to       add/sub/or/... reg2/$const, (ref) }
            begin
              case taicpu(hp1).opcode of
                A_INC,A_DEC,A_NOT,A_NEG :
                  { single operand instructions: operate on memory directly }
                  taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
                A_LEA :
                  begin
                    { the lea becomes an add of its other summand: the
                      register that is not the loaded one, or else the
                      offset as a constant }
                    taicpu(hp1).opcode:=A_ADD;
                    if (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.index<>NR_NO) then
                      taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
                    else if (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.base<>NR_NO) then
                      taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base)
                    else
                      taicpu(hp1).loadconst(0,taicpu(hp1).oper[0]^.ref^.offset);
                    taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                    DebugMsg('Peephole FoldLea done',hp1);
                  end
                else
                  { two operand instructions: memory becomes the destination }
                  taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
              end;
              { the load and the store are no longer needed }
              asml.remove(p);
              asml.remove(hp2);
              p.free;
              hp2.free;
              p := hp1
            end;
          ReleaseUsedRegs(TmpUsedRegs);
        end;
    end;
  { Second-pass peephole optimization for IMUL: folds a preceding register
    copy into a three operand imul.

      mov  reg1, reg2
      imul y, reg2          ->   imul y, reg1, reg2

    p is cast to taicpu without a check, so the caller is expected to pass
    an IMUL taicpu (NOTE(review): confirm against the dispatcher).  y must be
    a constant or a reference with refaddr=addr_full, and p must have two
    operands, or three with source and destination being the same register.

    The fold is only done when the mov writes exactly the register the imul
    works on.  A 32 bit mov in front of a 64 bit imul is deliberately not
    folded: nothing is known about the upper half of reg1, so reading reg1
    as a 64 bit source would not compute the same product, and reusing the
    32 bit operands of the mov in a 64 bit imul yields an instruction with
    mismatched register sizes.

    Returns true when the mov was removed, false otherwise. }
  function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1 : tai;
    begin
      Result:=false;
      if (taicpu(p).ops >= 2) and
        ((taicpu(p).oper[0]^.typ = top_const) or
         ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
        (taicpu(p).oper[1]^.typ = top_reg) and
        ((taicpu(p).ops = 2) or
         ((taicpu(p).oper[2]^.typ = top_reg) and
          (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
        GetLastInstruction(p,hp1) and
        MatchInstruction(hp1,A_MOV,[]) and
        MatchOpType(hp1,top_reg,top_reg) and
        { same register, same size: see the header comment }
        (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          { only fold when reg2 is reported as unused after p }
          if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,p,TmpUsedRegs)) then
            { change
                mov reg1,reg2
                imul y,reg2 to imul y,reg1,reg2 }
            begin
              taicpu(p).ops := 3;
              { the destination is the register the imul already wrote to;
                it has to be copied to operand 2 before operand 1 is
                overwritten with the source of the mov }
              taicpu(p).loadreg(2,taicpu(p).oper[1]^.reg);
              taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
              DebugMsg('Peephole MovImul2Imul done',p);
              asml.remove(hp1);
              hp1.free;
              result:=true;
            end;
          ReleaseUsedRegs(TmpUsedRegs);
        end;
    end;
  { First-pass peephole optimizations that start at an AND instruction.

    p is cast to taicpu without a check, so the caller is expected to pass
    an AND taicpu (NOTE(review): confirm against the dispatcher).  Nothing
    is done when p has no following instruction.

    Rewrites, tried in this order:

      and const1, reg ; and const2, reg   ->  and (const1 and const2), reg
                                              (p is freed and moved to the
                                               second and, Result is true)
      and const, reg ; movzx sub(reg), reg  ->  and const, reg
                                              when const fits the source
                                              width of the movzx
      and const, reg ; movsx sub(reg), reg  ->  and const, reg
                                              when const additionally leaves
                                              the sign bit of the source
                                              clear
      and x, reg ; jcc                    ->  test x, reg ; jcc
                                              when reg is not in UsedRegs

    Only the first rewrite sets Result to true. }
  function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
    var
      hp1 : tai;
    begin
      Result:=false;

      if not(GetNextInstruction(p, hp1)) then
        exit;

      if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_AND,[]) and
        MatchOpType(hp1,top_const,top_reg) and
        (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
        { the second register must contain the first one, so compare their subreg types }
        (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
        (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
        { change
            and const1, reg
            and const2, reg
          to
            and (const1 and const2), reg
        }
        begin
          taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
          DebugMsg('Peephole AndAnd2And done',hp1);
          asml.remove(p);
          p.Free;
          p:=hp1;
          Result:=true;
          exit;
        end
      else if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_MOVZX,[]) and
        (taicpu(hp1).oper[0]^.typ = top_reg) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
        { source and destination of the movzx must be parts of the same
          register.  Their sub-registers always differ for a movzx, so the
          super-registers are what has to be compared here }
        (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
        { the mask below only describes the low bits of the register, so a
          high byte source such as %ah does not qualify }
        (getsubreg(taicpu(hp1).oper[0]^.reg)<>R_SUBH) and
        (((taicpu(p).opsize=S_W) and
          (taicpu(hp1).opsize=S_BW)) or
         ((taicpu(p).opsize=S_L) and
          (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
          or
         ((taicpu(p).opsize=S_Q) and
          (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
        ) then
        begin
          { the and already cleared every bit above the movzx source width,
            so the zero extension does not change the register }
          if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
             ) or
             (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
             or
             (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $ffffffff)=taicpu(p).oper[0]^.val)
             )
{$endif x86_64}
            then
            begin
              DebugMsg('Peephole AndMovzToAnd done',p);
              asml.remove(hp1);
              hp1.free;
            end;
        end
      else if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
        (taicpu(hp1).oper[0]^.typ = top_reg) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
        (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
        { as above: the mask says nothing about a high byte source }
        (getsubreg(taicpu(hp1).oper[0]^.reg)<>R_SUBH) and
        (((taicpu(p).opsize=S_W) and
          (taicpu(hp1).opsize=S_BW)) or
         ((taicpu(p).opsize=S_L) and
          (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
          or
         ((taicpu(p).opsize=S_Q) and
          (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
        ) then
        begin
          { the and cleared the sign bit of the movsx source and everything
            above it, so the sign extension does not change the register }
          if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
             ) or
             (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
             or
             (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
             )
{$endif x86_64}
            then
            begin
              DebugMsg('PeepHole Optimization,AndMovsxToAnd',p);
              asml.remove(hp1);
              hp1.free;
            end;
        end
      else if (taicpu(p).oper[1]^.typ = top_reg) and
        (hp1.typ = ait_instruction) and
        (taicpu(hp1).is_jmp) and
        (taicpu(hp1).opcode<>A_JMP) and
        not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
        { change
            and x, reg
            jxx
          to
            test x, reg
            jxx
          if reg is deallocated before the
          jump, but only if it's a conditional jump (PFV)
        }
        taicpu(p).opcode := A_TEST;
    end;
  { Post-peephole pass for MOV: rewrites "mov $0, %reg" as "xor %reg, %reg".

    p is cast to taicpu without a check, so the caller is expected to pass
    a MOV taicpu (NOTE(review): confirm against the dispatcher).  The
    rewrite is skipped while NR_DEFAULTFLAGS is in UsedRegs. }
  procedure TX86AsmOptimizer.PostPeepholeOptMov(const p : tai);
    begin
      { source has to be the constant 0 ... }
      if not(MatchOperand(taicpu(p).oper[0]^,0)) then
        exit;
      { ... moved into a register ... }
      if taicpu(p).oper[1]^.typ <> Top_Reg then
        exit;
      { ... at a point where the flags are not in use }
      if RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs) then
        exit;

      { change "mov $0, %reg" into "xor %reg, %reg" }
      taicpu(p).opcode := A_XOR;
      taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
    end;
  1331. end.