aoptx86.pas 71 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
  27. type
  { Peephole optimizer support shared by the x86 targets. }
  TX86AsmOptimizer = class(TAsmOptimizer)
    { true if instruction hp gives reg a completely new value that does not
      depend on the previous contents of reg }
    function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
  protected
    { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
    function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;

    { checks whether reading the value in reg1 depends on the value of reg2.
      This is very similar to SuperRegisterEquals, except that it takes into
      account that R_SUBH and R_SUBL are independent (e.g. reading from AL
      does not depend on the value in AH). }
    function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;

    procedure PostPeepholeOptMov(const p : tai);

    { individual peephole passes; each one gets the current instruction in p }
    function OptPass1AND(var p : tai) : boolean;
    function OptPass1VMOVAP(var p : tai) : boolean;
    function OptPass1VOP(const p : tai) : boolean;
    function OptPass1MOV(var p : tai) : boolean;
    function OptPass2MOV(var p : tai) : boolean;
    function OptPass2Imul(var p : tai) : boolean;
    function OptPass2Jmp(var p : tai) : boolean;
    function OptPass2Jcc(var p : tai) : boolean;

    { inserts s as an assembler comment in front of p when DEBUG_AOPTCPU is
      defined, does nothing otherwise }
    procedure DebugMsg(const s : string; p : tai);inline;

    { allocates reg between (and including) the instructions p1 and p2 }
    procedure AllocRegBetween(reg : tregister; p1,p2 : tai;var initialusedregs : TAllUsedRegs);

    { true if p is a ret, a leave followed by ret, or the sequence
      "mov ebp,esp / pop ebp / ret" }
    class function IsExitCode(p : tai) : boolean;

    { true if hp1 is a simple arithmetic/logic instruction whose destination
      is reg and whose source is a constant or a different register }
    class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean;

    { removes the nearest deallocation of the function result register(s)
      that precedes p }
    procedure RemoveLastDeallocForFuncRes(p : tai);
  end;
  { MatchInstruction: true if instr is an instruction with one of the given
    opcodes and an operand size contained in opsize (an empty set matches
    every size) }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;

  { MatchOperand: true if oper is the given register, the given constant,
    or equal to a second operand }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;

  { true if all fields compared by the routine (offset, segment, base, index,
    scalefactor, symbol, refaddr, relsymbol) are equal in r1 and r2 }
  function RefsEqual(const r1, r2: treference): boolean;

  { returns true, if ref is a plain reference without offset, segment or
    symbols, with a scale factor of 0 or 1, which uses the registers passed
    as base and index; NR_INVALID acts as a wildcard for either register }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;

  { returns true, if ref is a reference using only the registers passed as base and index
    and having an offset; the offset itself is not examined }
  function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;

  { true if instr has exactly two operands of the types ot0 and ot1 }
  function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
  66. implementation
  67. uses
  68. cutils,verbose,
  69. globals,
  70. cpuinfo,
  71. procinfo,
  72. aasmbase,
  73. aoptutils,
  74. symconst,symsym,
  75. itcpugas;
  { Returns true if instr is an instruction with opcode op whose operand size
    is contained in opsize. An empty opsize set accepts every operand size. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
    begin
      { anything that is not an instruction can never match }
      if instr.typ <> ait_instruction then
        exit(false);
      if taicpu(instr).opcode <> op then
        exit(false);
      result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { Returns true if instr is an instruction whose opcode is op1 or op2 and
    whose operand size is contained in opsize. An empty opsize set accepts
    every operand size. }
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
    var
      opc: TAsmOp;
    begin
      result := false;
      if instr.typ = ait_instruction then
        begin
          opc := taicpu(instr).opcode;
          if (opc = op1) or (opc = op2) then
            result := (opsize = []) or (taicpu(instr).opsize in opsize);
        end;
    end;
  { Returns true if instr is an instruction whose opcode is op1, op2 or op3
    and whose operand size is contained in opsize. An empty opsize set
    accepts every operand size. }
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
    var
      opc: TAsmOp;
    begin
      result := false;
      if instr.typ = ait_instruction then
        begin
          opc := taicpu(instr).opcode;
          if (opc = op1) or (opc = op2) or (opc = op3) then
            result := (opsize = []) or (taicpu(instr).opsize in opsize);
        end;
    end;
  { Returns true if instr is an instruction whose opcode occurs in ops and
    whose operand size is contained in opsize. An empty opsize set accepts
    every operand size; an empty ops array never matches. }
  function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
    const opsize : topsizes) : boolean;
    var
      i : longint;
    begin
      result:=false;
      { the type and size tests do not depend on the opcode being tried,
        so they are evaluated once up front }
      if instr.typ <> ait_instruction then
        exit;
      if (opsize <> []) and not(taicpu(instr).opsize in opsize) then
        exit;
      for i:=low(ops) to high(ops) do
        if taicpu(instr).opcode = ops[i] then
          exit(true);
    end;
  { Returns true if oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ = top_reg then
        result := oper.reg = reg
      else
        result := false;
    end;
  { Returns true if oper is a constant operand with the value a. }
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
    begin
      if oper.typ = top_const then
        result := oper.val = a
      else
        result := false;
    end;
  { Returns true if both operands have the same type and the same content.
    Constants are compared by value, registers by register number and
    references with RefsEqual. Any other operand type shared by both
    operands raises internalerror 2013102801. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
    begin
      result := oper1.typ = oper2.typ;
      { operands of different types can never be equal }
      if not result then
        exit;
      case oper1.typ of
        top_const:
          result := oper1.val = oper2.val;
        top_reg:
          result := oper1.reg = oper2.reg;
        top_ref:
          result := RefsEqual(oper1.ref^, oper2.ref^);
        else
          internalerror(2013102801);
      end;
    end;
  { Returns true if r1 and r2 agree in offset, segment, base, index,
    scalefactor, symbol, refaddr and relsymbol. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      RefsEqual := false;
      { numeric part of the address }
      if (r1.offset <> r2.offset) or (r1.scalefactor <> r2.scalefactor) then
        exit;
      { registers taking part in the address }
      if (r1.segment <> r2.segment) or (r1.base <> r2.base) or (r1.index <> r2.index) then
        exit;
      { symbolic part of the address }
      if (r1.symbol <> r2.symbol) or (r1.relsymbol <> r2.relsymbol) then
        exit;
      RefsEqual := r1.refaddr = r2.refaddr;
    end;
  { Returns true if ref is a plain register reference: offset 0, scale factor
    0 or 1, no segment register, no symbol and no relative symbol, whose base
    and index registers equal the ones passed. Passing NR_INVALID for base
    or index accepts any register in that position. }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
    begin
      Result:=false;
      if ref.offset<>0 then
        exit;
      if not(ref.scalefactor in [0,1]) then
        exit;
      if (ref.segment<>NR_NO) or (ref.symbol<>nil) or (ref.relsymbol<>nil) then
        exit;
      if (base<>NR_INVALID) and (ref.base<>base) then
        exit;
      Result:=(index=NR_INVALID) or (ref.index=index);
    end;
  { Same test as MatchReference, except that the offset of ref is not looked
    at: scale factor 0 or 1, no segment register, no symbol, no relative
    symbol, and base/index equal to the registers passed (NR_INVALID accepts
    any register in that position). }
  function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
    begin
      Result:=false;
      if not(ref.scalefactor in [0,1]) then
        exit;
      if (ref.segment<>NR_NO) or (ref.symbol<>nil) or (ref.relsymbol<>nil) then
        exit;
      if (base<>NR_INVALID) and (ref.base<>base) then
        exit;
      Result:=(index=NR_INVALID) or (ref.index=index);
    end;
  { Returns true if instr has exactly two operands, the first of type ot0 and
    the second of type ot1. instr is cast to taicpu without a type check, so
    the caller has to pass an instruction. }
  function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
    var
      ins : taicpu;
    begin
      ins:=taicpu(instr);
      if ins.ops<>2 then
        exit(false);
      Result:=(ins.oper[0]^.typ=ot0) and (ins.oper[1]^.typ=ot1);
    end;
  {$ifndef DEBUG_AOPTCPU}
  { Without DEBUG_AOPTCPU the debug hook is an empty inline routine. }
  procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
    end;
  {$else DEBUG_AOPTCPU}
  { With DEBUG_AOPTCPU the message s is inserted as an assembler comment
    directly in front of p. }
  procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
    begin
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
    end;
  {$endif DEBUG_AOPTCPU}
  { Returns true if writing a new value to reg1 replaces every bit of reg2.
    Registers with different super registers never overwrite each other;
    for non-integer registers an equal super register is sufficient.
    An integer sub register not handled below raises internalerror
    2017042801. }
  function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
    begin
      result:=false;
      if not SuperRegistersEqual(reg1,reg2) then
        exit;
      if getregtype(reg1)<>R_INTREGISTER then
        begin
          { the super registers are equal, nothing more to distinguish }
          result:=true;
          exit;
        end;
      case getsubreg(reg1) of
        { A write to the low byte leaves the high byte alone and, if reg2 is
          16 bit or wider, keeps its upper bits:
            reg2 := (reg2 and $ffffff00) or byte(reg1);
          so only the low byte itself is fully replaced. }
        R_SUBL:
          result:=getsubreg(reg2)=R_SUBL;
        { A write to the high byte leaves the low byte alone and, if reg2 is
          16 bit or wider, amounts to:
            reg2 := (reg2 and $ffff00ff) or (reg1 and $ff00);
          so only the high byte itself is fully replaced. }
        R_SUBH:
          result:=getsubreg(reg2)=R_SUBH;
        { A 16 bit write keeps the upper 16 bits of anything wider:
            reg2 := (reg2 and $ffff0000) or word(reg1);
          it fully replaces both byte registers and the word register. }
        R_SUBW:
          result:=getsubreg(reg2) in [R_SUBL,R_SUBH,R_SUBW];
        { A 32 bit write replaces every sub register, because it clears the
          upper 32 bits of the 64 bit register on x86_64. }
        R_SUBD,
        R_SUBQ:
          result:=true;
        else
          internalerror(2017042801);
      end;
    end;
  { Returns true if the value read from reg1 depends on the contents of reg2.
    Registers with different super registers are independent; non-integer
    registers sharing a super register always depend on each other. For
    integer registers only the low byte and the high byte are independent of
    one another. An integer sub register not handled below raises
    internalerror 2017042802. }
  function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
    begin
      result:=false;
      if not SuperRegistersEqual(reg1,reg2) then
        exit;
      if getregtype(reg1)<>R_INTREGISTER then
        begin
          { the super registers are equal, nothing more to distinguish }
          result:=true;
          exit;
        end;
      case getsubreg(reg1) of
        { the low byte does not overlap the high byte }
        R_SUBL:
          result:=getsubreg(reg2)<>R_SUBH;
        { the high byte does not overlap the low byte }
        R_SUBH:
          result:=getsubreg(reg2)<>R_SUBL;
        { 16 bit and wider reads overlap every sub register }
        R_SUBW,
        R_SUBD,
        R_SUBQ:
          result:=true;
        else
          internalerror(2017042802);
      end;
    end;
  { Allocates register reg between (and including) the instructions p1 and
    p2. The types of p1 and p2 must not be in SkipInstr.

    Note that this routine is called both from the peephole optimizer (where
    optinfo is not yet initialised) and from the cse (where it is).

    Nothing is done when reg is NR_ESP, the frame pointer of the current
    procedure, or when p1 is nil. If reg is not yet part of initialusedregs,
    an allocation is inserted in front of p1 and reg is added to the set.
    All allocation/deallocation entries for reg inside the range are removed;
    if the first removed entry was an allocation, a new one is inserted in
    front of the start, and if the last removed entry was a deallocation, a
    new one is inserted at the end of the range. }
  procedure TX86AsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
    var
      hp, start: tai;
      ra: tai_regalloc;
      anyRemoved,
      firstWasAlloc,
      lastWasDealloc: boolean;
    begin
      start := p1;
      { this happens with registers which are loaded implicitly, outside the
        current block (e.g. esi with self) }
      if (reg = NR_ESP) or
         (reg = current_procinfo.framepointer) or
         (p1 = nil) then
        exit;

      { make sure we allocate it for this instruction }
      getnextinstruction(p2,p2);

      anyRemoved := false;
      firstWasAlloc := false;
      lastWasDealloc := false;
{$ifdef allocregdebug}
      hp := tai_comment.Create(strpnew('allocating '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
        ' from here...'));
      insertllitem(asml,p1.previous,p1,hp);
      hp := tai_comment.Create(strpnew('allocated '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
        ' till here...'));
      insertllitem(asml,p2,p2.next,hp);
{$endif allocregdebug}

      { do it the safe way: always allocate the full super register,
        as we do no register re-allocation in the peephole optimizer,
        this does not hurt }
      case getregtype(reg) of
        R_MMREGISTER:
          reg:=newreg(R_MMREGISTER,getsupreg(reg),R_SUBMMWHOLE);
        R_INTREGISTER:
          reg:=newreg(R_INTREGISTER,getsupreg(reg),R_SUBWHOLE);
      end;

      if not RegInUsedRegs(reg,initialusedregs) then
        begin
          hp := tai_regalloc.alloc(reg,nil);
          insertllItem(p1.previous,p1,hp);
          IncludeRegInUsedRegs(reg,initialusedregs);
        end;

      while (p1 <> nil) and (p1 <> p2) do
        begin
          { optinfo based bookkeeping is not supported here }
          if assigned(p1.optinfo) then
            internalerror(2014022301); // IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs);
          p1 := tai(p1.next);
          repeat
            { step over everything skippable except register allocations }
            while (p1 <> nil) and
                  (p1.typ in (SkipInstr-[ait_regalloc])) do
              p1 := tai(p1.next);

            { remove all allocation/deallocation info about the register in between }
            if (p1 <> nil) and (p1.typ = ait_regalloc) then
              begin
                ra := tai_regalloc(p1);
                { same super register, different sub register? }
                if SuperRegistersEqual(reg,ra.reg) and (ra.reg<>reg) then
                  begin
                    if (getsubreg(ra.reg)>getsubreg(reg)) or (getsubreg(reg)=R_SUBH) then
                      internalerror(2016101501);
                    ra.reg:=reg;
                  end;

                if ra.reg<>reg then
                  p1 := tai(p1.next)
                else
                  begin
                    { remember what kind of entry was dropped first and last }
                    if not anyRemoved then
                      begin
                        firstWasAlloc := ra.ratype=ra_alloc;
                        anyRemoved := true;
                      end;
                    lastWasDealloc := (ra.ratype=ra_dealloc);
                    hp := tai(p1.Next);
                    asml.Remove(p1);
                    p1.free;
                    p1 := hp;
                  end;
              end;
          until (p1 = nil) or
                not(p1.typ in SkipInstr);
        end;

      if p1 <> nil then
        begin
          if firstWasAlloc then
            begin
              hp := tai_regalloc.Alloc(reg,nil);
              insertLLItem(start.previous,start,hp);
            end;
          if lastWasDealloc then
            begin
              hp := tai_regalloc.DeAlloc(reg,nil);
              insertLLItem(p1.previous,p1,hp);
            end;
        end;
    end;
  { Returns true if instruction hp loads reg with a completely new value,
    i.e. the instruction overwrites all of reg and the value written does
    not depend on the previous contents of reg.

    hp that is nil or not an instruction yields false.

    For the flags register the answer comes from the change information in
    insprop for the opcode; a query for the whole flags register is true
    only if the carry, parity, auxiliary, zero, sign and overflow flags are
    all written. An unknown flags sub register raises internalerror
    2017050501.

    For all other registers the instruction is matched against a list of
    known "pure load" forms (moves, lea, pop, imul/mul, sign extensions,
    segment loads, lods*, setcc, fstsw and the xor/sub/sbb reg,reg idioms). }
  function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      if not assigned(hp) or
         (hp.typ <> ait_instruction) then
        begin
          Result := false;
          exit;
        end;
      p := taicpu(hp);
      if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
        with insprop[p.opcode] do
          begin
            case getsubreg(reg) of
              { the whole flags register: every arithmetic flag must be written }
              R_SUBW,R_SUBD,R_SUBQ:
                Result:=
                  RegLoadedWithNewValue(NR_CARRYFLAG,hp) and
                  RegLoadedWithNewValue(NR_PARITYFLAG,hp) and
                  RegLoadedWithNewValue(NR_AUXILIARYFLAG,hp) and
                  RegLoadedWithNewValue(NR_ZEROFLAG,hp) and
                  RegLoadedWithNewValue(NR_SIGNFLAG,hp) and
                  RegLoadedWithNewValue(NR_OVERFLOWFLAG,hp);
              R_SUBFLAGCARRY:
                Result:=[Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGPARITY:
                Result:=[Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGAUXILIARY:
                Result:=[Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGZERO:
                Result:=[Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGSIGN:
                Result:=[Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGOVERFLOW:
                Result:=[Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGINTERRUPT:
                Result:=[Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGDIRECTION:
                Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*Ch<>[];
              else
                internalerror(2017050501);
            end;
            exit;
          end;
      Result :=
        { two operand loads: the destination register must cover reg and the
          source must not involve reg }
        (((p.opcode = A_MOV) or
          (p.opcode = A_MOVZX) or
          (p.opcode = A_MOVSX) or
          (p.opcode = A_LEA) or
          (p.opcode = A_VMOVSS) or
          (p.opcode = A_VMOVSD) or
          (p.opcode = A_VMOVAPD) or
          (p.opcode = A_VMOVAPS) or
          (p.opcode = A_VMOVQ) or
          (p.opcode = A_MOVSS) or
          (p.opcode = A_MOVSD) or
          (p.opcode = A_MOVQ) or
          (p.opcode = A_MOVAPD) or
          (p.opcode = A_MOVAPS) or
{$ifndef x86_64}
          (p.opcode = A_LDS) or
          (p.opcode = A_LES) or
{$endif not x86_64}
          (p.opcode = A_LFS) or
          (p.opcode = A_LGS) or
          (p.opcode = A_LSS)) and
         (p.ops=2) and  { A_MOVSD can have zero operands, so this check is needed }
         (p.oper[1]^.typ = top_reg) and
         (Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)) and
         ((p.oper[0]^.typ = top_const) or
          ((p.oper[0]^.typ = top_reg) and
           not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
          ((p.oper[0]^.typ = top_ref) and
           not RegInRef(reg,p.oper[0]^.ref^)))) or
        { pop: only a register operand may be inspected through .reg; a
          memory operand never loads reg, so the operand type is checked
          first (as the SETcc and FSTSW clauses below do) }
        ((p.opcode = A_POP) and
         (p.oper[0]^.typ = top_reg) and
         (Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg))) or
        { three operand imul: the destination is operand 2 }
        ((p.opcode = A_IMUL) and
         (p.ops=3) and
         (Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg,reg)) and
         (((p.oper[1]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[1]^.reg,reg))) or
          ((p.oper[1]^.typ=top_ref) and not(RegInRef(reg,p.oper[1]^.ref^))))) or
        { one operand imul/mul: implicit destination chosen by operand size }
        ((((p.opcode = A_IMUL) or
           (p.opcode = A_MUL)) and
          (p.ops=1)) and
         (((p.oper[0]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
          ((p.oper[0]^.typ=top_ref) and not(RegInRef(reg,p.oper[0]^.ref^)))) and
         (((p.opsize=S_B) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
          ((p.opsize=S_W) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
          ((p.opsize=S_L) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg))
{$ifdef x86_64}
          or ((p.opsize=S_Q) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg))
{$endif x86_64}
         )) or
        { sign extensions with implicit destination }
        ((p.opcode = A_CWD) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
        ((p.opcode = A_CDQ) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg)) or
{$ifdef x86_64}
        ((p.opcode = A_CQO) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg)) or
{$endif x86_64}
        ((p.opcode = A_CBW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
        { far pointer loads also set a segment register }
{$ifndef x86_64}
        ((p.opcode = A_LDS) and (reg=NR_DS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
        ((p.opcode = A_LES) and (reg=NR_ES) and not(RegInRef(reg,p.oper[0]^.ref^))) or
{$endif not x86_64}
        ((p.opcode = A_LFS) and (reg=NR_FS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
        ((p.opcode = A_LGS) and (reg=NR_GS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
        ((p.opcode = A_LSS) and (reg=NR_SS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
        { instructions with a fixed implicit destination }
{$ifndef x86_64}
        ((p.opcode = A_AAM) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
{$endif not x86_64}
        ((p.opcode = A_LAHF) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
        ((p.opcode = A_LODSB) and Reg1WriteOverwritesReg2Entirely(NR_AL,reg)) or
        ((p.opcode = A_LODSW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg)) or
        ((p.opcode = A_LODSD) and Reg1WriteOverwritesReg2Entirely(NR_EAX,reg)) or
{$ifdef x86_64}
        ((p.opcode = A_LODSQ) and Reg1WriteOverwritesReg2Entirely(NR_RAX,reg)) or
{$endif x86_64}
        ((p.opcode = A_SETcc) and (p.oper[0]^.typ=top_reg) and Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
        (((p.opcode = A_FSTSW) or
          (p.opcode = A_FNSTSW)) and
         (p.oper[0]^.typ=top_reg) and
         Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
        { xor/sub/sbb of a register with itself: the result does not depend
          on the previous value of that register }
        (((p.opcode = A_XOR) or (p.opcode = A_SUB) or (p.opcode = A_SBB)) and
         (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
         (p.oper[0]^.reg=p.oper[1]^.reg) and
         Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg));
    end;
  { Returns true if p starts one of the recognised exit sequences:
      ret
      leave / ret
      mov ebp,esp / pop ebp / ret   (operand 0 = EBP, operand 1 = ESP)
    Anything that is not an instruction yields false. }
  class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
    var
      nxt,nxt2 : tai;
    begin
      result:=false;
      if p.typ<>ait_instruction then
        exit;
      case taicpu(p).opcode of
        A_RET:
          result:=true;
        A_LEAVE:
          { leave must be followed directly by ret }
          result:=
            GetNextInstruction(p,nxt) and
            (nxt.typ=ait_instruction) and
            (taicpu(nxt).opcode=A_RET);
        A_MOV:
          { manual frame tear-down followed by ret }
          result:=
            (taicpu(p).oper[0]^.typ=top_reg) and
            (taicpu(p).oper[0]^.reg=NR_EBP) and
            (taicpu(p).oper[1]^.typ=top_reg) and
            (taicpu(p).oper[1]^.reg=NR_ESP) and
            GetNextInstruction(p,nxt) and
            (nxt.typ=ait_instruction) and
            (taicpu(nxt).opcode=A_POP) and
            (taicpu(nxt).oper[0]^.typ=top_reg) and
            (taicpu(nxt).oper[0]^.reg=NR_EBP) and
            GetNextInstruction(nxt,nxt2) and
            (nxt2.typ=ait_instruction) and
            (taicpu(nxt2).opcode=A_RET);
      end;
    end;
  { Returns true if hp1 is an arithmetic/logic instruction that works on reg
    only:
      - add, sub, or, xor, and, shl, shr, sar with reg as operand 1 and
        either a constant or a register other than reg as operand 0;
      - inc, dec, neg, not with reg as operand 0.
    Every other opcode yields false. }
  class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
    var
      srcOk: boolean;
    begin
      isFoldableArithOp := False;
      case hp1.opcode of
        A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
          begin
            { the source may be a constant or any register except reg itself }
            srcOk :=
              (hp1.oper[0]^.typ = top_const) or
              ((hp1.oper[0]^.typ = top_reg) and
               (hp1.oper[0]^.reg <> reg));
            isFoldableArithOp := srcOk and MatchOperand(hp1.oper[1]^,reg);
          end;
        A_INC,A_DEC,A_NEG,A_NOT:
          isFoldableArithOp := MatchOperand(hp1.oper[0]^,reg);
      end;
    end;
  { Removes the nearest deallocation of the function result register(s) that
    precedes p. Which registers are handled depends on the return type of
    the current procedure: EAX for the listed non-ordinal types, and for
    ordinal types of non-zero size EAX plus, if the size is 8 bytes, EDX.
    Other return types are left untouched. }
  procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);

    { Walks backwards from p and removes the first deallocation of the
      integer super register supreg it meets. The walk ends at the start of
      the list or at the first item for which regInInstruction reports that
      it involves the register. }
    procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
      var
        cur: tai;
        fullreg: tregister;
      begin
        fullreg := newreg(R_INTREGISTER,supreg,R_SUBWHOLE);
        cur := tai(p.previous);
        while assigned(cur) do
          begin
            if (cur.typ = ait_regalloc) and
               (tai_regalloc(cur).ratype=ra_dealloc) and
               (getregtype(tai_regalloc(cur).reg) = R_INTREGISTER) and
               (getsupreg(tai_regalloc(cur).reg) = supreg) then
              begin
                asml.remove(cur);
                cur.free;
                exit;
              end;
            { do not look past an item that involves the register }
            if regInInstruction(fullreg,cur) then
              exit;
            cur := tai(cur.previous);
          end;
      end;

    begin
      case current_procinfo.procdef.returndef.typ of
        arraydef,recorddef,pointerdef,
        stringdef,enumdef,procdef,objectdef,errordef,
        filedef,setdef,procvardef,
        classrefdef,forwarddef:
          DoRemoveLastDeallocForFuncRes(RS_EAX);
        orddef:
          if current_procinfo.procdef.returndef.size <> 0 then
            begin
              DoRemoveLastDeallocForFuncRes(RS_EAX);
              { for int64/qword }
              if current_procinfo.procdef.returndef.size = 8 then
                DoRemoveLastDeallocForFuncRes(RS_EDX);
            end;
      end;
    end;
  { Pass 1 peephole optimizations for a register-to-register vmovapd/vmovaps
    in p. Returns true if one of the first three patterns below was applied;
    the fourth pattern updates p but leaves the result false.

      vmova* reg1,reg1                      =>  <removed>

      vmova* reg1,reg2
      vmova* reg2,reg3  (reg2 unused after) =>  vmova* reg1,reg3

      vmova* reg1,reg2
      vmova* reg2,reg1                      =>  vmova* reg1,reg2

      vmova* reg1,reg2
      vfma   x,y,reg2
      vmova* reg2,reg1  (reg2 unused after) =>  vfma x,y,reg1

    The temporary register usage copy made with CopyUsedRegs is always
    handed back with ReleaseUsedRegs, and the last pattern additionally
    requires the trailing move to read reg2, so that an unrelated move into
    reg1 is never removed. }
  function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1,hp2 : tai;
    begin
      result:=false;
      if MatchOpType(taicpu(p),top_reg,top_reg) then
        begin
          { vmova* reg1,reg1
            =>
            <nop> }
          if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
            begin
              GetNextInstruction(p,hp1);
              asml.Remove(p);
              p.Free;
              p:=hp1;
              result:=true;
            end
          else if GetNextInstruction(p,hp1) then
            begin
              if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
                 MatchOpType(taicpu(hp1),top_reg,top_reg) and
                 MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
                begin
                  { vmova* reg1,reg2
                    vmova* reg2,reg3
                    dealloc reg2
                    =>
                    vmova* reg1,reg3 }
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                    begin
                      taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end
                  { special case:
                    vmova* reg1,reg2
                    vmova* reg2,reg1
                    =>
                    vmova* reg1,reg2 }
                  else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
                    begin
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end;
                  { give the temporary copy back on every path }
                  ReleaseUsedRegs(TmpUsedRegs);
                end
              else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
                 { we mix single and double operations here because we assume that the compiler
                   generates vmovapd only after double operations and vmovaps only after single operations }
                 MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
                 GetNextInstruction(hp1,hp2) and
                 MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
                 { the trailing move must copy reg2 back into reg1; checking
                   only its destination would also drop a move that loads
                   reg1 from somewhere else }
                 MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
                 MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
                begin
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
                    begin
                      { let the fma write reg1 directly and drop both moves;
                        the result stays false on this path }
                      taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
                      asml.Remove(p);
                      p.Free;
                      asml.Remove(hp2);
                      hp2.Free;
                      p:=hp1;
                    end;
                  { give the temporary copy back on every path }
                  ReleaseUsedRegs(TmpUsedRegs);
                end;
            end;
        end;
    end;
  { Pass 1 peephole optimization for a three operand vector instruction in p
    that is directly followed by a vmovapd/vmovaps copying its result:

      vop    x,y,reg1
      vmova* reg1,reg2   (reg1 unused after)  =>  vop x,y,reg2

    Returns true if the move was folded into p. The caller is expected to
    pass an instruction with three operands, as operand 2 is accessed
    without a check. The temporary register usage copy made with
    CopyUsedRegs is always handed back with ReleaseUsedRegs. }
  function TX86AsmOptimizer.OptPass1VOP(const p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1 : tai;
    begin
      result:=false;
      if GetNextInstruction(p,hp1) and
         { we mix single and double operations here because we assume that the compiler
           generates vmovapd only after double operations and vmovaps only after single operations }
         MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
         MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
         (taicpu(hp1).oper[1]^.typ=top_reg) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)) then
            begin
              { write the result straight into the move's destination }
              taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
              asml.Remove(hp1);
              hp1.Free;
              result:=true;
            end;
          { give the temporary copy back on every path }
          ReleaseUsedRegs(TmpUsedRegs);
        end;
    end;
  { Pass-1 peephole optimizations anchored on the instruction p (expected to
    be a MOV - presumably guaranteed by the caller, confirm at the call site).

    The routine looks at the one or two instructions following p and tries a
    series of rewrites, each identified by its DebugMsg text (MovAnd2Mov,
    MovMov2Mov, MovTestCmp2MovTestCmp, MovMovCmp2MovCmp, MovMovMov2MovMov,
    MovOpMov2Op, MovLea2Add, ...).

    p      : in/out; when p itself is removed it is re-pointed at the
             instruction that took its place.
    Result : set to true only by the MovAnd2Mov and "MovMov2Mov 1" rewrites;
             all other rewrites change the instruction list but leave Result
             false. }
  function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
    var
      hp1, hp2: tai;
      TmpUsedRegs : TAllUsedRegs;
      { caches whether p has a following instruction (stored in hp1) }
      GetNextIntruction_p : Boolean;
    begin
      Result:=false;
      GetNextIntruction_p:=GetNextInstruction(p, hp1);
      { mov x, %reg
        and $ffffffff, %reg   (32 bit)   -> the AND is redundant, drop it }
      if GetNextIntruction_p and
        MatchInstruction(hp1,A_AND,[]) and
        (taicpu(p).oper[1]^.typ = top_reg) and
        MatchOpType(taicpu(hp1),top_const,top_reg) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
        case taicpu(p).opsize Of
          S_L:
            if (taicpu(hp1).oper[0]^.val = $ffffffff) then
              begin
                DebugMsg('PeepHole Optimization,MovAnd2Mov',p);
                asml.remove(hp1);
                hp1.free;
                Result:=true;
                exit;
              end;
        end
      else if GetNextIntruction_p and
        MatchInstruction(hp1,A_MOV,[]) and
        (taicpu(p).oper[1]^.typ = top_reg) and
        (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
          { we have
              mov x, %treg
              mov %treg, y
          }
          if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
             not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
            { we've got
                mov x, %treg
                mov %treg, y
              where %treg is not used afterwards }
            case taicpu(p).oper[0]^.typ Of
              top_reg:
                begin
                  { change
                      mov %reg, %treg
                      mov %treg, y
                    to
                      mov %reg, y
                  }
                  taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
                  DebugMsg('PeepHole Optimization,MovMov2Mov 2',p);
                  asml.remove(hp1);
                  hp1.free;
                  ReleaseUsedRegs(TmpUsedRegs);
                  Exit;
                end;
              top_ref:
                if (taicpu(hp1).oper[1]^.typ = top_reg) then
                  begin
                    { change
                        mov mem, %treg
                        mov %treg, %reg
                      to
                        mov mem, %reg
                    }
                    taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                    DebugMsg('PeepHole Optimization,MovMov2Mov 3',p);
                    asml.remove(hp1);
                    hp1.free;
                    ReleaseUsedRegs(TmpUsedRegs);
                    Exit;
                  end;
            end;
          ReleaseUsedRegs(TmpUsedRegs);
        end
      else
        { Change
            mov %reg1, %reg2
            xxx %reg2, ???
          to
            mov %reg1, %reg2
            xxx %reg1, ???
          to avoid a write/read penalty
        }
        if MatchOpType(taicpu(p),top_reg,top_reg) and
          GetNextInstruction(p,hp1) and
          (tai(hp1).typ = ait_instruction) and
          (taicpu(hp1).ops >= 1) and
          MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
          { we have
              mov %reg1, %reg2
              XXX %reg2, ???
          }
          begin
            if ((taicpu(hp1).opcode = A_OR) or
                (taicpu(hp1).opcode = A_TEST)) and
               (taicpu(hp1).oper[1]^.typ = top_reg) and
               (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
              { we have
                  mov %reg1, %reg2
                  test/or %reg2, %reg2
              }
              begin
                CopyUsedRegs(TmpUsedRegs);
                { reg1 will be used after the first instruction,
                  so update the allocation info }
                AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                if GetNextInstruction(hp1, hp2) and
                   (hp2.typ = ait_instruction) and
                   taicpu(hp2).is_jmp and
                   not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
                  { change
                      mov %reg1, %reg2
                      test/or %reg2, %reg2
                      jxx
                    to
                      test %reg1, %reg1
                      jxx
                  }
                  begin
                    taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
                    taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
                    asml.remove(p);
                    p.free;
                    p := hp1;
                    ReleaseUsedRegs(TmpUsedRegs);
                    Exit;
                  end
                else
                  { change
                      mov %reg1, %reg2
                      test/or %reg2, %reg2
                    to
                      mov %reg1, %reg2
                      test/or %reg1, %reg1
                  }
                  begin
                    taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
                    taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
                  end;
                ReleaseUsedRegs(TmpUsedRegs);
              end
          end
        else
          { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
            x >= RetOffset) as it doesn't do anything (it writes either to a
            parameter or to the temporary storage room for the function
            result)
          }
          if GetNextIntruction_p and
            (tai(hp1).typ = ait_instruction) then
            begin
              if IsExitCode(hp1) and
                 MatchOpType(p,top_reg,top_ref) and
                 (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
                 not(assigned(current_procinfo.procdef.funcretsym) and
                     (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
                 (taicpu(p).oper[1]^.ref^.index = NR_NO) then
                begin
                  asml.remove(p);
                  p.free;
                  p:=hp1;
                  DebugMsg('Peephole removed deadstore before leave/ret',p);
                  RemoveLastDeallocForFuncRes(p);
                  exit;
                end
              { change
                  mov reg1, mem1
                  test/cmp x, mem1
                to
                  mov reg1, mem1
                  test/cmp x, reg1
              }
              else if MatchOpType(p,top_reg,top_ref) and
                  MatchInstruction(hp1,A_CMP,A_TEST,[taicpu(p).opsize]) and
                  (taicpu(hp1).oper[1]^.typ = top_ref) and
                  RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
                begin
                  taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
                  DebugMsg('Peephole MovTestCmp2MovTestCmp 1',hp1);
                  AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                end;
            end;
      { Second, independent chain of patterns; it is reached whenever none of
        the rewrites above left the routine via exit. }
      { Next instruction is also a MOV ? }
      if GetNextIntruction_p and
        MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
        begin
          if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
             (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
            {  mov reg1, mem1     or     mov mem1, reg1
               mov mem2, reg2            mov reg2, mem2 }
            begin
              if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
                {  mov reg1, mem1     or     mov mem1, reg1
                   mov mem2, reg1            mov reg2, mem1 }
                begin
                  if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
                    { Removes the second statement from
                        mov reg1, mem1/reg2
                        mov mem1/reg2, reg1 }
                    begin
                      if taicpu(p).oper[0]^.typ=top_reg then
                        AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                      DebugMsg('PeepHole Optimization,MovMov2Mov 1',p);
                      asml.remove(hp1);
                      hp1.free;
                      Result:=true;
                      exit;
                    end
                  else
                    begin
                      CopyUsedRegs(TmpUsedRegs);
                      UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                      if (taicpu(p).oper[1]^.typ = top_ref) and
                        { mov reg1, mem1
                          mov mem2, reg1 }
                         (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
                         GetNextInstruction(hp1, hp2) and
                         MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
                         OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
                         OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
                         not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
                        { change                   to
                            mov reg1, mem1           mov reg1, mem1
                            mov mem2, reg1           cmp reg1, mem2
                            cmp mem1, reg1
                        }
                        begin
                          asml.remove(hp2);
                          hp2.free;
                          taicpu(hp1).opcode := A_CMP;
                          taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
                          taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
                          AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
                          DebugMsg('Peephole MovMovCmp2MovCmp done',hp1);
                        end;
                      ReleaseUsedRegs(TmpUsedRegs);
                    end;
                end
              else if (taicpu(p).oper[1]^.typ=top_ref) and
                OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
                { mov reg1, mem1
                  mov mem1, reg2
                  -> let the second mov read reg1 instead of reloading mem1 }
                begin
                  AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
                  taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
                  DebugMsg('PeepHole Optimization,MovMov2MovMov1',p);
                end
              else
                begin
                  CopyUsedRegs(TmpUsedRegs);
                  if GetNextInstruction(hp1, hp2) and
                    MatchOpType(taicpu(p),top_ref,top_reg) and
                    MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
                    (taicpu(hp1).oper[1]^.typ = top_ref) and
                    MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
                    MatchOpType(taicpu(hp2),top_ref,top_reg) and
                    RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
                    if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
                       not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
                      { mov mem1, %reg1
                        mov %reg1, mem2
                        mov mem2, reg2
                       to:
                        mov mem1, reg2
                        mov reg2, mem2 }
                      begin
                        AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
                        DebugMsg('PeepHole Optimization,MovMovMov2MovMov 1',p);
                        taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
                        taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
                        asml.remove(hp2);
                        hp2.free;
                      end
{$ifdef i386}
                    { this is enabled for i386 only, as the rules to create the reg sets below
                      are too complicated for x86-64, so this makes this code too error prone
                      on x86-64
                    }
                    else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
                      not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
                      not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
                      {   mov mem1, reg1         mov mem1, reg1
                          mov reg1, mem2         mov reg1, mem2
                          mov mem2, reg2         mov mem2, reg1
                       to:                    to:
                          mov mem1, reg1         mov mem1, reg1
                          mov mem1, reg2         mov reg1, mem2
                          mov reg1, mem2

                       or (if mem1 depends on reg1
                           and/or if mem2 depends on reg2)
                       to:
                          mov mem1, reg1
                          mov reg1, mem2
                          mov reg1, reg2
                      }
                      begin
                        taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
                        taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
                        taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
                        taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
                        AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
                        if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
                           (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
                          AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
                        if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
                           (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
                          AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
                      end
                    else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
                      begin
                        taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
                        AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
                      end
                    else
                      begin
                        asml.remove(hp2);
                        hp2.free;
                      end
{$endif i386}
                      ;
                  ReleaseUsedRegs(TmpUsedRegs);
                end;
            end
(*          { movl [mem1],reg1
              movl [mem1],reg2
             to
              movl [mem1],reg1
              movl reg1,reg2
            }
          else if (taicpu(p).oper[0]^.typ = top_ref) and
            (taicpu(p).oper[1]^.typ = top_reg) and
            (taicpu(hp1).oper[0]^.typ = top_ref) and
            (taicpu(hp1).oper[1]^.typ = top_reg) and
            (taicpu(p).opsize = taicpu(hp1).opsize) and
            RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
            (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
            (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
            taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
          else*)
          { movl const1,[mem1]
            movl [mem1],reg1
           to
            movl const1,reg1
            movl reg1,[mem1]
          }
          else if MatchOpType(Taicpu(p),top_const,top_ref) and
            MatchOpType(Taicpu(hp1),top_ref,top_reg) and
            (taicpu(p).opsize = taicpu(hp1).opsize) and
            RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
            not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
            begin
              AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
              { statement order matters: hp1's operands are swapped first, then
                p's destination is taken from hp1's new operand 0 }
              taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
              taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
              taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
              taicpu(hp1).fileinfo := taicpu(p).fileinfo;
              DebugMsg('PeepHole Optimization,MovMov2MovMov 1',p);
            end
        end
      else if (taicpu(p).oper[1]^.typ = top_reg) and
        GetNextIntruction_p and
        (hp1.typ = ait_instruction) and
        GetNextInstruction(hp1, hp2) and
        MatchInstruction(hp2,A_MOV,[]) and
        OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
        (taicpu(hp2).oper[0]^.typ=top_reg) and
        (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
        (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
         ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
          IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
        ) then
        { change   movsX/movzX reg/ref, reg2
                   add/sub/or/... reg3/$const, reg2
                   mov reg2 reg/ref
          to       add/sub/or/... reg3/$const, reg/ref      }
        begin
          CopyUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
          If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
            begin
              { by example:
                  movswl  %si,%eax        movswl  %si,%eax      p
                  decl    %eax            addl    %edx,%eax     hp1
                  movw    %ax,%si         movw    %ax,%si       hp2
                ->
                  movswl  %si,%eax        movswl  %si,%eax      p
                  decw    %eax            addw    %edx,%eax     hp1
                  movw    %ax,%si         movw    %ax,%si       hp2
              }
              DebugMsg('Peephole Optimization MovOpMov2Op ('+
                    std_op2str[taicpu(p).opcode]+gas_opsize2str[taicpu(p).opsize]+' '+
                    std_op2str[taicpu(hp1).opcode]+gas_opsize2str[taicpu(hp1).opsize]+' '+
                    std_op2str[taicpu(hp2).opcode]+gas_opsize2str[taicpu(hp2).opsize],p);
              taicpu(hp1).changeopsize(taicpu(hp2).opsize);
              {
                ->
                  movswl  %si,%eax        movswl  %si,%eax      p
                  decw    %si             addw    %dx,%si       hp1
                  movw    %ax,%si         movw    %ax,%si       hp2
              }
              case taicpu(hp1).ops of
                1:
                  begin
                    taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
                    if taicpu(hp1).oper[0]^.typ=top_reg then
                      setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
                  end;
                2:
                  begin
                    taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
                    { the source register of a shift is left at its original
                      size; every other register source is resized }
                    if (taicpu(hp1).oper[0]^.typ=top_reg) and
                      (taicpu(hp1).opcode<>A_SHL) and
                      (taicpu(hp1).opcode<>A_SHR) and
                      (taicpu(hp1).opcode<>A_SAR) then
                      setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
                  end;
                else
                  internalerror(2008042701);
              end;
              {
                ->
                  decw    %si             addw    %dx,%si       p
              }
              asml.remove(p);
              asml.remove(hp2);
              p.Free;
              hp2.Free;
              p := hp1;
            end;
          ReleaseUsedRegs(TmpUsedRegs);
        end
      else if GetNextIntruction_p and
        MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
        GetNextInstruction(hp1, hp2) and
        MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
        MatchOperand(Taicpu(p).oper[0]^,0) and
        (Taicpu(p).oper[1]^.typ = top_reg) and
        MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
        MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
        { mov reg1,0
          bts reg1,operand1             -->      mov reg1,operand2
          or  reg1,operand2                      bts reg1,operand1 }
        begin
          Taicpu(hp2).opcode:=A_MOV;
          { move the bts/btr behind the former OR, then drop the mov 0 }
          asml.remove(hp1);
          insertllitem(hp2,hp2.next,hp1);
          asml.remove(p);
          p.free;
          p:=hp1;
        end
      else if GetNextIntruction_p and
        MatchInstruction(hp1,A_LEA,[S_L]) and
        MatchOpType(Taicpu(p),top_ref,top_reg) and
        ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
          (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
         ) or
         (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
          (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
         )
        ) then
        { mov reg1,ref
          lea reg2,[reg1,reg2]
          to
          add reg2,ref }
        begin
          CopyUsedRegs(TmpUsedRegs);
          { reg1 may not be used afterwards }
          if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
            begin
              Taicpu(hp1).opcode:=A_ADD;
              Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
              DebugMsg('Peephole MovLea2Add done',hp1);
              asml.remove(p);
              p.free;
              p:=hp1;
            end;
          ReleaseUsedRegs(TmpUsedRegs);
        end;
    end;
  { Pass-2 peephole optimizations anchored on the instruction p (expected to
    be a MOV - presumably guaranteed by the caller, confirm at the call site).

    Two rewrites are attempted:
      1. mov reg1, reg2 ; mov/movzx/movsx (reg2,..), reg2
         -> the register copy is removed and the load addresses via reg1.
      2. mov (ref), reg ; <foldable arith op or lea> ..., reg ; mov reg, (ref)
         -> a single arithmetic instruction operating directly on (ref).

    p      : in/out; re-pointed at the surviving instruction when the original
             p is removed.
    Result : true only for rewrite 1; rewrite 2 changes the list but leaves
             Result false. }
  function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1,hp2: tai;
    begin
      Result:=false;
      if MatchOpType(taicpu(p),top_reg,top_reg) and
        GetNextInstruction(p, hp1) and
        MatchInstruction(hp1,A_MOV,A_MOVZX,A_MOVSX,[]) and
        MatchOpType(taicpu(hp1),top_ref,top_reg) and
        ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg)
         or
         (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)
        ) and
        (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
        { mov reg1, reg2
          mov/zx/sx (reg2, ..), reg2      to   mov/zx/sx (reg1, ..), reg2 }
        begin
          if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
            taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
          if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
            taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
          asml.remove(p);
          p.free;
          p := hp1;
          Result:=true;
          exit;
        end
      else if (taicpu(p).oper[0]^.typ = top_ref) and
        GetNextInstruction(p,hp1) and
        (hp1.typ = ait_instruction) and
        { while the GetNextInstruction(hp1,hp2) call could be factored out,
          doing it separately in both branches allows to do the cheap checks
          with low probability earlier }
        ((IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
          GetNextInstruction(hp1,hp2) and
          MatchInstruction(hp2,A_MOV,[])
         ) or
         ((taicpu(hp1).opcode=A_LEA) and
          GetNextInstruction(hp1,hp2) and
          MatchInstruction(hp2,A_MOV,[]) and
          ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
            (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
           ) or
           (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,
            taicpu(p).oper[1]^.reg) and
            (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg)) or
           (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_NO)) or
           (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,NR_NO,taicpu(p).oper[1]^.reg))
          ) and
          { NOTE(review): this check passes UsedRegs itself rather than a
            temporary copy, unlike the other RegUsedAfterInstruction calls in
            this routine - confirm that this is intended }
          ((MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^)) or not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)))
         )
        ) and
        { the last operand of hp1 (its destination) must be what hp2 stores }
        MatchOperand(taicpu(hp1).oper[taicpu(hp1).ops-1]^,taicpu(hp2).oper[0]^) and
        (taicpu(hp2).oper[1]^.typ = top_ref) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
          if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
             not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2, TmpUsedRegs))) then
            { change   mov            (ref), reg
                       add/sub/or/... reg2/$const, reg
                       mov            reg, (ref)
                       # release reg
              to       add/sub/or/... reg2/$const, (ref)    }
            begin
              case taicpu(hp1).opcode of
                { single-operand instructions: the memory reference becomes operand 0 }
                A_INC,A_DEC,A_NOT,A_NEG :
                  taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
                A_LEA :
                  begin
                    { lea becomes an add whose source is the other register of
                      the address or, if there is none, its constant offset }
                    taicpu(hp1).opcode:=A_ADD;
                    if (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.index<>NR_NO) then
                      taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
                    else if (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.base<>NR_NO) then
                      taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base)
                    else
                      taicpu(hp1).loadconst(0,taicpu(hp1).oper[0]^.ref^.offset);
                    taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                    DebugMsg('Peephole FoldLea done',hp1);
                  end
                else
                  { two-operand instructions: the memory reference becomes the destination }
                  taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
              end;
              asml.remove(p);
              asml.remove(hp2);
              p.free;
              hp2.free;
              p := hp1
            end;
          ReleaseUsedRegs(TmpUsedRegs);
        end;
    end;
  { Pass-2 peephole for IMUL: merges a preceding register copy into the
    multiplication,

        mov  reg1,reg2
        imul y,reg2          to        imul y,reg1,reg2

    p      : the imul instruction; it is rewritten in place.
    Result : true when the preceding mov was folded into p and removed. }
  function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
    var
      ScratchRegs : TAllUsedRegs;
      PrevInstr : tai;
    begin
      Result:=false;

      { the source must be an immediate or a full address ... }
      if taicpu(p).ops < 2 then
        exit;
      if not((taicpu(p).oper[0]^.typ = top_const) or
             ((taicpu(p).oper[0]^.typ = top_ref) and
              (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) then
        exit;

      { ... multiplied into a register; a three-operand form is only accepted
        when operands 1 and 2 name the same register }
      if taicpu(p).oper[1]^.typ <> top_reg then
        exit;
      if (taicpu(p).ops <> 2) and
         not((taicpu(p).oper[2]^.typ = top_reg) and
             (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg)) then
        exit;

      { the previous instruction has to be a reg->reg mov that writes the
        register multiplied by p (or its 32 bit part for a 64 bit imul) }
      if not(GetLastInstruction(p,PrevInstr) and
             MatchInstruction(PrevInstr,A_MOV,[]) and
             MatchOpType(PrevInstr,top_reg,top_reg)) then
        exit;
      if not((taicpu(PrevInstr).oper[1]^.reg = taicpu(p).oper[1]^.reg) or
             ((taicpu(PrevInstr).opsize=S_L) and (taicpu(p).opsize=S_Q) and
              SuperRegistersEqual(taicpu(PrevInstr).oper[1]^.reg,taicpu(p).oper[1]^.reg))) then
        exit;

      CopyUsedRegs(ScratchRegs);
      if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,p,ScratchRegs)) then
        begin
          taicpu(p).ops := 3;
          taicpu(p).loadreg(1,taicpu(PrevInstr).oper[0]^.reg);
          taicpu(p).loadreg(2,taicpu(PrevInstr).oper[1]^.reg);
          DebugMsg('Peephole MovImul2Imul done',p);
          asml.remove(PrevInstr);
          PrevInstr.free;
          result:=true;
        end;
      ReleaseUsedRegs(ScratchRegs);
    end;
  { Pass-2 peephole for jumps: an unconditional jump whose target label is
    directly followed by a ret is replaced by that ret,

        jmp .L1                  ret
        ...            into      ...
      .L1:                     .L1:
        ret                      ret

    p      : the jump instruction; it is rewritten in place.
    Result : true when p was turned into a ret. }
  function TX86AsmOptimizer.OptPass2Jmp(var p : tai) : boolean;
    var
      Target : tai;
    begin
      result:=false;

      { only direct jumps to a symbol, without base or index register }
      if not((taicpu(p).oper[0]^.typ=top_ref) and
             (taicpu(p).oper[0]^.ref^.refaddr=addr_full) and
             (taicpu(p).oper[0]^.ref^.base=NR_NO) and
             (taicpu(p).oper[0]^.ref^.index=NR_NO)) then
        exit;

      Target:=getlabelwithsym(tasmlabel(taicpu(p).oper[0]^.ref^.symbol));
      if not((taicpu(p).condition=C_None) and
             assigned(Target) and
             SkipLabels(Target,Target) and
             MatchInstruction(Target,A_RET,[S_NO])) then
        exit;

      { the jump no longer references the label }
      tasmlabel(taicpu(p).oper[0]^.ref^.symbol).decrefs;
      taicpu(p).opcode:=A_RET;
      taicpu(p).is_jmp:=false;
      { take over the operand of the ret, if it has one }
      taicpu(p).ops:=taicpu(Target).ops;
      case taicpu(Target).ops of
        0:
          taicpu(p).clearop(0);
        1:
          taicpu(p).loadconst(0,taicpu(Target).oper[0]^.val);
        else
          internalerror(2016041301);
      end;
      result:=true;
    end;
  { Tells whether p may be turned into a conditional move: it has to be an
    assigned 16, 32 or 64 bit MOV between two registers.

    Memory sources are deliberately not accepted: we can't use cmov ref,reg
    because ref could be nil and cmov still throws an exception if ref=nil
    even when the move isn't done (FK). The former extra alternative was
        or ((taicpu(p).oper[0]^.typ = top_ref) and
            (taicpu(p).oper[0]^.ref^.refaddr = addr_no)) }
  function CanBeCMOV(p : tai) : boolean;
    begin
      if not assigned(p) then
        CanBeCMOV:=false
      else if not MatchInstruction(p,A_MOV,[S_W,S_L,S_Q]) then
        CanBeCMOV:=false
      else
        CanBeCMOV:=MatchOpType(p,top_reg,top_reg);
    end;
  { Pass-2 peephole optimizations for a conditional jump p.

    1. Carry based increment/decrement:
         jb  @@1 ; inc/dec operand ; @@1:   ->  cmc ; adc/sbb operand,0
         jnb @@1 ; inc/dec operand ; @@1:   ->        adc/sbb operand,0
    2. If the target CPU supports CMOV, short runs of register-to-register
       movs guarded by the jump are turned into conditional moves, both for
         jCC xxx ; <movs> ; xxx:
       and for
         jCC xxx ; <movs 1> ; jmp yyy ; xxx: ; <movs 2> ; yyy:

    p      : in/out; re-pointed at the first surviving instruction when the
             jump is removed.
    Result : true when a rewrite was performed. }
  function TX86AsmOptimizer.OptPass2Jcc(var p : tai) : boolean;
    var
      hp1,hp2,hp3: tai;
      carryadd_opcode : TAsmOp;
      l : Longint;
      condition : TAsmCond;
    begin
      { jb @@1                            cmc
        inc/dec operand           -->     adc/sbb operand,0
        @@1:

        ... and ...

        jnb @@1
        inc/dec operand           -->     adc/sbb operand,0
        @@1: }
      result:=false;
      if GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
         GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
         (Tasmlabel(Taicpu(p).oper[0]^.ref^.symbol)=Tai_label(hp2).labsym) then
        begin
          carryadd_opcode:=A_NONE;
          if Taicpu(p).condition in [C_NAE,C_B] then
            begin
              if Taicpu(hp1).opcode=A_INC then
                carryadd_opcode:=A_ADC;
              if Taicpu(hp1).opcode=A_DEC then
                carryadd_opcode:=A_SBB;
              if carryadd_opcode<>A_NONE then
                begin
                  { the jump itself becomes the cmc }
                  Taicpu(p).clearop(0);
                  Taicpu(p).ops:=0;
                  Taicpu(p).is_jmp:=false;
                  Taicpu(p).opcode:=A_CMC;
                  Taicpu(p).condition:=C_NONE;
                  Taicpu(hp1).ops:=2;
                  Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                  Taicpu(hp1).loadconst(0,0);
                  Taicpu(hp1).opcode:=carryadd_opcode;
                  result:=true;
                  exit;
                end;
            end;
          if Taicpu(p).condition in [C_AE,C_NB] then
            begin
              if Taicpu(hp1).opcode=A_INC then
                carryadd_opcode:=A_ADC;
              if Taicpu(hp1).opcode=A_DEC then
                carryadd_opcode:=A_SBB;
              if carryadd_opcode<>A_NONE then
                begin
                  asml.remove(p);
                  p.free;
                  Taicpu(hp1).ops:=2;
                  Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                  Taicpu(hp1).loadconst(0,0);
                  Taicpu(hp1).opcode:=carryadd_opcode;
                  p:=hp1;
                  result:=true;
                  exit;
                end;
            end;
        end;
      if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
        begin
          { check for
                 jCC   xxx
                 <several movs>
              xxx:
          }
          l:=0;
          GetNextInstruction(p, hp1);
          while assigned(hp1) and
            CanBeCMOV(hp1) and
            { stop on labels }
            not(hp1.typ=ait_label) do
            begin
              inc(l);
              GetNextInstruction(hp1,hp1);
            end;
          if assigned(hp1) then
            begin
              if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                begin
                  if (l<=4) and (l>0) then
                    begin
                      condition:=inverse_cond(taicpu(p).condition);
                      hp2:=p;
                      GetNextInstruction(p,hp1);
                      p:=hp1;
                      repeat
                        taicpu(hp1).opcode:=A_CMOVcc;
                        taicpu(hp1).condition:=condition;
                        GetNextInstruction(hp1,hp1);
                      until not(assigned(hp1)) or
                        not(CanBeCMOV(hp1));
                      { wait with removing else GetNextInstruction could
                        ignore the label if it was the only usage in the
                        jump moved away }
                      tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                      { if the label refs. reach zero, remove any alignment before the label;
                        the loop above may end with hp1=nil, so check before dereferencing }
                      if assigned(hp1) and
                        (hp1.typ=ait_align) and
                        (tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).getrefs=0) then
                        begin
                          asml.Remove(hp1);
                          hp1.Free;
                        end;
                      asml.remove(hp2);
                      hp2.free;
                      result:=true;
                      exit;
                    end;
                end
              else
                begin
                  { check further for
                         jCC   xxx
                         <several movs 1>
                         jmp   yyy
                     xxx:
                         <several movs 2>
                     yyy:
                  }
                  { hp2 points to jmp yyy }
                  hp2:=hp1;
                  { skip hp1 to xxx }
                  GetNextInstruction(hp1, hp1);
                  if assigned(hp2) and
                    assigned(hp1) and
                    { the first group must contain at least one mov: the
                      conversion loop below always rewrites its first
                      instruction, which would otherwise be the jmp itself }
                    (l>0) and
                    (l<=3) and
                    (hp2.typ=ait_instruction) and
                    (taicpu(hp2).is_jmp) and
                    (taicpu(hp2).condition=C_None) and
                    { real label and jump, no further references to the
                      label are allowed }
                    (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=1) and
                    FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                     begin
                       l:=0;
                       { skip hp1 to <several moves 2> }
                       GetNextInstruction(hp1, hp1);
                       while assigned(hp1) and
                         CanBeCMOV(hp1) do
                         begin
                           inc(l);
                           GetNextInstruction(hp1, hp1);
                         end;
                       { hp1 points to yyy: }
                       if assigned(hp1) and
                         { same reasoning for the second group: without a mov
                           the second conversion loop would write to whatever
                           follows xxx: }
                         (l>0) and
                         FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
                         begin
                            condition:=inverse_cond(taicpu(p).condition);
                            GetNextInstruction(p,hp1);
                            hp3:=p;
                            p:=hp1;
                            repeat
                              taicpu(hp1).opcode:=A_CMOVcc;
                              taicpu(hp1).condition:=condition;
                              GetNextInstruction(hp1,hp1);
                            until not(assigned(hp1)) or
                              not(CanBeCMOV(hp1));
                            { hp2 is still at jmp yyy }
                            GetNextInstruction(hp2,hp1);
                            { hp1 is now at xxx: }
                            condition:=inverse_cond(condition);
                            GetNextInstruction(hp1,hp1);
                            { hp1 is now at <several movs 2> }
                            repeat
                              taicpu(hp1).opcode:=A_CMOVcc;
                              taicpu(hp1).condition:=condition;
                              GetNextInstruction(hp1,hp1);
                            until not(assigned(hp1)) or
                              not(CanBeCMOV(hp1));
                            {
                            asml.remove(hp1.next)
                            hp1.next.free;
                            asml.remove(hp1);
                            hp1.free;
                            }
                            { remove jCC }
                            tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
                            asml.remove(hp3);
                            hp3.free;
                            { remove jmp }
                            tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                            asml.remove(hp2);
                            hp2.free;
                            result:=true;
                            exit;
                         end;
                     end;
                end;
            end;
        end;
    end;
  { Pass-1 peephole optimizations for an AND instruction p.

      and const1, reg ; and const2, reg   ->  and (const1 and const2), reg
      and const, reg  ; movzx sub(reg),reg ->  the movzx is dropped when the
                                               mask already clears all bits
                                               above the extended part
      and const, reg  ; movsx sub(reg),reg ->  likewise, when the mask also
                                               clears the sign bit of the
                                               extended part
      and x, reg ; jxx                     ->  test x, reg ; jxx  when reg is
                                               not in use after the and

    p      : in/out; re-pointed at the merged instruction for AndAnd2And.
    Result : true only for the AndAnd2And rewrite; the other rewrites change
             the list but leave Result false. }
  function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
    var
      hp1 : tai;
    begin
      Result:=false;

      if not(GetNextInstruction(p, hp1)) then
        exit;

      if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_AND,[]) and
        MatchOpType(hp1,top_const,top_reg) and
        (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
        { the second register must contain the first one, so compare their subreg types }
        (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
        (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
        { change
            and const1, reg
            and const2, reg
          to
            and (const1 and const2), reg
        }
        begin
          taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
          DebugMsg('Peephole AndAnd2And done',hp1);
          asml.remove(p);
          p.Free;
          p:=hp1;
          Result:=true;
          exit;
        end
      else if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_MOVZX,[]) and
        (taicpu(hp1).oper[0]^.typ = top_reg) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
        { source and destination of the movzx must be parts of the same
          register; their sub-registers necessarily differ, so the super
          registers have to be compared (as in the movsx case below) }
        (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
        { a high-byte source (%ah etc.) is not the low part masked by the and }
        (getsubreg(taicpu(hp1).oper[0]^.reg)<>R_SUBH) and
        (((taicpu(p).opsize=S_W) and
          (taicpu(hp1).opsize=S_BW)) or
         ((taicpu(p).opsize=S_L) and
          (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
          or
         ((taicpu(p).opsize=S_Q) and
          (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
        ) then
        begin
          { the zero extension is redundant if the mask has no bits set above
            the part being extended }
          if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
             ) or
             (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
             or
             (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $ffffffff)=taicpu(p).oper[0]^.val)
             )
{$endif x86_64}
            then
            begin
              DebugMsg('Peephole AndMovzToAnd done',p);
              asml.remove(hp1);
              hp1.free;
            end;
        end
      else if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
        (taicpu(hp1).oper[0]^.typ = top_reg) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
        (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
        { a high-byte source (%ah etc.) is not the low part masked by the and }
        (getsubreg(taicpu(hp1).oper[0]^.reg)<>R_SUBH) and
        (((taicpu(p).opsize=S_W) and
          (taicpu(hp1).opsize=S_BW)) or
         ((taicpu(p).opsize=S_L) and
          (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
          or
         ((taicpu(p).opsize=S_Q) and
          (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
        ) then
        begin
          { the sign extension is redundant if the mask additionally clears
            the sign bit of the part being extended }
          if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
             ) or
             (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
             or
             (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
             )
{$endif x86_64}
            then
            begin
              DebugMsg('PeepHole Optimization,AndMovsxToAnd',p);
              asml.remove(hp1);
              hp1.free;
            end;
        end
      else if (taicpu(p).oper[1]^.typ = top_reg) and
        (hp1.typ = ait_instruction) and
        (taicpu(hp1).is_jmp) and
        (taicpu(hp1).opcode<>A_JMP) and
        not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
        { change
            and x, reg
            jxx
          to
            test x, reg
            jxx
          if reg is deallocated before the
          jump, but only if it's a conditional jump (PFV)
        }
        taicpu(p).opcode := A_TEST;
    end;
  { Post-peephole pass for MOV: rewrites "mov $0, %reg" as "xor %reg, %reg".
    The rewrite is skipped while the flags register is marked as used,
    because unlike mov, xor modifies the flags.

    p : the mov instruction; it is rewritten in place. }
  procedure TX86AsmOptimizer.PostPeepholeOptMov(const p : tai);
    begin
      { only a zero constant loaded into a register qualifies }
      if not MatchOperand(taicpu(p).oper[0]^,0) then
        exit;
      if taicpu(p).oper[1]^.typ <> Top_Reg then
        exit;
      if RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs) then
        exit;

      taicpu(p).opcode := A_XOR;
      taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
    end;
  1630. end.