aoptx86.pas 74 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
  27. type
  { x86 peephole optimizer: extends TAsmOptimizer with register-aliasing
    helpers and the x86 specific optimization passes declared below. }
  28. TX86AsmOptimizer = class(TAsmOptimizer)
  { returns true if instruction hp gives reg a new value that does not depend
    on the previous contents of reg }
  29. function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
  30. protected
  31. { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
  32. function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
  33. { checks whether reading the value in reg1 depends on the value of reg2. This
  34. is very similar to SuperRegisterEquals, except it takes into account that
  35. R_SUBH and R_SUBL are independent (e.g. reading from AL does not
  36. depend on the value in AH). }
  37. function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
  { inserts s as an assembler comment in front of p when DEBUG_AOPTCPU is
    defined; compiles to an empty procedure otherwise }
  38. procedure DebugMsg(const s : string; p : tai);inline;
  { makes sure reg is marked as allocated from p1 up to and including p2 }
  39. procedure AllocRegBetween(reg : tregister; p1,p2 : tai;var initialusedregs : TAllUsedRegs);
  { recognises a procedure exit sequence starting at p (optionally preceded by
    a nop): "ret", "leave; ret" or "mov/lea framepointer->sp; pop framepointer; ret" }
  40. class function IsExitCode(p : tai) : boolean;
  { true if hp1 is an arithmetic/logic/shift instruction whose destination is
    reg and whose source (if any) is a constant or a register other than reg }
  41. class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean;
  { removes the closest preceding deallocation of the function result
    register(s) before p, depending on the return type of the current procedure }
  42. procedure RemoveLastDeallocForFuncRes(p : tai);
  { merges "shr/sar const1,x; shl const2,x" into shift/and combinations }
  43. function PrePeepholeOptSxx(var p : tai) : boolean;
  { the remaining methods are the individual peephole passes; presumably one
    handler per opcode group as their names suggest - most implementations
    are outside this excerpt, confirm against the implementation section }
  44. function OptPass1AND(var p : tai) : boolean;
  45. function OptPass1VMOVAP(var p : tai) : boolean;
  46. function OptPass1VOP(const p : tai) : boolean;
  47. function OptPass1MOV(var p : tai) : boolean;
  48. function OptPass2MOV(var p : tai) : boolean;
  49. function OptPass2Imul(var p : tai) : boolean;
  50. function OptPass2Jmp(var p : tai) : boolean;
  51. function OptPass2Jcc(var p : tai) : boolean;
  52. procedure PostPeepholeOptMov(const p : tai);
  53. end;
  { MatchInstruction: true if instr is an instruction whose opcode equals one
    of the given opcodes and whose operand size is contained in opsize; an
    empty opsize set accepts every operand size }
  54. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  55. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  56. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  57. function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;
  { MatchOperand: true if oper is a register operand holding reg, a constant
    operand holding a, or (third overload) if both operands have the same
    type and the same register/constant/reference }
  58. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  59. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  60. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  { true if all fields compared by the implementation (offset, segment, base,
    index, scalefactor, symbol, relsymbol, refaddr) are equal }
  61. function RefsEqual(const r1, r2: treference): boolean;
  { returns true if ref has offset 0, no segment, no symbol and no relsymbol, a
    scale factor of 0 or 1, and base/index equal to the registers passed
    (NR_INVALID accepts any register) }
  62. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  63. { returns true if ref uses only the registers passed as base and index
  64. (NR_INVALID accepts any register); unlike MatchReference, the offset is not checked }
  65. function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  66. implementation
  67. uses
  68. cutils,verbose,
  69. globals,
  70. cpuinfo,
  71. procinfo,
  72. aasmbase,
  73. aoptutils,
  74. symconst,symsym,
  75. itcpugas;
  { Returns true if instr is a CPU instruction with opcode op whose operand
    size is contained in opsize. An empty opsize set accepts any size. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
    begin
      result:=false;
      { anything that is not an instruction can never match }
      if instr.typ<>ait_instruction then
        exit;
      if taicpu(instr).opcode<>op then
        exit;
      result:=(opsize=[]) or (taicpu(instr).opsize in opsize);
    end;
  { Returns true if instr is a CPU instruction whose opcode is op1 or op2 and
    whose operand size is contained in opsize. An empty opsize set accepts
    any size. }
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
    begin
      if instr.typ<>ait_instruction then
        exit(false);
      { reject when the opcode is neither of the two candidates }
      if (taicpu(instr).opcode<>op1) and
         (taicpu(instr).opcode<>op2) then
        exit(false);
      result:=(opsize=[]) or (taicpu(instr).opsize in opsize);
    end;
  { Returns true if instr is a CPU instruction whose opcode is op1, op2 or op3
    and whose operand size is contained in opsize. An empty opsize set
    accepts any size. }
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
    begin
      if instr.typ<>ait_instruction then
        exit(false);
      { reject when the opcode is none of the three candidates }
      if (taicpu(instr).opcode<>op1) and
         (taicpu(instr).opcode<>op2) and
         (taicpu(instr).opcode<>op3) then
        exit(false);
      result:=(opsize=[]) or (taicpu(instr).opsize in opsize);
    end;
  { Returns true if instr is a CPU instruction whose opcode is one of the
    entries of ops and whose operand size is contained in opsize (an empty
    opsize set accepts any size). An empty ops array never matches. }
  function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
    const opsize : topsizes) : boolean;
    var
      candidate : TAsmOp;
    begin
      { try every candidate with the single-opcode overload and stop at the
        first hit }
      for candidate in ops do
        if MatchInstruction(instr,candidate,opsize) then
          exit(true);
      result:=false;
    end;
  { Returns true if oper is a register operand that holds exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ=top_reg then
        result:=oper.reg=reg
      else
        result:=false;
    end;
  { Returns true if oper is a constant operand whose value equals a. }
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
    begin
      if oper.typ=top_const then
        result:=oper.val=a
      else
        result:=false;
    end;
  { Returns true if both operands have the same operand type and refer to the
    same constant, register or memory reference. Operand types other than
    top_const, top_reg and top_ref trigger internalerror(2013102801). }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
    begin
      { operands of different kinds can never be equal }
      if oper1.typ<>oper2.typ then
        exit(false);
      result:=true;
      case oper1.typ of
        top_const:
          result:=oper1.val=oper2.val;
        top_reg:
          result:=oper1.reg=oper2.reg;
        top_ref:
          result:=RefsEqual(oper1.ref^,oper2.ref^);
        else
          internalerror(2013102801);
      end;
    end;
  { Returns true if r1 and r2 agree in offset, segment, base, index,
    scalefactor, symbol, relsymbol and refaddr. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      result:=false;
      { register part of the address }
      if (r1.base<>r2.base) or
         (r1.index<>r2.index) or
         (r1.scalefactor<>r2.scalefactor) then
        exit;
      { displacement and segment override }
      if (r1.offset<>r2.offset) or
         (r1.segment<>r2.segment) then
        exit;
      { symbolic part and addressing kind }
      result:=(r1.symbol=r2.symbol) and
        (r1.relsymbol=r2.relsymbol) and
        (r1.refaddr=r2.refaddr);
    end;
  { Returns true if ref has offset 0 and additionally satisfies
    MatchReferenceWithOffset: scale factor 0 or 1, no segment, no symbol, no
    relsymbol, and base/index equal to the registers passed (NR_INVALID
    accepts any register). }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
    begin
      Result:=(ref.offset=0) and
        MatchReferenceWithOffset(ref,base,index);
    end;
  { Returns true if ref has a scale factor of 0 or 1, no segment, no symbol
    and no relsymbol, and its base/index registers equal the ones passed
    (NR_INVALID accepts any register). The offset is not inspected. }
  function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
    begin
      Result:=false;
      if not(ref.scalefactor in [0,1]) then
        exit;
      if (ref.segment<>NR_NO) or
         (ref.symbol<>nil) or
         (ref.relsymbol<>nil) then
        exit;
      { NR_INVALID acts as a wildcard for the base register ... }
      if (base<>NR_INVALID) and (ref.base<>base) then
        exit;
      { ... and for the index register }
      Result:=(index=NR_INVALID) or (ref.index=index);
    end;
  { Emits s as an assembler comment in front of p when DEBUG_AOPTCPU is
    defined; otherwise the body is empty. }
  procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
{$ifdef DEBUG_AOPTCPU}
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
{$endif DEBUG_AOPTCPU}
    end;
  { Returns true if writing a new value to reg1 replaces every bit of reg2.
    Registers with different super registers never overwrite each other;
    non-integer registers with the same super register always do. For integer
    registers the answer depends on the sub register that is written. }
  function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
    begin
      result:=false;
      if not SuperRegistersEqual(reg1,reg2) then
        exit;
      { same super register: true unless a partial integer write is involved }
      result:=true;
      if getregtype(reg1)<>R_INTREGISTER then
        exit;
      case getsubreg(reg1) of
        { writing the low byte leaves the high byte and all upper bits alone:
            reg2 := (reg2 and $ffffff00) or byte(reg1) }
        R_SUBL:
          result:=getsubreg(reg2)=R_SUBL;
        { writing the high byte leaves the low byte and all upper bits alone:
            reg2 := (reg2 and $ffff00ff) or (reg1 and $ff00) }
        R_SUBH:
          result:=getsubreg(reg2)=R_SUBH;
        { a 16 bit write keeps the upper 16 bits of anything wider:
            reg2 := (reg2 and $ffff0000) or word(reg1) }
        R_SUBW:
          result:=getsubreg(reg2) in [R_SUBL,R_SUBH,R_SUBW];
        { a 32 bit write clears the upper 32 bits of the 64 bit register on
          x86_64, so it replaces every sub register just like a 64 bit write }
        R_SUBD,
        R_SUBQ:
          result:=true;
        else
          internalerror(2017042801);
      end;
    end;
  { Returns true if the value read from reg1 depends on the contents of reg2.
    Registers with different super registers are independent; for integer
    registers the low and high byte sub registers are independent of each
    other, every other combination overlaps. }
  function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
    begin
      result:=false;
      if not SuperRegistersEqual(reg1,reg2) then
        exit;
      { same super register: dependent unless the AL/AH style split applies }
      result:=true;
      if getregtype(reg1)<>R_INTREGISTER then
        exit;
      case getsubreg(reg1) of
        R_SUBL:
          result:=getsubreg(reg2)<>R_SUBH;
        R_SUBH:
          result:=getsubreg(reg2)<>R_SUBL;
        R_SUBW,
        R_SUBD,
        R_SUBQ:
          result:=true;
        else
          internalerror(2017042802);
      end;
    end;
  { Pre-peephole pass for a shr/sar instruction p with a constant shift count.

    Rewrites the sequence
        shr/sar const1, x
        shl     const2, x
    (same operand size, same destination x) into
      - "shr/sar (const1-const2), x; and mask, x"   when const1 > const2
      - "and mask, x; shl (const2-const1), x"        when const1 < const2
      - "and mask, x"                                when const1 = const2
    where mask clears the low bits that the original pair shifted out. The
    two-instruction forms are skipped when optimizing for size
    (cs_opt_size).

    p is not replaced; the function result is always false, as in the
    original implementation. }
  function TX86AsmOptimizer.PrePeepholeOptSxx(var p : tai) : boolean;

    { Returns the AND mask for an operand of size opsize that clears its
      lowest shiftcount bits. errorcode is passed to internalerror for an
      unsupported operand size. }
    function ClearLowBitsMask(shiftcount : TCGInt; opsize : topsize; errorcode : longint) : TCGInt;
      var
        lowbits : TCGInt;
      begin
        result:=0;
        { force the shift to be evaluated as TCGInt: with a plain literal 1
          the shift is done in the host's native integer width, which is too
          narrow for 64 bit operands when the compiler runs on a 32 bit host }
        lowbits:=(TCGInt(1) shl shiftcount)-1;
        case opsize of
          S_B: result:=lowbits xor $ff;
          S_W: result:=lowbits xor $ffff;
          S_L: result:=lowbits xor aint($ffffffff);
          S_Q: result:=lowbits xor aint($ffffffffffffffff);
          else
            internalerror(errorcode);
        end;
      end;

    var
      hp1 : tai;
      shiftright,
      shiftleft : TCGInt;
    begin
      result:=false;
      if GetNextInstruction(p, hp1) and
        MatchInstruction(hp1,A_SHL,[]) and
        (taicpu(p).oper[0]^.typ = top_const) and
        (taicpu(hp1).oper[0]^.typ = top_const) and
        (taicpu(hp1).opsize = taicpu(p).opsize) and
        (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
        OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
        begin
          { remember both counts before any operand is overwritten }
          shiftright:=taicpu(p).oper[0]^.val;
          shiftleft:=taicpu(hp1).oper[0]^.val;
          if (shiftright>shiftleft) and
            not(cs_opt_size in current_settings.optimizerswitches) then
            begin
              { shr/sar const1, %reg
                shl const2, %reg
                with const1 > const2: keep a shorter right shift, then mask }
              taicpu(p).loadConst(0,shiftright-shiftleft);
              taicpu(hp1).opcode:=A_AND;
              taicpu(hp1).loadConst(0,ClearLowBitsMask(shiftleft,taicpu(p).opsize,2017050703));
            end
          else if (shiftright<shiftleft) and
            not(cs_opt_size in current_settings.optimizerswitches) then
            begin
              { shr/sar const1, %reg
                shl const2, %reg
                with const1 < const2: mask first, then a shorter left shift }
              taicpu(hp1).loadConst(0,shiftleft-shiftright);
              taicpu(p).opcode:=A_AND;
              taicpu(p).loadConst(0,ClearLowBitsMask(shiftright,taicpu(p).opsize,2017050702));
            end
          else if shiftright=shiftleft then
            begin
              { shr/sar const1, %reg
                shl const2, %reg
                with const1 = const2: a single mask replaces both shifts }
              taicpu(p).opcode:=A_AND;
              taicpu(p).loadConst(0,ClearLowBitsMask(shiftright,taicpu(p).opsize,2017050701));
              asml.remove(hp1);
              hp1.free;
            end;
        end;
    end;
  310. { allocates register reg between (and including) instructions p1 and p2
  311. the type of p1 and p2 must not be in SkipInstr
  312. note that this routine is both called from the peephole optimizer
  313. (where optinfo is not yet initialised) and from the cse (where it is) }
  { NOTE(review): the loop below raises internalerror(2014022301) as soon as an
    instruction with assigned optinfo is met, so in this version only callers
    without optinfo appear to be supported - confirm.
    initialusedregs is updated when reg has to be newly allocated before p1. }
  314. procedure TX86AsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
  315. var
  316. hp, start: tai;
  317. removedsomething,
  318. firstRemovedWasAlloc,
  319. lastRemovedWasDealloc: boolean;
  320. begin
  321. {$ifdef EXTDEBUG}
  322. { if assigned(p1.optinfo) and
  323. (ptaiprop(p1.optinfo)^.usedregs <> initialusedregs) then
  324. internalerror(2004101010); }
  325. {$endif EXTDEBUG}
  { remember the first instruction: p1 is advanced by the loop below, but a
    removed allocation may have to be re-inserted in front of the start }
  326. start := p1;
  { nothing to do for the stack pointer, the frame pointer or a missing p1 }
  327. if (reg = NR_ESP) or
  328. (reg = current_procinfo.framepointer) or
  329. not(assigned(p1)) then
  330. { this happens with registers which are loaded implicitely, outside the }
  331. { current block (e.g. esi with self) }
  332. exit;
  333. { make sure we allocate it for this instruction }
  { p2 now denotes the instruction following the range, i.e. the loop end }
  334. getnextinstruction(p2,p2);
  335. lastRemovedWasDealloc := false;
  336. removedSomething := false;
  337. firstRemovedWasAlloc := false;
  { NOTE(review): the allocregdebug code below refers to "supreg", which is not
    declared in this procedure, and passes asml to insertllitem unlike the
    other calls here; it presumably no longer compiles when allocregdebug is
    defined - confirm before enabling }
  338. {$ifdef allocregdebug}
  339. hp := tai_comment.Create(strpnew('allocating '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
  340. ' from here...'));
  341. insertllitem(asml,p1.previous,p1,hp);
  342. hp := tai_comment.Create(strpnew('allocated '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
  343. ' till here...'));
  344. insertllitem(asml,p2,p2.next,hp);
  345. {$endif allocregdebug}
  346. { do it the safe way: always allocate the full super register,
  347. as we do no register re-allocation in the peephole optimizer,
  348. this does not hurt
  349. }
  350. case getregtype(reg) of
  351. R_MMREGISTER:
  352. reg:=newreg(R_MMREGISTER,getsupreg(reg),R_SUBMMWHOLE);
  353. R_INTREGISTER:
  354. reg:=newreg(R_INTREGISTER,getsupreg(reg),R_SUBWHOLE);
  355. end;
  { if the register is not live on entry, allocate it right before p1 and
    record that in the caller's register set }
  356. if not(RegInUsedRegs(reg,initialusedregs)) then
  357. begin
  358. hp := tai_regalloc.alloc(reg,nil);
  359. insertllItem(p1.previous,p1,hp);
  360. IncludeRegInUsedRegs(reg,initialusedregs);
  361. end;
  { walk from p1 up to (excluding) the advanced p2 and drop every
    alloc/dealloc marker for reg that lies in between }
  362. while assigned(p1) and
  363. (p1 <> p2) do
  364. begin
  365. if assigned(p1.optinfo) then
  366. internalerror(2014022301); // IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs);
  367. p1 := tai(p1.next);
  { process the run of SkipInstr entries that follows the current instruction }
  368. repeat
  { skip everything in SkipInstr except register allocation markers }
  369. while assigned(p1) and
  370. (p1.typ in (SkipInstr-[ait_regalloc])) Do
  371. p1 := tai(p1.next);
  372. { remove all allocation/deallocation info about the register in between }
  373. if assigned(p1) and
  374. (p1.typ = ait_regalloc) then
  375. begin
  376. { same super register, different sub register? }
  377. if SuperRegistersEqual(reg,tai_regalloc(p1).reg) and (tai_regalloc(p1).reg<>reg) then
  378. begin
  { the marker is rewritten to the full register; a marker with a larger
    sub register than reg, or reg being a high-byte register, is treated
    as an internal error }
  379. if (getsubreg(tai_regalloc(p1).reg)>getsubreg(reg)) or (getsubreg(reg)=R_SUBH) then
  380. internalerror(2016101501);
  381. tai_regalloc(p1).reg:=reg;
  382. end;
  383. if tai_regalloc(p1).reg=reg then
  384. begin
  { record the kind of the first and of the most recent removed marker so
    that the outer alloc/dealloc can be restored after the loop }
  385. if not removedSomething then
  386. begin
  387. firstRemovedWasAlloc := tai_regalloc(p1).ratype=ra_alloc;
  388. removedSomething := true;
  389. end;
  390. lastRemovedWasDealloc := (tai_regalloc(p1).ratype=ra_dealloc);
  391. hp := tai(p1.Next);
  392. asml.Remove(p1);
  393. p1.free;
  394. p1 := hp;
  395. end
  396. else
  397. p1 := tai(p1.next);
  398. end;
  399. until not(assigned(p1)) or
  400. not(p1.typ in SkipInstr);
  401. end;
  { p1 is nil when the end of the list was reached; nothing is restored then }
  402. if assigned(p1) then
  403. begin
  { the first removed marker was an allocation: put it back in front of the
    original start instruction }
  404. if firstRemovedWasAlloc then
  405. begin
  406. hp := tai_regalloc.Alloc(reg,nil);
  407. insertLLItem(start.previous,start,hp);
  408. end;
  { the last removed marker was a deallocation: put it back in front of the
    instruction the loop stopped at }
  409. if lastRemovedWasDealloc then
  410. begin
  411. hp := tai_regalloc.DeAlloc(reg,nil);
  412. insertLLItem(p1.previous,p1,hp);
  413. end;
  414. end;
  415. end;
  { Returns true if instruction hp loads reg with a value that does not depend
    on the previous contents of reg, i.e. the old value of reg is dead after
    hp.

    - hp that is nil or not an instruction yields false.
    - For the flags register the answer is derived from the change
      information in insprop[] of the opcode: a single flag counts as loaded
      when the instruction writes it, the whole flags register when carry,
      parity, auxiliary, zero, sign and overflow flag are all written.
    - For other registers a fixed list of instructions is recognised (moves,
      lea, pop, imul/mul, sign extensions, segment loads, lods*, setcc,
      f(n)stsw and "xor/sub/sbb reg,reg"); the destination must overwrite reg
      entirely and the source must not depend on reg. }
  function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      if not assigned(hp) or
         (hp.typ <> ait_instruction) then
        begin
          Result := false;
          exit;
        end;
      p := taicpu(hp);
      if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
        with insprop[p.opcode] do
          begin
            case getsubreg(reg) of
              { whole flags register: every arithmetic flag must be written }
              R_SUBW,R_SUBD,R_SUBQ:
                Result:=
                  RegLoadedWithNewValue(NR_CARRYFLAG,hp) and
                  RegLoadedWithNewValue(NR_PARITYFLAG,hp) and
                  RegLoadedWithNewValue(NR_AUXILIARYFLAG,hp) and
                  RegLoadedWithNewValue(NR_ZEROFLAG,hp) and
                  RegLoadedWithNewValue(NR_SIGNFLAG,hp) and
                  RegLoadedWithNewValue(NR_OVERFLOWFLAG,hp);
              R_SUBFLAGCARRY:
                Result:=[Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGPARITY:
                Result:=[Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGAUXILIARY:
                Result:=[Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGZERO:
                Result:=[Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGSIGN:
                Result:=[Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGOVERFLOW:
                Result:=[Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGINTERRUPT:
                Result:=[Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGDIRECTION:
                Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*Ch<>[];
              else
                internalerror(2017050501);
            end;
            exit;
          end;
      Result :=
        { two operand loads: destination register overwrites reg, source does
          not depend on reg }
        (((p.opcode = A_MOV) or
          (p.opcode = A_MOVZX) or
          (p.opcode = A_MOVSX) or
          (p.opcode = A_LEA) or
          (p.opcode = A_VMOVSS) or
          (p.opcode = A_VMOVSD) or
          (p.opcode = A_VMOVAPD) or
          (p.opcode = A_VMOVAPS) or
          (p.opcode = A_VMOVQ) or
          (p.opcode = A_MOVSS) or
          (p.opcode = A_MOVSD) or
          (p.opcode = A_MOVQ) or
          (p.opcode = A_MOVAPD) or
          (p.opcode = A_MOVAPS) or
{$ifndef x86_64}
          (p.opcode = A_LDS) or
          (p.opcode = A_LES) or
{$endif not x86_64}
          (p.opcode = A_LFS) or
          (p.opcode = A_LGS) or
          (p.opcode = A_LSS)) and
         (p.ops=2) and   { A_MOVSD can have zero operands, so this check is needed }
         (p.oper[1]^.typ = top_reg) and
         (Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)) and
         ((p.oper[0]^.typ = top_const) or
          ((p.oper[0]^.typ = top_reg) and
           not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
          ((p.oper[0]^.typ = top_ref) and
           not RegInRef(reg,p.oper[0]^.ref^)))) or
        { pop into a register; the operand type is checked because pop can
          also target memory, in which case the reg field of the operand
          must not be interpreted }
        ((p.opcode = A_POP) and
         (p.oper[0]^.typ = top_reg) and
         (Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg))) or
        { three operand imul: const, source, destination }
        ((p.opcode = A_IMUL) and
         (p.ops=3) and
         (Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg,reg)) and
         (((p.oper[1]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[1]^.reg,reg))) or
          ((p.oper[1]^.typ=top_ref) and not(RegInRef(reg,p.oper[1]^.ref^))))) or
        { one operand imul/mul: implicit destination depends on operand size }
        ((((p.opcode = A_IMUL) or
           (p.opcode = A_MUL)) and
          (p.ops=1)) and
         (((p.oper[0]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
          ((p.oper[0]^.typ=top_ref) and not(RegInRef(reg,p.oper[0]^.ref^)))) and
         (((p.opsize=S_B) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
          ((p.opsize=S_W) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
          ((p.opsize=S_L) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg))
{$ifdef x86_64}
          or ((p.opsize=S_Q) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg))
{$endif x86_64}
         )) or
        { sign extensions with implicit operands }
        ((p.opcode = A_CWD) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
        ((p.opcode = A_CDQ) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg)) or
{$ifdef x86_64}
        ((p.opcode = A_CQO) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg)) or
{$endif x86_64}
        ((p.opcode = A_CBW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
        { segment register loads }
{$ifndef x86_64}
        ((p.opcode = A_LDS) and (reg=NR_DS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
        ((p.opcode = A_LES) and (reg=NR_ES) and not(RegInRef(reg,p.oper[0]^.ref^))) or
{$endif not x86_64}
        ((p.opcode = A_LFS) and (reg=NR_FS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
        ((p.opcode = A_LGS) and (reg=NR_GS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
        ((p.opcode = A_LSS) and (reg=NR_SS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
        { instructions with an implicit accumulator destination }
{$ifndef x86_64}
        ((p.opcode = A_AAM) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
{$endif not x86_64}
        ((p.opcode = A_LAHF) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
        ((p.opcode = A_LODSB) and Reg1WriteOverwritesReg2Entirely(NR_AL,reg)) or
        ((p.opcode = A_LODSW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg)) or
        ((p.opcode = A_LODSD) and Reg1WriteOverwritesReg2Entirely(NR_EAX,reg)) or
{$ifdef x86_64}
        ((p.opcode = A_LODSQ) and Reg1WriteOverwritesReg2Entirely(NR_RAX,reg)) or
{$endif x86_64}
        ((p.opcode = A_SETcc) and (p.oper[0]^.typ=top_reg) and Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
        (((p.opcode = A_FSTSW) or
          (p.opcode = A_FNSTSW)) and
         (p.oper[0]^.typ=top_reg) and
         Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
        { "xor/sub/sbb reg,reg": the result does not depend on the old value }
        (((p.opcode = A_XOR) or (p.opcode = A_SUB) or (p.opcode = A_SBB)) and
         (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
         (p.oper[0]^.reg=p.oper[1]^.reg) and
         Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg));
    end;
  { Returns true if the instructions starting at p form a procedure exit
    sequence. After skipping one leading nop, the accepted forms are
      ret
      leave; ret
      mov framepointer,stackpointer (or lea with the frame pointer as base
        and the stack pointer as destination); pop framepointer; ret }
  class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
    var
      next1,next2 : tai;
      restoresStack : boolean;
    begin
      result:=false;
      { some x86-64 issue a NOP before the real exit code }
      if MatchInstruction(p,A_NOP,[]) then
        GetNextInstruction(p,p);
      if not assigned(p) or (p.typ<>ait_instruction) then
        exit;
      case taicpu(p).opcode of
        A_RET:
          begin
            result:=true;
            exit;
          end;
        A_LEAVE:
          begin
            result:=GetNextInstruction(p,next1) and
              MatchInstruction(next1,A_RET,[S_NO]);
            exit;
          end;
        { manual frame teardown, first step: stack pointer restored from the
          frame pointer }
        A_MOV:
          restoresStack:=MatchOpType(taicpu(p),top_reg,top_reg) and
            (taicpu(p).oper[0]^.reg=current_procinfo.framepointer) and
            (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG);
        A_LEA:
          restoresStack:=MatchOpType(taicpu(p),top_ref,top_reg) and
            (taicpu(p).oper[0]^.ref^.base=current_procinfo.framepointer) and
            (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG);
        else
          restoresStack:=false;
      end;
      { second and third step: pop the frame pointer, then return }
      result:=restoresStack and
        GetNextInstruction(p,next1) and
        MatchInstruction(next1,A_POP,[reg2opsize(current_procinfo.framepointer)]) and
        MatchOpType(taicpu(next1),top_reg) and
        (taicpu(next1).oper[0]^.reg=current_procinfo.framepointer) and
        GetNextInstruction(next1,next2) and
        MatchInstruction(next2,A_RET,[S_NO]);
    end;
  { Returns true if hp1 is an arithmetic, logic or shift instruction that
    modifies reg in place:
      - add/sub/or/xor/and/shl/shr/sar with reg as destination and a constant
        or a register other than reg as source
      - inc/dec/neg/not with reg as its operand
    Any other opcode yields false. }
  class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
    begin
      result:=false;
      case hp1.opcode of
        A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
          begin
            { source: a constant, or a register different from reg }
            result:=hp1.oper[0]^.typ=top_const;
            if not result and (hp1.oper[0]^.typ=top_reg) then
              result:=hp1.oper[0]^.reg<>reg;
            { destination: must be reg itself }
            if result then
              result:=(hp1.oper[1]^.typ=top_reg) and
                (hp1.oper[1]^.reg=reg);
          end;
        A_INC,A_DEC,A_NEG,A_NOT:
          result:=(hp1.oper[0]^.typ=top_reg) and
            (hp1.oper[0]^.reg=reg);
      end;
    end;
  { Removes the closest deallocation marker of the function result
    register(s) that precedes p. Which registers are handled depends on the
    return type of the current procedure: EAX for the listed non-ordinal
    types and for ordinals with a non-zero size, additionally EDX for
    ordinals of size 8. }
  procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);

    { Walks backwards from p and removes the first integer register
      deallocation of supreg that is found. The search stops at the start of
      the list or at the first entry for which regInInstruction reports the
      register. }
    procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
      var
        cur: tai;
      begin
        cur:=tai(p.previous);
        while assigned(cur) do
          begin
            if (cur.typ=ait_regalloc) and
               (tai_regalloc(cur).ratype=ra_dealloc) and
               (getregtype(tai_regalloc(cur).reg)=R_INTREGISTER) and
               (getsupreg(tai_regalloc(cur).reg)=supreg) then
              begin
                asml.remove(cur);
                cur.free;
                break;
              end;
            if regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),cur) then
              break;
            cur:=tai(cur.previous);
          end;
      end;

    begin
      case current_procinfo.procdef.returndef.typ of
        arraydef,recorddef,pointerdef,
        stringdef,enumdef,procdef,objectdef,errordef,
        filedef,setdef,procvardef,
        classrefdef,forwarddef:
          DoRemoveLastDeallocForFuncRes(RS_EAX);
        orddef:
          if current_procinfo.procdef.returndef.size<>0 then
            begin
              DoRemoveLastDeallocForFuncRes(RS_EAX);
              { for int64/qword the upper half is returned in EDX }
              if current_procinfo.procdef.returndef.size=8 then
                DoRemoveLastDeallocForFuncRes(RS_EDX);
            end;
      end;
    end;
  { Pass 1 peephole optimizations for a register-to-register vmovaps/vmovapd
    instruction p:

      vmova* reg1,reg1                     => removed
      vmova* reg1,reg2; vmova* reg2,reg3   => vmova* reg1,reg3
                                              (reg2 not used afterwards)
      vmova* reg1,reg2; vmova* reg2,reg1   => vmova* reg1,reg2
      vmova* reg1,reg2; <fma> ...,reg2; vmova* reg2,reg1
                                           => <fma> ...,reg1
                                              (reg2 not used afterwards)

    p is updated when the instruction it points to is removed. The result is
    true for the first three rewrites; the FMA rewrite leaves it false, as in
    the original implementation.

    The register set copied with CopyUsedRegs is released again with
    ReleaseUsedRegs on every path; the original code never released it.
    NOTE(review): ReleaseUsedRegs is assumed to be provided by the base
    optimizer class next to CopyUsedRegs - confirm against aoptobj. }
  function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1,hp2 : tai;
    begin
      result:=false;
      if MatchOpType(taicpu(p),top_reg,top_reg) then
        begin
          { vmova* reg1,reg1
            =>
            <nop> }
          if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
            begin
              GetNextInstruction(p,hp1);
              asml.Remove(p);
              p.Free;
              p:=hp1;
              result:=true;
            end
          else if GetNextInstruction(p,hp1) then
            begin
              if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
                MatchOpType(taicpu(hp1),top_reg,top_reg) and
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
                begin
                  { vmova* reg1,reg2
                    vmova* reg2,reg3
                    dealloc reg2
                    =>
                    vmova* reg1,reg3 }
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                    begin
                      taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end
                  { special case:
                    vmova* reg1,reg2
                    vmova* reg2,reg1
                    =>
                    vmova* reg1,reg2 }
                  else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
                    begin
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end;
                  { the temporary register set is no longer needed }
                  ReleaseUsedRegs(TmpUsedRegs);
                end
              else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
                { we mix single and double operations here because we assume that the compiler
                  generates vmovapd only after double operations and vmovaps only after single operations }
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
                GetNextInstruction(hp1,hp2) and
                MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
                MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
                begin
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
                    begin
                      { let the fma work on reg1 directly and drop both moves }
                      taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
                      asml.Remove(p);
                      p.Free;
                      asml.Remove(hp2);
                      hp2.Free;
                      p:=hp1;
                    end;
                  { the temporary register set is no longer needed }
                  ReleaseUsedRegs(TmpUsedRegs);
                end;
            end;
        end;
    end;
  { Pass-1 peephole optimization for an instruction p whose oper[2] is moved
    on by the next instruction:

      vop   ...,reg1
      vmova* reg1,reg2        (reg1 not used afterwards)
      =>
      vop   ...,reg2

    Returns true when the move was folded into p and removed.

    The temporary register set obtained with CopyUsedRegs is released again
    with ReleaseUsedRegs, as done by the other routines of this unit. }
  function TX86AsmOptimizer.OptPass1VOP(const p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1 : tai;
    begin
      result:=false;
      if GetNextInstruction(p,hp1) and
         { we mix single and double operations here because we assume that the compiler
           generates vmovapd only after double operations and vmovaps only after single operations }
         MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
         MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
         (taicpu(hp1).oper[1]^.typ=top_reg) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)) then
            begin
              { write the result straight into the target of the move }
              taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
              asml.Remove(hp1);
              hp1.Free;
              result:=true;
            end;
          { free the temporary copy on every path }
          ReleaseUsedRegs(TmpUsedRegs);
        end;
    end;
  { Pass-1 peephole optimizations for the MOV instruction at p.
    The routine looks at p together with the one or two instructions that
    follow it and applies the first matching rewrite.  The comments in the
    branches below show the before/after pattern of each rewrite (MovAnd2Mov,
    MovMov2Mov, mov+test/or, dead store before leave/ret, MovTestCmp,
    MovMovCmp2MovCmp, MovMovMov2MovMov, MovOpMov2Op, mov/bts/or reordering and
    MovLea2Add).
    p is a var parameter: when the MOV itself is removed, p is set to the
    instruction that followed it.
    Result is only set to true by the MovAnd2Mov and 'MovMov2Mov 1' rewrites;
    all other paths return false, including paths that changed the code. }
  729. function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  730. var
  731. hp1, hp2: tai;
  732. TmpUsedRegs : TAllUsedRegs;
  733. GetNextIntruction_p : Boolean;
  { GetNextIntruction_p caches the result of the first GetNextInstruction(p, hp1) call }
  734. begin
  735. Result:=false;
  736. GetNextIntruction_p:=GetNextInstruction(p, hp1);
  { mov x,%reg followed by and $ffffffff,%reg with a 32 bit mov: the and is removed }
  737. if GetNextIntruction_p and
  738. MatchInstruction(hp1,A_AND,[]) and
  739. (taicpu(p).oper[1]^.typ = top_reg) and
  740. MatchOpType(taicpu(hp1),top_const,top_reg) and
  741. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
  742. case taicpu(p).opsize Of
  743. S_L:
  744. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  745. begin
  746. DebugMsg('PeepHole Optimization,MovAnd2Mov',p);
  747. asml.remove(hp1);
  748. hp1.free;
  749. Result:=true;
  750. exit;
  751. end;
  752. end
  753. else if GetNextIntruction_p and
  754. MatchInstruction(hp1,A_MOV,[]) and
  755. (taicpu(p).oper[1]^.typ = top_reg) and
  756. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
  757. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  758. begin
  759. CopyUsedRegs(TmpUsedRegs);
  760. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  761. { we have
  762. mov x, %treg
  763. mov %treg, y
  764. }
  765. if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
  766. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  767. { we've got
  768. mov x, %treg
  769. mov %treg, y
  770. with %treg is not used after }
  771. case taicpu(p).oper[0]^.typ Of
  772. top_reg:
  773. begin
  774. { change
  775. mov %reg, %treg
  776. mov %treg, y
  777. to
  778. mov %reg, y
  779. }
  780. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  781. DebugMsg('PeepHole Optimization,MovMov2Mov 2',p);
  782. asml.remove(hp1);
  783. hp1.free;
  784. ReleaseUsedRegs(TmpUsedRegs);
  785. Exit;
  786. end;
  787. top_ref:
  788. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  789. begin
  790. { change
  791. mov mem, %treg
  792. mov %treg, %reg
  793. to
  794. mov mem, %reg
  795. }
  796. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  797. DebugMsg('PeepHole Optimization,MovMov2Mov 3',p);
  798. asml.remove(hp1);
  799. hp1.free;
  800. ReleaseUsedRegs(TmpUsedRegs);
  801. Exit;
  802. end;
  803. end;
  804. ReleaseUsedRegs(TmpUsedRegs);
  805. end
  806. else
  807. { Change
  808. mov %reg1, %reg2
  809. xxx %reg2, ???
  810. to
  811. mov %reg1, %reg2
  812. xxx %reg1, ???
  813. to avoid a write/read penalty
  814. }
  { note: GetNextInstruction(p,hp1) is evaluated again here instead of reusing GetNextIntruction_p }
  815. if MatchOpType(taicpu(p),top_reg,top_reg) and
  816. GetNextInstruction(p,hp1) and
  817. (tai(hp1).typ = ait_instruction) and
  818. (taicpu(hp1).ops >= 1) and
  819. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  820. { we have
  821. mov %reg1, %reg2
  822. XXX %reg2, ???
  823. }
  824. begin
  825. if ((taicpu(hp1).opcode = A_OR) or
  826. (taicpu(hp1).opcode = A_TEST)) and
  827. (taicpu(hp1).oper[1]^.typ = top_reg) and
  828. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  829. { we have
  830. mov %reg1, %reg2
  831. test/or %reg2, %reg2
  832. }
  833. begin
  834. CopyUsedRegs(TmpUsedRegs);
  835. { reg1 will be used after the first instruction,
  836. so update the allocation info }
  837. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  838. if GetNextInstruction(hp1, hp2) and
  839. (hp2.typ = ait_instruction) and
  840. taicpu(hp2).is_jmp and
  841. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  842. { change
  843. mov %reg1, %reg2
  844. test/or %reg2, %reg2
  845. jxx
  846. to
  847. test %reg1, %reg1
  848. jxx
  849. }
  850. begin
  851. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  852. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  853. asml.remove(p);
  854. p.free;
  855. p := hp1;
  856. ReleaseUsedRegs(TmpUsedRegs);
  857. Exit;
  858. end
  859. else
  860. { change
  861. mov %reg1, %reg2
  862. test/or %reg2, %reg2
  863. to
  864. mov %reg1, %reg2
  865. test/or %reg1, %reg1
  866. }
  867. begin
  868. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  869. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  870. end;
  871. ReleaseUsedRegs(TmpUsedRegs);
  872. end
  873. end
  874. else
  875. { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  876. x >= RetOffset) as it doesn't do anything (it writes either to a
  877. parameter or to the temporary storage room for the function
  878. result)
  879. }
  880. if GetNextIntruction_p and
  881. (tai(hp1).typ = ait_instruction) then
  882. begin
  883. if IsExitCode(hp1) and
  884. MatchOpType(taicpu(p),top_reg,top_ref) and
  885. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  886. not(assigned(current_procinfo.procdef.funcretsym) and
  887. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  888. (taicpu(p).oper[1]^.ref^.index = NR_NO) then
  889. begin
  890. asml.remove(p);
  891. p.free;
  892. p:=hp1;
  893. DebugMsg('Peephole removed deadstore before leave/ret',p);
  894. RemoveLastDeallocForFuncRes(p);
  895. exit;
  896. end
  897. { change
  898. mov reg1, mem1
  899. test/cmp x, mem1
  900. to
  901. mov reg1, mem1
  902. test/cmp x, reg1
  903. }
  904. else if MatchOpType(taicpu(p),top_reg,top_ref) and
  905. MatchInstruction(hp1,A_CMP,A_TEST,[taicpu(p).opsize]) and
  906. (taicpu(hp1).oper[1]^.typ = top_ref) and
  907. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  908. begin
  909. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  910. DebugMsg('Peephole MovTestCmp2MovTestCmp 1',hp1);
  911. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  912. end;
  913. end;
  { the if-chain above ends here; the chain below is evaluated whenever no branch above exited }
  914. { Next instruction is also a MOV ? }
  915. if GetNextIntruction_p and
  916. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  917. begin
  918. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  919. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  920. { mov reg1, mem1 or mov mem1, reg1
  921. mov mem2, reg2 mov reg2, mem2}
  922. begin
  923. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  924. { mov reg1, mem1 or mov mem1, reg1
  925. mov mem2, reg1 mov reg2, mem1}
  926. begin
  927. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  928. { Removes the second statement from
  929. mov reg1, mem1/reg2
  930. mov mem1/reg2, reg1 }
  931. begin
  932. if taicpu(p).oper[0]^.typ=top_reg then
  933. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  934. DebugMsg('PeepHole Optimization,MovMov2Mov 1',p);
  935. asml.remove(hp1);
  936. hp1.free;
  937. Result:=true;
  938. exit;
  939. end
  940. else
  941. begin
  942. CopyUsedRegs(TmpUsedRegs);
  943. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  944. if (taicpu(p).oper[1]^.typ = top_ref) and
  945. { mov reg1, mem1
  946. mov mem2, reg1 }
  947. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  948. GetNextInstruction(hp1, hp2) and
  949. MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
  950. OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  951. OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
  952. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  953. { change to
  954. mov reg1, mem1 mov reg1, mem1
  955. mov mem2, reg1 cmp reg1, mem2
  956. cmp mem1, reg1
  957. }
  958. begin
  959. asml.remove(hp2);
  960. hp2.free;
  961. taicpu(hp1).opcode := A_CMP;
  962. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  963. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  964. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  965. DebugMsg('Peephole MovMovCmp2MovCmp done',hp1);
  966. end;
  967. ReleaseUsedRegs(TmpUsedRegs);
  968. end;
  969. end
  970. else if (taicpu(p).oper[1]^.typ=top_ref) and
  971. OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  972. begin
  973. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  974. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  975. DebugMsg('PeepHole Optimization,MovMov2MovMov1',p);
  976. end
  977. else
  978. begin
  979. CopyUsedRegs(TmpUsedRegs);
  980. if GetNextInstruction(hp1, hp2) and
  981. MatchOpType(taicpu(p),top_ref,top_reg) and
  982. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  983. (taicpu(hp1).oper[1]^.typ = top_ref) and
  984. MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
  985. MatchOpType(taicpu(hp2),top_ref,top_reg) and
  986. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  987. if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
  988. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  989. { mov mem1, %reg1
  990. mov %reg1, mem2
  991. mov mem2, reg2
  992. to:
  993. mov mem1, reg2
  994. mov reg2, mem2}
  995. begin
  996. AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  997. DebugMsg('PeepHole Optimization,MovMovMov2MovMov 1',p);
  998. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  999. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1000. asml.remove(hp2);
  1001. hp2.free;
  1002. end
  1003. {$ifdef i386}
  1004. { this is enabled for i386 only, as the rules to create the reg sets below
  1005. are too complicated for x86-64, so this makes this code too error prone
  1006. on x86-64
  1007. }
  1008. else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  1009. not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
  1010. not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
  1011. { mov mem1, reg1 mov mem1, reg1
  1012. mov reg1, mem2 mov reg1, mem2
  1013. mov mem2, reg2 mov mem2, reg1
  1014. to: to:
  1015. mov mem1, reg1 mov mem1, reg1
  1016. mov mem1, reg2 mov reg1, mem2
  1017. mov reg1, mem2
  1018. or (if mem1 depends on reg1
  1019. and/or if mem2 depends on reg2)
  1020. to:
  1021. mov mem1, reg1
  1022. mov reg1, mem2
  1023. mov reg1, reg2
  1024. }
  1025. begin
  1026. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  1027. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  1028. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  1029. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  1030. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1031. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1032. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1033. AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  1034. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  1035. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1036. AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  1037. end
  1038. else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  1039. begin
  1040. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  1041. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1042. end
  1043. else
  1044. begin
  1045. asml.remove(hp2);
  1046. hp2.free;
  1047. end
  1048. {$endif i386}
  1049. ;
  1050. ReleaseUsedRegs(TmpUsedRegs);
  1051. end;
  1052. end
  1053. (* { movl [mem1],reg1
  1054. movl [mem1],reg2
  1055. to
  1056. movl [mem1],reg1
  1057. movl reg1,reg2
  1058. }
  1059. else if (taicpu(p).oper[0]^.typ = top_ref) and
  1060. (taicpu(p).oper[1]^.typ = top_reg) and
  1061. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1062. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1063. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1064. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  1065. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  1066. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  1067. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  1068. else*)
  1069. { movl const1,[mem1]
  1070. movl [mem1],reg1
  1071. to
  1072. movl const1,reg1
  1073. movl reg1,[mem1]
  1074. }
  1075. else if MatchOpType(Taicpu(p),top_const,top_ref) and
  1076. MatchOpType(Taicpu(hp1),top_ref,top_reg) and
  1077. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1078. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  1079. not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
  1080. begin
  1081. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1082. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1083. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  1084. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  1085. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  1086. DebugMsg('PeepHole Optimization,MovMov2MovMov 1',p);
  1087. end
  1088. end
  1089. else if (taicpu(p).oper[1]^.typ = top_reg) and
  1090. GetNextIntruction_p and
  1091. (hp1.typ = ait_instruction) and
  1092. GetNextInstruction(hp1, hp2) and
  1093. MatchInstruction(hp2,A_MOV,[]) and
  1094. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  1095. (taicpu(hp2).oper[0]^.typ=top_reg) and
  1096. (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
  1097. (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
  1098. ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
  1099. IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
  1100. ) then
  1101. { change movsX/movzX reg/ref, reg2
  1102. add/sub/or/... reg3/$const, reg2
  1103. mov reg2 reg/ref
  1104. to add/sub/or/... reg3/$const, reg/ref }
  1105. begin
  1106. CopyUsedRegs(TmpUsedRegs);
  1107. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1108. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1109. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  1110. begin
  1111. { by example:
  1112. movswl %si,%eax movswl %si,%eax p
  1113. decl %eax addl %edx,%eax hp1
  1114. movw %ax,%si movw %ax,%si hp2
  1115. ->
  1116. movswl %si,%eax movswl %si,%eax p
  1117. decw %eax addw %edx,%eax hp1
  1118. movw %ax,%si movw %ax,%si hp2
  1119. }
  { NOTE(review): the message below opens a parenthesis after MovOpMov2Op that is never closed }
  1120. DebugMsg('Peephole Optimization MovOpMov2Op ('+
  1121. std_op2str[taicpu(p).opcode]+gas_opsize2str[taicpu(p).opsize]+' '+
  1122. std_op2str[taicpu(hp1).opcode]+gas_opsize2str[taicpu(hp1).opsize]+' '+
  1123. std_op2str[taicpu(hp2).opcode]+gas_opsize2str[taicpu(hp2).opsize],p);
  1124. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1125. {
  1126. ->
  1127. movswl %si,%eax movswl %si,%eax p
  1128. decw %si addw %dx,%si hp1
  1129. movw %ax,%si movw %ax,%si hp2
  1130. }
  1131. case taicpu(hp1).ops of
  1132. 1:
  1133. begin
  1134. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  1135. if taicpu(hp1).oper[0]^.typ=top_reg then
  1136. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1137. end;
  1138. 2:
  1139. begin
  1140. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  { the source register of a shift is not resized to the new operand size }
  1141. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  1142. (taicpu(hp1).opcode<>A_SHL) and
  1143. (taicpu(hp1).opcode<>A_SHR) and
  1144. (taicpu(hp1).opcode<>A_SAR) then
  1145. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1146. end;
  1147. else
  1148. internalerror(2008042701);
  1149. end;
  1150. {
  1151. ->
  1152. decw %si addw %dx,%si p
  1153. }
  1154. asml.remove(p);
  1155. asml.remove(hp2);
  1156. p.Free;
  1157. hp2.Free;
  1158. p := hp1;
  1159. end;
  1160. ReleaseUsedRegs(TmpUsedRegs);
  1161. end
  1162. else if GetNextIntruction_p and
  1163. MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  1164. GetNextInstruction(hp1, hp2) and
  1165. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  1166. MatchOperand(Taicpu(p).oper[0]^,0) and
  1167. (Taicpu(p).oper[1]^.typ = top_reg) and
  1168. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  1169. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  1170. { mov reg1,0
  1171. bts reg1,operand1 --> mov reg1,operand2
  1172. or reg1,operand2 bts reg1,operand1}
  1173. begin
  { the or becomes the mov, the bts/btr is re-linked behind it and the original mov is dropped }
  1174. Taicpu(hp2).opcode:=A_MOV;
  1175. asml.remove(hp1);
  1176. insertllitem(hp2,hp2.next,hp1);
  1177. asml.remove(p);
  1178. p.free;
  1179. p:=hp1;
  1180. end
  1181. else if GetNextIntruction_p and
  1182. MatchInstruction(hp1,A_LEA,[S_L]) and
  1183. MatchOpType(Taicpu(p),top_ref,top_reg) and
  1184. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  1185. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  1186. ) or
  1187. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  1188. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  1189. )
  1190. ) then
  1191. { mov reg1,ref
  1192. lea reg2,[reg1,reg2]
  1193. to
  1194. add reg2,ref}
  1195. begin
  1196. CopyUsedRegs(TmpUsedRegs);
  1197. { reg1 may not be used afterwards }
  1198. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1199. begin
  1200. Taicpu(hp1).opcode:=A_ADD;
  1201. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  1202. DebugMsg('Peephole MovLea2Add done',hp1);
  1203. asml.remove(p);
  1204. p.free;
  1205. p:=hp1;
  1206. end;
  1207. ReleaseUsedRegs(TmpUsedRegs);
  1208. end;
  1209. end;
  { Pass-2 peephole optimizations for the MOV instruction at p.
    Two rewrites are tried, in this order:
    1) mov reg1,reg2 followed by a mov/movzx/movsx that loads reg2 from a
       reference using reg2 as base or index: the reference is changed to use
       reg1 and the first mov is removed (Result is set to true).
    2) mov (ref),reg / foldable arithmetic op or lea on reg / mov reg,(ref),
       with reg unused afterwards: the operation is applied to (ref) directly
       and both movs are removed (Result is left false on this path).
    p is a var parameter and is set to the following instruction whenever the
    MOV at p is removed. }
  1210. function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
  1211. var
  1212. TmpUsedRegs : TAllUsedRegs;
  1213. hp1,hp2: tai;
  1214. begin
  1215. Result:=false;
  1216. if MatchOpType(taicpu(p),top_reg,top_reg) and
  1217. GetNextInstruction(p, hp1) and
  1218. MatchInstruction(hp1,A_MOV,A_MOVZX,A_MOVSX,[]) and
  1219. MatchOpType(taicpu(hp1),top_ref,top_reg) and
  1220. ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg)
  1221. or
  1222. (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)
  1223. ) and
  1224. (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
  1225. { mov reg1, reg2
  1226. mov/zx/sx (reg2, ..), reg2 to mov/zx/sx (reg1, ..), reg2}
  1227. begin
  1228. if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
  1229. taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
  1230. if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
  1231. taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
  1232. asml.remove(p);
  1233. p.free;
  1234. p := hp1;
  1235. Result:=true;
  1236. exit;
  1237. end
  1238. else if (taicpu(p).oper[0]^.typ = top_ref) and
  1239. GetNextInstruction(p,hp1) and
  1240. (hp1.typ = ait_instruction) and
  1241. { while the GetNextInstruction(hp1,hp2) call could be factored out,
  1242. doing it separately in both branches allows to do the cheap checks
  1243. with low probability earlier }
  1244. ((IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  1245. GetNextInstruction(hp1,hp2) and
  1246. MatchInstruction(hp2,A_MOV,[])
  1247. ) or
  1248. ((taicpu(hp1).opcode=A_LEA) and
  1249. GetNextInstruction(hp1,hp2) and
  1250. MatchInstruction(hp2,A_MOV,[]) and
  1251. ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
  1252. (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
  1253. ) or
  1254. (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,
  1255. taicpu(p).oper[1]^.reg) and
  1256. (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg)) or
  1257. (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_NO)) or
  1258. (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,NR_NO,taicpu(p).oper[1]^.reg))
  1259. ) and
  { NOTE(review): this call passes UsedRegs itself, while the other RegUsedAfterInstruction
    calls in this routine use the TmpUsedRegs copy - confirm that this is intended }
  1260. ((MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^)) or not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)))
  1261. )
  1262. ) and
  1263. MatchOperand(taicpu(hp1).oper[taicpu(hp1).ops-1]^,taicpu(hp2).oper[0]^) and
  1264. (taicpu(hp2).oper[1]^.typ = top_ref) then
  1265. begin
  1266. CopyUsedRegs(TmpUsedRegs);
  1267. UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
  1268. if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
  1269. not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2, TmpUsedRegs))) then
  1270. { change mov (ref), reg
  1271. add/sub/or/... reg2/$const, reg
  1272. mov reg, (ref)
  1273. # release reg
  1274. to add/sub/or/... reg2/$const, (ref) }
  1275. begin
  1276. case taicpu(hp1).opcode of
  { single operand instructions: the reference replaces the only operand }
  1277. A_INC,A_DEC,A_NOT,A_NEG :
  1278. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  1279. A_LEA :
  1280. begin
  { the lea becomes an add; its source is the other register of the
    reference if there is one, otherwise the constant offset }
  1281. taicpu(hp1).opcode:=A_ADD;
  1282. if (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.index<>NR_NO) then
  1283. taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
  1284. else if (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.base<>NR_NO) then
  1285. taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base)
  1286. else
  1287. taicpu(hp1).loadconst(0,taicpu(hp1).oper[0]^.ref^.offset);
  1288. taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
  1289. DebugMsg('Peephole FoldLea done',hp1);
  1290. end
  1291. else
  1292. taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
  1293. end;
  1294. asml.remove(p);
  1295. asml.remove(hp2);
  1296. p.free;
  1297. hp2.free;
  1298. p := hp1
  1299. end;
  1300. ReleaseUsedRegs(TmpUsedRegs);
  1301. end;
  1302. end;
  { Pass-2 peephole optimization for the IMUL instruction at p:

      mov  reg1,reg2
      imul y,reg2             (y is a constant or a full-address reference)
      =>
      imul y,reg1,reg2

    The rewrite is only done when the mov writes exactly the register that
    the imul multiplies.  The previous code also accepted a 32 bit mov in
    front of a 64 bit imul of the same super register; in that case the
    32 bit registers of the mov were loaded into the 64 bit imul, which gives
    mismatched operand sizes and relies on the upper half of reg1, which the
    32 bit mov does not define.  That case is no longer matched.

    Returns true when the mov was folded into p and removed. }
  function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1 : tai;
    begin
      Result:=false;
      if (taicpu(p).ops >= 2) and
         ((taicpu(p).oper[0]^.typ = top_const) or
          ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
         (taicpu(p).oper[1]^.typ = top_reg) and
         { two operand form, or three operand form with identical source and target }
         ((taicpu(p).ops = 2) or
          ((taicpu(p).oper[2]^.typ = top_reg) and
           (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
         GetLastInstruction(p,hp1) and
         MatchInstruction(hp1,A_MOV,[]) and
         MatchOpType(taicpu(hp1),top_reg,top_reg) and
         { same register including its size, see the routine header }
         (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,p,TmpUsedRegs)) then
            { change
              mov reg1,reg2
              imul y,reg2 to imul y,reg1,reg2 }
            begin
              taicpu(p).ops := 3;
              taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
              taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
              DebugMsg('Peephole MovImul2Imul done',p);
              asml.remove(hp1);
              hp1.free;
              result:=true;
            end;
          ReleaseUsedRegs(TmpUsedRegs);
        end;
    end;
  { Pass-2 peephole optimization for the jump at p:

      jmp .L1
      ...
      .L1:
      ret
      =>
      ret

    An unconditional jump to a symbol whose target, after skipping labels, is
    a RET is turned into a copy of that RET (including its constant operand,
    if it has one).  The reference count of the jump label is decremented.
    Returns true when p was rewritten. }
  function TX86AsmOptimizer.OptPass2Jmp(var p : tai) : boolean;
    var
      target : tai;
    begin
      result:=false;
      { only jumps to a plain symbol, without base or index register }
      if (taicpu(p).oper[0]^.typ<>top_ref) or
         (taicpu(p).oper[0]^.ref^.refaddr<>addr_full) or
         (taicpu(p).oper[0]^.ref^.base<>NR_NO) or
         (taicpu(p).oper[0]^.ref^.index<>NR_NO) then
        exit;
      target:=getlabelwithsym(tasmlabel(taicpu(p).oper[0]^.ref^.symbol));
      { the jump must be unconditional and must land on a ret }
      if (taicpu(p).condition<>C_None) or
         not(assigned(target)) or
         not(SkipLabels(target,target)) or
         not(MatchInstruction(target,A_RET,[S_NO])) then
        exit;
      { the label loses this reference before the operand is overwritten }
      tasmlabel(taicpu(p).oper[0]^.ref^.symbol).decrefs;
      taicpu(p).opcode:=A_RET;
      taicpu(p).is_jmp:=false;
      taicpu(p).ops:=taicpu(target).ops;
      if taicpu(target).ops=0 then
        taicpu(p).clearop(0)
      else if taicpu(target).ops=1 then
        taicpu(p).loadconst(0,taicpu(target).oper[0]^.val)
      else
        internalerror(2016041301);
      result:=true;
    end;
  { Tells whether p can be turned into a conditional move: p must be assigned
    and be a 16, 32 or 64 bit MOV from a register to a register.

    Loads from memory are rejected on purpose: we can't use cmov ref,reg
    because ref could be nil and cmov still throws an exception if ref=nil
    but the mov isn't done (FK). }
  function CanBeCMOV(p : tai) : boolean;
    begin
      CanBeCMOV:=false;
      if not(assigned(p)) then
        exit;
      if not(MatchInstruction(p,A_MOV,[S_W,S_L,S_Q])) then
        exit;
      CanBeCMOV:=MatchOpType(taicpu(p),top_reg,top_reg);
    end;
  { Pass-2 peephole optimizations for the conditional jump at p.

    1) Carry based increment/decrement:

         jb @@1                          cmc
         inc/dec operand         -->     adc/sbb operand,0
         @@1:

         ... and ...

         jnb @@1
         inc/dec operand         -->     adc/sbb operand,0
         @@1:

    2) If the target cpu has CMOV (not compiled for i8086): a jCC that only
       skips up to four register moves, or a jCC/jmp pair that selects between
       two groups of register moves, is replaced by conditional moves.

    p is a var parameter: when the jump is removed, p is set to the
    instruction that followed it.  Returns true when the code was changed.

    Differences to the previous version of this routine:
    - the carry rewrites now decrement the reference count of the jump label,
      like every other place that removes a jump to a label does;
    - the jCC/jmp variant requires both groups of moves to be non-empty,
      because the conversion loops always rewrite at least one instruction;
    - the alignment check after the conversion loop tolerates reaching the
      end of the list. }
  function TX86AsmOptimizer.OptPass2Jcc(var p : tai) : boolean;
    var
      hp1,hp2,hp3: tai;
      carryadd_opcode : TAsmOp;
      l : Longint;
      condition : TAsmCond;
    begin
      result:=false;
      if GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
         GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
         (Tasmlabel(Taicpu(p).oper[0]^.ref^.symbol)=Tai_label(hp2).labsym) then
        begin
          carryadd_opcode:=A_NONE;
          if Taicpu(p).condition in [C_NAE,C_B] then
            begin
              if Taicpu(hp1).opcode=A_INC then
                carryadd_opcode:=A_ADC;
              if Taicpu(hp1).opcode=A_DEC then
                carryadd_opcode:=A_SBB;
              if carryadd_opcode<>A_NONE then
                begin
                  { the jump disappears, so the label loses this reference;
                    must be done before the operand is cleared }
                  tasmlabel(Taicpu(p).oper[0]^.ref^.symbol).decrefs;
                  { the jump becomes a cmc ... }
                  Taicpu(p).clearop(0);
                  Taicpu(p).ops:=0;
                  Taicpu(p).is_jmp:=false;
                  Taicpu(p).opcode:=A_CMC;
                  Taicpu(p).condition:=C_NONE;
                  { ... and the inc/dec becomes adc/sbb operand,0 }
                  Taicpu(hp1).ops:=2;
                  Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                  Taicpu(hp1).loadconst(0,0);
                  Taicpu(hp1).opcode:=carryadd_opcode;
                  result:=true;
                  exit;
                end;
            end;
          if Taicpu(p).condition in [C_AE,C_NB] then
            begin
              if Taicpu(hp1).opcode=A_INC then
                carryadd_opcode:=A_ADC;
              if Taicpu(hp1).opcode=A_DEC then
                carryadd_opcode:=A_SBB;
              if carryadd_opcode<>A_NONE then
                begin
                  { the jump disappears, so the label loses this reference;
                    must be done before p is freed }
                  tasmlabel(Taicpu(p).oper[0]^.ref^.symbol).decrefs;
                  asml.remove(p);
                  p.free;
                  Taicpu(hp1).ops:=2;
                  Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                  Taicpu(hp1).loadconst(0,0);
                  Taicpu(hp1).opcode:=carryadd_opcode;
                  p:=hp1;
                  result:=true;
                  exit;
                end;
            end;
        end;
{$ifndef i8086}
      if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
        begin
          { check for
                 jCC   xxx
                 <several movs>
              xxx:
          }
          l:=0;
          GetNextInstruction(p, hp1);
          while assigned(hp1) and
            CanBeCMOV(hp1) and
            { stop on labels }
            not(hp1.typ=ait_label) do
            begin
              inc(l);
              GetNextInstruction(hp1,hp1);
            end;
          if assigned(hp1) then
            begin
              if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                begin
                  if (l<=4) and (l>0) then
                    begin
                      condition:=inverse_cond(taicpu(p).condition);
                      hp2:=p;
                      GetNextInstruction(p,hp1);
                      p:=hp1;
                      repeat
                        taicpu(hp1).opcode:=A_CMOVcc;
                        taicpu(hp1).condition:=condition;
                        GetNextInstruction(hp1,hp1);
                      until not(assigned(hp1)) or
                        not(CanBeCMOV(hp1));
                      { wait with removing else GetNextInstruction could
                        ignore the label if it was the only usage in the
                        jump moved away }
                      tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                      { if the label refs. reach zero, remove any alignment before the label;
                        hp1 is nil if the loop above ran to the end of the list }
                      if assigned(hp1) and
                         (hp1.typ=ait_align) and
                         (tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).getrefs=0) then
                        begin
                          asml.Remove(hp1);
                          hp1.Free;
                        end;
                      asml.remove(hp2);
                      hp2.free;
                      result:=true;
                      exit;
                    end;
                end
              else
                begin
                  { check further for
                         jCC   xxx
                         <several movs 1>
                         jmp   yyy
                     xxx:
                         <several movs 2>
                     yyy:
                  }
                  { hp2 points to jmp yyy }
                  hp2:=hp1;
                  { skip hp1 to xxx }
                  GetNextInstruction(hp1, hp1);
                  if assigned(hp2) and
                    assigned(hp1) and
                    { at least one mov is needed: the conversion loop below
                      always rewrites the instruction following the jCC }
                    (l>0) and
                    (l<=3) and
                    (hp2.typ=ait_instruction) and
                    (taicpu(hp2).is_jmp) and
                    (taicpu(hp2).condition=C_None) and
                    { real label and jump, no further references to the
                      label are allowed }
                    (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=1) and
                    FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                    begin
                      l:=0;
                      { skip hp1 to <several moves 2> }
                      GetNextInstruction(hp1, hp1);
                      while assigned(hp1) and
                        CanBeCMOV(hp1) do
                        begin
                          inc(l);
                          GetNextInstruction(hp1, hp1);
                        end;
                      { hp1 points to yyy: }
                      if assigned(hp1) and
                        { the second group must not be empty either, see above }
                        (l>0) and
                        FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
                        begin
                          condition:=inverse_cond(taicpu(p).condition);
                          GetNextInstruction(p,hp1);
                          hp3:=p;
                          p:=hp1;
                          repeat
                            taicpu(hp1).opcode:=A_CMOVcc;
                            taicpu(hp1).condition:=condition;
                            GetNextInstruction(hp1,hp1);
                          until not(assigned(hp1)) or
                            not(CanBeCMOV(hp1));
                          { hp2 is still at jmp yyy }
                          GetNextInstruction(hp2,hp1);
                          { hp1 is now at xxx: }
                          condition:=inverse_cond(condition);
                          GetNextInstruction(hp1,hp1);
                          { hp1 is now at <several movs 2> }
                          repeat
                            taicpu(hp1).opcode:=A_CMOVcc;
                            taicpu(hp1).condition:=condition;
                            GetNextInstruction(hp1,hp1);
                          until not(assigned(hp1)) or
                            not(CanBeCMOV(hp1));
                          { remove jCC }
                          tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
                          asml.remove(hp3);
                          hp3.free;
                          { remove jmp }
                          tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                          asml.remove(hp2);
                          hp2.free;
                          result:=true;
                          exit;
                        end;
                    end;
                end;
            end;
        end;
{$endif i8086}
    end;
  { Pass-1 peephole optimizations for the AND instruction at p.

    p      : the AND instruction to inspect. It is a var parameter: when two
             ANDs are merged, the first one is freed and p is moved to the
             surviving instruction.
    Result : true only when two consecutive ANDs were merged (p changed);
             the other rewrites leave Result at false.

    Patterns handled, looking at p and the next instruction hp1:
      1. and const1,reg / and const2,reg   -> and (const1 and const2),reg
      2. and const,reg  / movzx regX,reg   -> movzx removed when const already
                                              clears all bits above the source width
      3. and const,reg  / movsx regX,reg   -> movsx removed when const also
                                              clears the sign bit of the source width
      4. and x,reg      / jcc              -> test x,reg when reg is not in
                                              UsedRegs }
  function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
    var
      hp1 : tai;
    begin
      Result:=false;

      { nothing to do without a following instruction }
      if not(GetNextInstruction(p, hp1)) then
        exit;

      if MatchOpType(taicpu(p),top_const,top_reg) and
        MatchInstruction(hp1,A_AND,[]) and
        MatchOpType(taicpu(hp1),top_const,top_reg) and
        (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
        { the second register must contain the first one, so compare their subreg types }
        (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
        { NOTE(review): presumably keeps the merged constant within the range of a
          32 bit immediate - confirm }
        (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
        { change
            and const1, reg
            and const2, reg
          to
            and (const1 and const2), reg
        }
        begin
          taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
          DebugMsg('Peephole AndAnd2And done',hp1);
          asml.remove(p);
          p.Free;
          { continue with the merged instruction }
          p:=hp1;
          Result:=true;
          exit;
        end
      else if MatchOpType(taicpu(p),top_const,top_reg) and
        MatchInstruction(hp1,A_MOVZX,[]) and
        (taicpu(hp1).oper[0]^.typ = top_reg) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
        { source and destination of the movzx must be views of the same super
          register; their sub register kinds differ by construction because the
          operand sizes differ, so the super registers have to be compared
          (same check as in the movsx case below) }
        (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
        (((taicpu(p).opsize=S_W) and
          (taicpu(hp1).opsize=S_BW)) or
         ((taicpu(p).opsize=S_L) and
          (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
          or
         ((taicpu(p).opsize=S_Q) and
          (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
        ) then
        begin
          { the zero extension is redundant if the and mask has no bits set
            above the width of the movzx source }
          if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
               ) or
             (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
              or
             (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $ffffffff)=taicpu(p).oper[0]^.val)
             )
{$endif x86_64}
            then
            begin
              DebugMsg('Peephole AndMovzToAnd done',p);
              asml.remove(hp1);
              hp1.free;
            end;
        end
      else if MatchOpType(taicpu(p),top_const,top_reg) and
        MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
        (taicpu(hp1).oper[0]^.typ = top_reg) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
        (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
        (((taicpu(p).opsize=S_W) and
          (taicpu(hp1).opsize=S_BW)) or
         ((taicpu(p).opsize=S_L) and
          (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
          or
         ((taicpu(p).opsize=S_Q) and
          (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
        ) then
        begin
          { the sign extension is redundant if the and mask additionally
            clears the sign bit of the movsx source width }
          if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
               ) or
             (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
              or
             (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
             )
{$endif x86_64}
            then
            begin
              DebugMsg('PeepHole Optimization,AndMovsxToAnd',p);
              asml.remove(hp1);
              hp1.free;
            end;
        end
      else if (taicpu(p).oper[1]^.typ = top_reg) and
        (hp1.typ = ait_instruction) and
        (taicpu(hp1).is_jmp) and
        (taicpu(hp1).opcode<>A_JMP) and
        not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
        { change
            and x, reg
            jxx
          to
            test x, reg
            jxx
          if reg is deallocated before the
          jump, but only if it's a conditional jump (PFV)
        }
        taicpu(p).opcode := A_TEST;
    end;
  { Post-peephole rewrite for a MOV instruction at p:
    changes "mov $0, %reg" into "xor %reg, %reg".

    The rewrite is applied only if the first operand matches the constant 0,
    the second operand is a register, and NR_DEFAULTFLAGS is not contained
    in UsedRegs (presumably because xor, unlike mov, alters the flags).
    In every other case the instruction is left untouched. }
  procedure TX86AsmOptimizer.PostPeepholeOptMov(const p : tai);
    var
      insn : taicpu;
    begin
      insn:=taicpu(p);

      { source must be the constant zero }
      if not(MatchOperand(insn.oper[0]^,0)) then
        exit;

      { destination must be a register }
      if insn.oper[1]^.typ<>top_reg then
        exit;

      { the flags must not be in use at this point }
      if RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs) then
        exit;

      { turn the mov into xor %reg, %reg }
      insn.opcode:=A_XOR;
      insn.loadReg(0,insn.oper[1]^.reg);
    end;
  1706. end.