aoptx86.pas

  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
  27. type
  28. TX86AsmOptimizer = class(TAsmOptimizer)
  29. function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
  30. function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
  31. function RegReadByInstruction(reg : TRegister; hp : tai) : boolean;
  32. protected
  33. { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
  34. function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
  35. { checks whether reading the value in reg1 depends on the value of reg2. This
  36. is very similar to SuperRegistersEqual, except it takes into account that
  37. R_SUBH and R_SUBL are independent (e.g. reading from AL does not
  38. depend on the value in AH). }
  39. function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
  40. procedure DebugMsg(const s : string; p : tai);inline;
  41. procedure AllocRegBetween(reg : tregister; p1,p2 : tai;var initialusedregs : TAllUsedRegs);
  42. class function IsExitCode(p : tai) : boolean;
  43. class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean;
  44. procedure RemoveLastDeallocForFuncRes(p : tai);
  45. function PrePeepholeOptSxx(var p : tai) : boolean;
  46. function OptPass1AND(var p : tai) : boolean;
  47. function OptPass1VMOVAP(var p : tai) : boolean;
  48. function OptPass1VOP(const p : tai) : boolean;
  49. function OptPass1MOV(var p : tai) : boolean;
  50. function OptPass1Movx(var p : tai) : boolean;
  51. function OptPass1MOVAP(var p : tai) : boolean;
  52. function OptPass1MOVXX(var p : tai) : boolean;
  53. function OptPass1OP(const p : tai) : boolean;
  54. function OptPass2MOV(var p : tai) : boolean;
  55. function OptPass2Imul(var p : tai) : boolean;
  56. function OptPass2Jmp(var p : tai) : boolean;
  57. function OptPass2Jcc(var p : tai) : boolean;
  58. procedure PostPeepholeOptMov(const p : tai);
  59. end;
  60. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  61. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  62. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  63. function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;
  64. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  65. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  66. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  67. function RefsEqual(const r1, r2: treference): boolean;
  68. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  69. { returns true, if ref is a reference using only the registers passed as base and index
  70. and having an offset }
  71. function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  72. implementation
  73. uses
  74. cutils,verbose,
  75. globals,
  76. cpuinfo,
  77. procinfo,
  78. aasmbase,
  79. aoptutils,
  80. symconst,symsym,
  81. itcpugas;
  82. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  83. begin
  84. result :=
  85. (instr.typ = ait_instruction) and
  86. (taicpu(instr).opcode = op) and
  87. ((opsize = []) or (taicpu(instr).opsize in opsize));
  88. end;
  89. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  90. begin
  91. result :=
  92. (instr.typ = ait_instruction) and
  93. ((taicpu(instr).opcode = op1) or
  94. (taicpu(instr).opcode = op2)
  95. ) and
  96. ((opsize = []) or (taicpu(instr).opsize in opsize));
  97. end;
  98. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  99. begin
  100. result :=
  101. (instr.typ = ait_instruction) and
  102. ((taicpu(instr).opcode = op1) or
  103. (taicpu(instr).opcode = op2) or
  104. (taicpu(instr).opcode = op3)
  105. ) and
  106. ((opsize = []) or (taicpu(instr).opsize in opsize));
  107. end;
  108. function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
  109. const opsize : topsizes) : boolean;
  110. var
  111. op : TAsmOp;
  112. begin
  113. result:=false;
  114. for op in ops do
  115. begin
  116. if (instr.typ = ait_instruction) and
  117. (taicpu(instr).opcode = op) and
  118. ((opsize = []) or (taicpu(instr).opsize in opsize)) then
  119. begin
  120. result:=true;
  121. exit;
  122. end;
  123. end;
  124. end;
  125. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  126. begin
  127. result := (oper.typ = top_reg) and (oper.reg = reg);
  128. end;
  129. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  130. begin
  131. result := (oper.typ = top_const) and (oper.val = a);
  132. end;
  133. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  134. begin
  135. result := oper1.typ = oper2.typ;
  136. if result then
  137. case oper1.typ of
  138. top_const:
  139. Result:=oper1.val = oper2.val;
  140. top_reg:
  141. Result:=oper1.reg = oper2.reg;
  142. top_ref:
  143. Result:=RefsEqual(oper1.ref^, oper2.ref^);
  144. else
  145. internalerror(2013102801);
  146. end
  147. end;
  148. function RefsEqual(const r1, r2: treference): boolean;
  149. begin
  150. RefsEqual :=
  151. (r1.offset = r2.offset) and
  152. (r1.segment = r2.segment) and (r1.base = r2.base) and
  153. (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
  154. (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
  155. (r1.relsymbol = r2.relsymbol);
  156. end;
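{ note (illustrative, derived from the checks below): in MatchReference and
  MatchReferenceWithOffset, passing NR_INVALID as base or index acts as a
  wildcard, e.g. MatchReference(ref,NR_EBP,NR_INVALID) accepts a reference with
  base %ebp and any (or no) index; MatchReference additionally requires a zero
  offset }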
  157. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  158. begin
  159. Result:=(ref.offset=0) and
  160. (ref.scalefactor in [0,1]) and
  161. (ref.segment=NR_NO) and
  162. (ref.symbol=nil) and
  163. (ref.relsymbol=nil) and
  164. ((base=NR_INVALID) or
  165. (ref.base=base)) and
  166. ((index=NR_INVALID) or
  167. (ref.index=index));
  168. end;
  169. function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  170. begin
  171. Result:=(ref.scalefactor in [0,1]) and
  172. (ref.segment=NR_NO) and
  173. (ref.symbol=nil) and
  174. (ref.relsymbol=nil) and
  175. ((base=NR_INVALID) or
  176. (ref.base=base)) and
  177. ((index=NR_INVALID) or
  178. (ref.index=index));
  179. end;
  180. function TX86AsmOptimizer.InstructionLoadsFromReg(const reg: TRegister;const hp: tai): boolean;
  181. begin
  182. Result:=RegReadByInstruction(reg,hp);
  183. end;
  184. function TX86AsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
  185. var
  186. p: taicpu;
  187. opcount: longint;
  188. begin
  189. RegReadByInstruction := false;
  190. if hp.typ <> ait_instruction then
  191. exit;
  192. p := taicpu(hp);
  193. case p.opcode of
  194. A_CALL:
  195. regreadbyinstruction := true;
  196. A_IMUL:
  197. case p.ops of
  198. 1:
  199. regReadByInstruction := RegInOp(reg,p.oper[0]^) or
  200. (
  201. ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
  202. ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
  203. );
  204. 2,3:
  205. regReadByInstruction :=
  206. reginop(reg,p.oper[0]^) or
  207. reginop(reg,p.oper[1]^);
  208. end;
  209. A_MUL:
  210. begin
  211. regReadByInstruction := RegInOp(reg,p.oper[0]^) or
  212. (
  213. ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
  214. ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
  215. );
  216. end;
  217. A_IDIV,A_DIV:
  218. begin
  219. regReadByInstruction := RegInOp(reg,p.oper[0]^) or
  220. (
  221. (getregtype(reg)=R_INTREGISTER) and
  222. (
  223. (getsupreg(reg)=RS_EAX) or ((getsupreg(reg)=RS_EDX) and (p.opsize<>S_B))
  224. )
  225. );
  226. end;
  227. else
  228. begin
  229. if (p.opcode=A_LEA) and is_segment_reg(reg) then
  230. begin
  231. RegReadByInstruction := false;
  232. exit;
  233. end;
  234. for opcount := 0 to p.ops-1 do
  235. if (p.oper[opCount]^.typ = top_ref) and
  236. RegInRef(reg,p.oper[opcount]^.ref^) then
  237. begin
  238. RegReadByInstruction := true;
  239. exit
  240. end;
  241. { special handling for SSE MOVSD }
  242. if (p.opcode=A_MOVSD) and (p.ops>0) then
  243. begin
  244. if p.ops<>2 then
  245. internalerror(2017042702);
  246. regReadByInstruction := reginop(reg,p.oper[0]^) or
  247. (
  248. (p.oper[1]^.typ=top_reg) and (p.oper[0]^.typ=top_reg) and reginop(reg, p.oper[1]^)
  249. );
  250. exit;
  251. end;
  252. with insprop[p.opcode] do
  253. begin
  254. if getregtype(reg)=R_INTREGISTER then
  255. begin
  256. case getsupreg(reg) of
  257. RS_EAX:
  258. if [Ch_REAX,Ch_RWEAX,Ch_MEAX]*Ch<>[] then
  259. begin
  260. RegReadByInstruction := true;
  261. exit
  262. end;
  263. RS_ECX:
  264. if [Ch_RECX,Ch_RWECX,Ch_MECX]*Ch<>[] then
  265. begin
  266. RegReadByInstruction := true;
  267. exit
  268. end;
  269. RS_EDX:
  270. if [Ch_REDX,Ch_RWEDX,Ch_MEDX]*Ch<>[] then
  271. begin
  272. RegReadByInstruction := true;
  273. exit
  274. end;
  275. RS_EBX:
  276. if [Ch_REBX,Ch_RWEBX,Ch_MEBX]*Ch<>[] then
  277. begin
  278. RegReadByInstruction := true;
  279. exit
  280. end;
  281. RS_ESP:
  282. if [Ch_RESP,Ch_RWESP,Ch_MESP]*Ch<>[] then
  283. begin
  284. RegReadByInstruction := true;
  285. exit
  286. end;
  287. RS_EBP:
  288. if [Ch_REBP,Ch_RWEBP,Ch_MEBP]*Ch<>[] then
  289. begin
  290. RegReadByInstruction := true;
  291. exit
  292. end;
  293. RS_ESI:
  294. if [Ch_RESI,Ch_RWESI,Ch_MESI]*Ch<>[] then
  295. begin
  296. RegReadByInstruction := true;
  297. exit
  298. end;
  299. RS_EDI:
  300. if [Ch_REDI,Ch_RWEDI,Ch_MEDI]*Ch<>[] then
  301. begin
  302. RegReadByInstruction := true;
  303. exit
  304. end;
  305. end;
  306. end;
  307. if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
  308. begin
  309. if (Ch_RFLAGScc in Ch) and not(getsubreg(reg) in [R_SUBW,R_SUBD,R_SUBQ]) then
  310. begin
  311. case p.condition of
  312. C_A,C_NBE, { CF=0 and ZF=0 }
  313. C_BE,C_NA: { CF=1 or ZF=1 }
  314. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY,R_SUBFLAGZERO];
  315. C_AE,C_NB,C_NC, { CF=0 }
  316. C_B,C_NAE,C_C: { CF=1 }
  317. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY];
  318. C_NE,C_NZ, { ZF=0 }
  319. C_E,C_Z: { ZF=1 }
  320. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO];
  321. C_G,C_NLE, { ZF=0 and SF=OF }
  322. C_LE,C_NG: { ZF=1 or SF<>OF }
  323. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO,R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
  324. C_GE,C_NL, { SF=OF }
  325. C_L,C_NGE: { SF<>OF }
  326. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
  327. C_NO, { OF=0 }
  328. C_O: { OF=1 }
  329. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGOVERFLOW];
  330. C_NP,C_PO, { PF=0 }
  331. C_P,C_PE: { PF=1 }
  332. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGPARITY];
  333. C_NS, { SF=0 }
  334. C_S: { SF=1 }
  335. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN];
  336. else
  337. internalerror(2017042701);
  338. end;
  339. if RegReadByInstruction then
  340. exit;
  341. end;
  342. case getsubreg(reg) of
  343. R_SUBW,R_SUBD,R_SUBQ:
  344. RegReadByInstruction :=
  345. [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
  346. Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
  347. Ch_RDirFlag,Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc]*Ch<>[];
  348. R_SUBFLAGCARRY:
  349. RegReadByInstruction:=[Ch_RCarryFlag,Ch_RWCarryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  350. R_SUBFLAGPARITY:
  351. RegReadByInstruction:=[Ch_RParityFlag,Ch_RWParityFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  352. R_SUBFLAGAUXILIARY:
  353. RegReadByInstruction:=[Ch_RAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  354. R_SUBFLAGZERO:
  355. RegReadByInstruction:=[Ch_RZeroFlag,Ch_RWZeroFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  356. R_SUBFLAGSIGN:
  357. RegReadByInstruction:=[Ch_RSignFlag,Ch_RWSignFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  358. R_SUBFLAGOVERFLOW:
  359. RegReadByInstruction:=[Ch_ROverflowFlag,Ch_RWOverflowFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  360. R_SUBFLAGINTERRUPT:
  361. RegReadByInstruction:=[Ch_RFlags,Ch_RWFlags]*Ch<>[];
  362. R_SUBFLAGDIRECTION:
  363. RegReadByInstruction:=[Ch_RDirFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  364. else
  365. internalerror(2017042601);
  366. end;
  367. exit;
  368. end;
  369. if (Ch_NoReadIfEqualRegs in Ch) and (p.ops=2) and
  370. (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
  371. (p.oper[0]^.reg=p.oper[1]^.reg) then
  372. exit;
  373. if ([CH_RWOP1,CH_ROP1,CH_MOP1]*Ch<>[]) and reginop(reg,p.oper[0]^) then
  374. begin
  375. RegReadByInstruction := true;
  376. exit
  377. end;
  378. if ([Ch_RWOP2,Ch_ROP2,Ch_MOP2]*Ch<>[]) and reginop(reg,p.oper[1]^) then
  379. begin
  380. RegReadByInstruction := true;
  381. exit
  382. end;
  383. if ([Ch_RWOP3,Ch_ROP3,Ch_MOP3]*Ch<>[]) and reginop(reg,p.oper[2]^) then
  384. begin
  385. RegReadByInstruction := true;
  386. exit
  387. end;
  388. if ([Ch_RWOP4,Ch_ROP4,Ch_MOP4]*Ch<>[]) and reginop(reg,p.oper[3]^) then
  389. begin
  390. RegReadByInstruction := true;
  391. exit
  392. end;
  393. end;
  394. end;
  395. end;
  396. end;
  397. {$ifdef DEBUG_AOPTCPU}
  398. procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
  399. begin
  400. asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  401. end;
  402. {$else DEBUG_AOPTCPU}
  403. procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  404. begin
  405. end;
  406. {$endif DEBUG_AOPTCPU}
  407. function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
  408. begin
  409. if not SuperRegistersEqual(reg1,reg2) then
  410. exit(false);
  411. if getregtype(reg1)<>R_INTREGISTER then
  412. exit(true); {because SuperRegistersEqual is true}
  413. case getsubreg(reg1) of
  414. { A write to R_SUBL doesn't change R_SUBH and if reg2 is R_SUBW or
  415. higher, it preserves the high bits, so the new value depends on
  416. reg2's previous value. In other words, it is equivalent to doing:
  417. reg2 := (reg2 and $ffffff00) or byte(reg1); }
  418. R_SUBL:
  419. exit(getsubreg(reg2)=R_SUBL);
  420. { A write to R_SUBH doesn't change R_SUBL and if reg2 is R_SUBW or
  421. higher, it actually does a:
  422. reg2 := (reg2 and $ffff00ff) or (reg1 and $ff00); }
  423. R_SUBH:
  424. exit(getsubreg(reg2)=R_SUBH);
  425. { If reg2 is R_SUBD or larger, a write to R_SUBW preserves the high 16
  426. bits of reg2:
  427. reg2 := (reg2 and $ffff0000) or word(reg1); }
  428. R_SUBW:
  429. exit(getsubreg(reg2) in [R_SUBL,R_SUBH,R_SUBW]);
  430. { a write to R_SUBD always overwrites every other subregister,
  431. because it clears the high 32 bits of R_SUBQ on x86_64 }
  432. R_SUBD,
  433. R_SUBQ:
  434. exit(true);
  435. else
  436. internalerror(2017042801);
  437. end;
  438. end;
  439. function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
  440. begin
  441. if not SuperRegistersEqual(reg1,reg2) then
  442. exit(false);
  443. if getregtype(reg1)<>R_INTREGISTER then
  444. exit(true); {because SuperRegistersEqual is true}
  445. case getsubreg(reg1) of
  446. R_SUBL:
  447. exit(getsubreg(reg2)<>R_SUBH);
  448. R_SUBH:
  449. exit(getsubreg(reg2)<>R_SUBL);
  450. R_SUBW,
  451. R_SUBD,
  452. R_SUBQ:
  453. exit(true);
  454. else
  455. internalerror(2017042802);
  456. end;
  457. end;
  458. function TX86AsmOptimizer.PrePeepholeOptSxx(var p : tai) : boolean;
  459. var
  460. hp1 : tai;
  461. l : TCGInt;
  462. begin
  463. result:=false;
  464. { changes the code sequence
  465. shr/sar const1, x
  466. shl const2, x
  467. to
  468. either "sar/and", "shl/and" or just "and" depending on const1 and const2 }
  469. if GetNextInstruction(p, hp1) and
  470. MatchInstruction(hp1,A_SHL,[]) and
  471. (taicpu(p).oper[0]^.typ = top_const) and
  472. (taicpu(hp1).oper[0]^.typ = top_const) and
  473. (taicpu(hp1).opsize = taicpu(p).opsize) and
  474. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
  475. OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
  476. begin
  477. if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
  478. not(cs_opt_size in current_settings.optimizerswitches) then
  479. begin
  480. { shr/sar const1, %reg
  481. shl const2, %reg
  482. with const1 > const2 }
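{ e.g. (illustrative, 32 bit): "shrl $3,%eax; shll $1,%eax" becomes
  "shrl $2,%eax; andl $0xfffffffe,%eax" }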
  483. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
  484. taicpu(hp1).opcode := A_AND;
  485. l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
  486. case taicpu(p).opsize Of
  487. S_B: taicpu(hp1).loadConst(0,l Xor $ff);
  488. S_W: taicpu(hp1).loadConst(0,l Xor $ffff);
  489. S_L: taicpu(hp1).loadConst(0,l Xor aint($ffffffff));
  490. S_Q: taicpu(hp1).loadConst(0,l Xor aint($ffffffffffffffff));
  491. else
  492. Internalerror(2017050703)
  493. end;
  494. end
  495. else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
  496. not(cs_opt_size in current_settings.optimizerswitches) then
  497. begin
  498. { shr/sar const1, %reg
  499. shl const2, %reg
  500. with const1 < const2 }
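{ e.g. (illustrative, 32 bit): "shrl $1,%eax; shll $3,%eax" becomes
  "andl $0xfffffffe,%eax; shll $2,%eax" }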
  501. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
  502. taicpu(p).opcode := A_AND;
  503. l := (1 shl (taicpu(p).oper[0]^.val))-1;
  504. case taicpu(p).opsize Of
  505. S_B: taicpu(p).loadConst(0,l Xor $ff);
  506. S_W: taicpu(p).loadConst(0,l Xor $ffff);
  507. S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
  508. S_Q: taicpu(p).loadConst(0,l Xor aint($ffffffffffffffff));
  509. else
  510. Internalerror(2017050702)
  511. end;
  512. end
  513. else if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
  514. begin
  515. { shr/sar const1, %reg
  516. shl const2, %reg
  517. with const1 = const2 }
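{ e.g. (illustrative, 32 bit): "shrl $2,%eax; shll $2,%eax" becomes
  "andl $0xfffffffc,%eax" }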
  518. taicpu(p).opcode := A_AND;
  519. l := (1 shl (taicpu(p).oper[0]^.val))-1;
  520. case taicpu(p).opsize Of
  521. S_B: taicpu(p).loadConst(0,l Xor $ff);
  522. S_W: taicpu(p).loadConst(0,l Xor $ffff);
  523. S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
  524. S_Q: taicpu(p).loadConst(0,l Xor aint($ffffffffffffffff));
  525. else
  526. Internalerror(2017050701)
  527. end;
  528. asml.remove(hp1);
  529. hp1.free;
  530. end;
  531. end;
  532. end;
  533. { allocates register reg between (and including) instructions p1 and p2
  534. the type of p1 and p2 must not be in SkipInstr
  535. note that this routine is both called from the peephole optimizer
  536. (where optinfo is not yet initialised) and from the cse (where it is) }
  537. procedure TX86AsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
  538. var
  539. hp, start: tai;
  540. removedsomething,
  541. firstRemovedWasAlloc,
  542. lastRemovedWasDealloc: boolean;
  543. begin
  544. {$ifdef EXTDEBUG}
  545. { if assigned(p1.optinfo) and
  546. (ptaiprop(p1.optinfo)^.usedregs <> initialusedregs) then
  547. internalerror(2004101010); }
  548. {$endif EXTDEBUG}
  549. start := p1;
  550. if (reg = NR_ESP) or
  551. (reg = current_procinfo.framepointer) or
  552. not(assigned(p1)) then
  553. { this happens with registers which are loaded implicitly, outside the }
  554. { current block (e.g. esi with self) }
  555. exit;
  556. { make sure we allocate it for this instruction }
  557. getnextinstruction(p2,p2);
  558. lastRemovedWasDealloc := false;
  559. removedSomething := false;
  560. firstRemovedWasAlloc := false;
  561. {$ifdef allocregdebug}
  562. hp := tai_comment.Create(strpnew('allocating '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
  563. ' from here...'));
  564. insertllitem(asml,p1.previous,p1,hp);
  565. hp := tai_comment.Create(strpnew('allocated '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
  566. ' till here...'));
  567. insertllitem(asml,p2,p2.next,hp);
  568. {$endif allocregdebug}
  569. { do it the safe way: always allocate the full super register;
  570. since we do no register re-allocation in the peephole optimizer,
  571. this does not hurt
  572. }
  573. case getregtype(reg) of
  574. R_MMREGISTER:
  575. reg:=newreg(R_MMREGISTER,getsupreg(reg),R_SUBMMWHOLE);
  576. R_INTREGISTER:
  577. reg:=newreg(R_INTREGISTER,getsupreg(reg),R_SUBWHOLE);
  578. end;
  579. if not(RegInUsedRegs(reg,initialusedregs)) then
  580. begin
  581. hp := tai_regalloc.alloc(reg,nil);
  582. insertllItem(p1.previous,p1,hp);
  583. IncludeRegInUsedRegs(reg,initialusedregs);
  584. end;
  585. while assigned(p1) and
  586. (p1 <> p2) do
  587. begin
  588. if assigned(p1.optinfo) then
  589. internalerror(2014022301); // IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs);
  590. p1 := tai(p1.next);
  591. repeat
  592. while assigned(p1) and
  593. (p1.typ in (SkipInstr-[ait_regalloc])) Do
  594. p1 := tai(p1.next);
  595. { remove all allocation/deallocation info about the register in between }
  596. if assigned(p1) and
  597. (p1.typ = ait_regalloc) then
  598. begin
  599. { same super register, different sub register? }
  600. if SuperRegistersEqual(reg,tai_regalloc(p1).reg) and (tai_regalloc(p1).reg<>reg) then
  601. begin
  602. if (getsubreg(tai_regalloc(p1).reg)>getsubreg(reg)) or (getsubreg(reg)=R_SUBH) then
  603. internalerror(2016101501);
  604. tai_regalloc(p1).reg:=reg;
  605. end;
  606. if tai_regalloc(p1).reg=reg then
  607. begin
  608. if not removedSomething then
  609. begin
  610. firstRemovedWasAlloc := tai_regalloc(p1).ratype=ra_alloc;
  611. removedSomething := true;
  612. end;
  613. lastRemovedWasDealloc := (tai_regalloc(p1).ratype=ra_dealloc);
  614. hp := tai(p1.Next);
  615. asml.Remove(p1);
  616. p1.free;
  617. p1 := hp;
  618. end
  619. else
  620. p1 := tai(p1.next);
  621. end;
  622. until not(assigned(p1)) or
  623. not(p1.typ in SkipInstr);
  624. end;
  625. if assigned(p1) then
  626. begin
  627. if firstRemovedWasAlloc then
  628. begin
  629. hp := tai_regalloc.Alloc(reg,nil);
  630. insertLLItem(start.previous,start,hp);
  631. end;
  632. if lastRemovedWasDealloc then
  633. begin
  634. hp := tai_regalloc.DeAlloc(reg,nil);
  635. insertLLItem(p1.previous,p1,hp);
  636. end;
  637. end;
  638. end;
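{ RegLoadedWithNewValue: returns true if hp overwrites reg with a value that does
  not depend on reg's previous contents; illustrative examples: "movl $0,%eax" and
  "xorl %eax,%eax" qualify for %eax, "movb $0,%al" does not, because the upper
  24 bits of %eax survive }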
  639. function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  640. var
  641. p: taicpu;
  642. begin
  643. if not assigned(hp) or
  644. (hp.typ <> ait_instruction) then
  645. begin
  646. Result := false;
  647. exit;
  648. end;
  649. p := taicpu(hp);
  650. if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
  651. with insprop[p.opcode] do
  652. begin
  653. case getsubreg(reg) of
  654. R_SUBW,R_SUBD,R_SUBQ:
  655. Result:=
  656. RegLoadedWithNewValue(NR_CARRYFLAG,hp) and
  657. RegLoadedWithNewValue(NR_PARITYFLAG,hp) and
  658. RegLoadedWithNewValue(NR_AUXILIARYFLAG,hp) and
  659. RegLoadedWithNewValue(NR_ZEROFLAG,hp) and
  660. RegLoadedWithNewValue(NR_SIGNFLAG,hp) and
  661. RegLoadedWithNewValue(NR_OVERFLOWFLAG,hp);
  662. R_SUBFLAGCARRY:
  663. Result:=[Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag,Ch_WFlags]*Ch<>[];
  664. R_SUBFLAGPARITY:
  665. Result:=[Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag,Ch_WFlags]*Ch<>[];
  666. R_SUBFLAGAUXILIARY:
  667. Result:=[Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag,Ch_WFlags]*Ch<>[];
  668. R_SUBFLAGZERO:
  669. Result:=[Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag,Ch_WFlags]*Ch<>[];
  670. R_SUBFLAGSIGN:
  671. Result:=[Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag,Ch_WFlags]*Ch<>[];
  672. R_SUBFLAGOVERFLOW:
  673. Result:=[Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag,Ch_WFlags]*Ch<>[];
  674. R_SUBFLAGINTERRUPT:
  675. Result:=[Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*Ch<>[];
  676. R_SUBFLAGDIRECTION:
  677. Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*Ch<>[];
  678. else
  679. internalerror(2017050501);
  680. end;
  681. exit;
  682. end;
  683. Result :=
  684. (((p.opcode = A_MOV) or
  685. (p.opcode = A_MOVZX) or
  686. (p.opcode = A_MOVSX) or
  687. (p.opcode = A_LEA) or
  688. (p.opcode = A_VMOVSS) or
  689. (p.opcode = A_VMOVSD) or
  690. (p.opcode = A_VMOVAPD) or
  691. (p.opcode = A_VMOVAPS) or
  692. (p.opcode = A_VMOVQ) or
  693. (p.opcode = A_MOVSS) or
  694. (p.opcode = A_MOVSD) or
  695. (p.opcode = A_MOVQ) or
  696. (p.opcode = A_MOVAPD) or
  697. (p.opcode = A_MOVAPS) or
  698. {$ifndef x86_64}
  699. (p.opcode = A_LDS) or
  700. (p.opcode = A_LES) or
  701. {$endif not x86_64}
  702. (p.opcode = A_LFS) or
  703. (p.opcode = A_LGS) or
  704. (p.opcode = A_LSS)) and
  705. (p.ops=2) and { A_MOVSD can have zero operands, so this check is needed }
  706. (p.oper[1]^.typ = top_reg) and
  707. (Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)) and
  708. ((p.oper[0]^.typ = top_const) or
  709. ((p.oper[0]^.typ = top_reg) and
  710. not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
  711. ((p.oper[0]^.typ = top_ref) and
  712. not RegInRef(reg,p.oper[0]^.ref^)))) or
  713. ((p.opcode = A_POP) and
  714. (Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg))) or
  715. ((p.opcode = A_IMUL) and
  716. (p.ops=3) and
  717. (Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg,reg)) and
  718. (((p.oper[1]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[1]^.reg,reg))) or
  719. ((p.oper[1]^.typ=top_ref) and not(RegInRef(reg,p.oper[1]^.ref^))))) or
  720. ((((p.opcode = A_IMUL) or
  721. (p.opcode = A_MUL)) and
  722. (p.ops=1)) and
  723. (((p.oper[0]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
  724. ((p.oper[0]^.typ=top_ref) and not(RegInRef(reg,p.oper[0]^.ref^)))) and
  725. (((p.opsize=S_B) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
  726. ((p.opsize=S_W) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
  727. ((p.opsize=S_L) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg))
  728. {$ifdef x86_64}
  729. or ((p.opsize=S_Q) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg))
  730. {$endif x86_64}
  731. )) or
  732. ((p.opcode = A_CWD) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
  733. ((p.opcode = A_CDQ) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg)) or
  734. {$ifdef x86_64}
  735. ((p.opcode = A_CQO) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg)) or
  736. {$endif x86_64}
  737. ((p.opcode = A_CBW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
  738. {$ifndef x86_64}
  739. ((p.opcode = A_LDS) and (reg=NR_DS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  740. ((p.opcode = A_LES) and (reg=NR_ES) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  741. {$endif not x86_64}
  742. ((p.opcode = A_LFS) and (reg=NR_FS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  743. ((p.opcode = A_LGS) and (reg=NR_GS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  744. ((p.opcode = A_LSS) and (reg=NR_SS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  745. {$ifndef x86_64}
  746. ((p.opcode = A_AAM) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
  747. {$endif not x86_64}
  748. ((p.opcode = A_LAHF) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
  749. ((p.opcode = A_LODSB) and Reg1WriteOverwritesReg2Entirely(NR_AL,reg)) or
  750. ((p.opcode = A_LODSW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg)) or
  751. ((p.opcode = A_LODSD) and Reg1WriteOverwritesReg2Entirely(NR_EAX,reg)) or
  752. {$ifdef x86_64}
  753. ((p.opcode = A_LODSQ) and Reg1WriteOverwritesReg2Entirely(NR_RAX,reg)) or
  754. {$endif x86_64}
  755. ((p.opcode = A_SETcc) and (p.oper[0]^.typ=top_reg) and Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
  756. (((p.opcode = A_FSTSW) or
  757. (p.opcode = A_FNSTSW)) and
  758. (p.oper[0]^.typ=top_reg) and
  759. Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
  760. (((p.opcode = A_XOR) or (p.opcode = A_SUB) or (p.opcode = A_SBB)) and
  761. (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
  762. (p.oper[0]^.reg=p.oper[1]^.reg) and
  763. Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg));
  764. end;
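{ IsExitCode: recognises typical function epilogues, e.g. (illustrative)
    ret
    leave; ret
    movl %ebp,%esp; popl %ebp; ret
  optionally preceded by a nop on some x86-64 targets }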
  765. class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
  766. var
  767. hp2,hp3 : tai;
  768. begin
  769. { some x86-64 targets issue a NOP before the real exit code }
  770. if MatchInstruction(p,A_NOP,[]) then
  771. GetNextInstruction(p,p);
  772. result:=assigned(p) and (p.typ=ait_instruction) and
  773. ((taicpu(p).opcode = A_RET) or
  774. ((taicpu(p).opcode=A_LEAVE) and
  775. GetNextInstruction(p,hp2) and
  776. MatchInstruction(hp2,A_RET,[S_NO])
  777. ) or
  778. ((((taicpu(p).opcode=A_MOV) and
  779. MatchOpType(taicpu(p),top_reg,top_reg) and
  780. (taicpu(p).oper[0]^.reg=current_procinfo.framepointer) and
  781. (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)) or
  782. ((taicpu(p).opcode=A_LEA) and
  783. MatchOpType(taicpu(p),top_ref,top_reg) and
  784. (taicpu(p).oper[0]^.ref^.base=current_procinfo.framepointer) and
  785. (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)
  786. )
  787. ) and
  788. GetNextInstruction(p,hp2) and
  789. MatchInstruction(hp2,A_POP,[reg2opsize(current_procinfo.framepointer)]) and
  790. MatchOpType(taicpu(hp2),top_reg) and
  791. (taicpu(hp2).oper[0]^.reg=current_procinfo.framepointer) and
  792. GetNextInstruction(hp2,hp3) and
  793. MatchInstruction(hp3,A_RET,[S_NO])
  794. )
  795. );
  796. end;
  797. class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
  798. begin
  799. isFoldableArithOp := False;
  800. case hp1.opcode of
  801. A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
  802. isFoldableArithOp :=
  803. ((taicpu(hp1).oper[0]^.typ = top_const) or
  804. ((taicpu(hp1).oper[0]^.typ = top_reg) and
  805. (taicpu(hp1).oper[0]^.reg <> reg))) and
  806. (taicpu(hp1).oper[1]^.typ = top_reg) and
  807. (taicpu(hp1).oper[1]^.reg = reg);
  808. A_INC,A_DEC,A_NEG,A_NOT:
  809. isFoldableArithOp :=
  810. (taicpu(hp1).oper[0]^.typ = top_reg) and
  811. (taicpu(hp1).oper[0]^.reg = reg);
  812. end;
  813. end;
  814. procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);
  815. procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
  816. var
  817. hp2: tai;
  818. begin
  819. hp2 := p;
  820. repeat
  821. hp2 := tai(hp2.previous);
  822. if assigned(hp2) and
  823. (hp2.typ = ait_regalloc) and
  824. (tai_regalloc(hp2).ratype=ra_dealloc) and
  825. (getregtype(tai_regalloc(hp2).reg) = R_INTREGISTER) and
  826. (getsupreg(tai_regalloc(hp2).reg) = supreg) then
  827. begin
  828. asml.remove(hp2);
  829. hp2.free;
  830. break;
  831. end;
  832. until not(assigned(hp2)) or regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),hp2);
  833. end;
  834. begin
  835. case current_procinfo.procdef.returndef.typ of
  836. arraydef,recorddef,pointerdef,
  837. stringdef,enumdef,procdef,objectdef,errordef,
  838. filedef,setdef,procvardef,
  839. classrefdef,forwarddef:
  840. DoRemoveLastDeallocForFuncRes(RS_EAX);
  841. orddef:
  842. if current_procinfo.procdef.returndef.size <> 0 then
  843. begin
  844. DoRemoveLastDeallocForFuncRes(RS_EAX);
  845. { for int64/qword }
  846. if current_procinfo.procdef.returndef.size = 8 then
  847. DoRemoveLastDeallocForFuncRes(RS_EDX);
  848. end;
  849. end;
  850. end;
  851. function TX86AsmOptimizer.OptPass1MOVAP(var p : tai) : boolean;
  852. var
  853. TmpUsedRegs : TAllUsedRegs;
  854. hp1,hp2 : tai;
  855. alloc ,dealloc: tai_regalloc;
  856. begin
  857. result:=false;
  858. if MatchOpType(taicpu(p),top_reg,top_reg) and
  859. GetNextInstruction(p, hp1) and
  860. (hp1.typ = ait_instruction) and
  861. GetNextInstruction(hp1, hp2) and
  862. MatchInstruction(hp2,taicpu(p).opcode,[]) and
  863. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  864. MatchOpType(taicpu(hp2),top_reg,top_reg) and
  865. MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
  866. (((taicpu(p).opcode=A_MOVAPS) and
  867. ((taicpu(hp1).opcode=A_ADDSS) or (taicpu(hp1).opcode=A_SUBSS) or
  868. (taicpu(hp1).opcode=A_MULSS) or (taicpu(hp1).opcode=A_DIVSS))) or
  869. ((taicpu(p).opcode=A_MOVAPD) and
  870. ((taicpu(hp1).opcode=A_ADDSD) or (taicpu(hp1).opcode=A_SUBSD) or
  871. (taicpu(hp1).opcode=A_MULSD) or (taicpu(hp1).opcode=A_DIVSD)))
  872. ) then
  873. { change
  874. movapX reg,reg2
  875. addsX/subsX/... reg3, reg2
  876. movapX reg2,reg
  877. to
  878. addsX/subsX/... reg3,reg
  879. }
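{ concrete instance (illustrative): "movaps %xmm1,%xmm2; addss %xmm3,%xmm2;
  movaps %xmm2,%xmm1" becomes "addss %xmm3,%xmm1", provided %xmm2 is not
  used afterwards }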
  880. begin
  881. CopyUsedRegs(TmpUsedRegs);
  882. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  883. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  884. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  885. begin
  886. DebugMsg('Peephole Optimization MovapXOpMovapX2Op ('+
  887. std_op2str[taicpu(p).opcode]+' '+
  888. std_op2str[taicpu(hp1).opcode]+' '+
  889. std_op2str[taicpu(hp2).opcode]+') done',p);
  890. { we cannot eliminate the first move if
  891. the operation uses the same register for source and dest }
  892. if not(OpsEqual(taicpu(hp1).oper[1]^,taicpu(hp1).oper[0]^)) then
  893. begin
  894. asml.remove(p);
  895. p.Free;
  896. end;
  897. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  898. asml.remove(hp2);
  899. hp2.Free;
  900. p:=hp1;
  901. result:=true;
  902. end;
  903. ReleaseUsedRegs(TmpUsedRegs);
  904. end
  905. end;
  906. function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
  907. var
  908. TmpUsedRegs : TAllUsedRegs;
  909. hp1,hp2 : tai;
  910. begin
  911. result:=false;
  912. if MatchOpType(taicpu(p),top_reg,top_reg) then
  913. begin
  914. { vmova* reg1,reg1
  915. =>
  916. <nop> }
  917. if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
  918. begin
  919. GetNextInstruction(p,hp1);
  920. asml.Remove(p);
  921. p.Free;
  922. p:=hp1;
  923. result:=true;
  924. end
  925. else if GetNextInstruction(p,hp1) then
  926. begin
  927. if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
  928. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  929. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  930. begin
  931. { vmova* reg1,reg2
  932. vmova* reg2,reg3
  933. dealloc reg2
  934. =>
  935. vmova* reg1,reg3 }
  936. CopyUsedRegs(TmpUsedRegs);
  937. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  938. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  939. begin
  940. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  941. asml.Remove(hp1);
  942. hp1.Free;
  943. result:=true;
  944. end
  945. { special case:
  946. vmova* reg1,reg2
  947. vmova* reg2,reg1
  948. =>
  949. vmova* reg1,reg2 }
  950. else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
  951. begin
  952. asml.Remove(hp1);
  953. hp1.Free;
  954. result:=true;
  955. end
  956. end
  957. else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
  958. { we mix single and double operations here because we assume that the compiler
  959. generates vmovapd only after double operations and vmovaps only after single operations }
  960. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
  961. GetNextInstruction(hp1,hp2) and
  962. MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
  963. MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
  964. begin
  965. CopyUsedRegs(TmpUsedRegs);
  966. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  967. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  968. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs))
  969. then
  970. begin
  971. taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
  972. asml.Remove(p);
  973. p.Free;
  974. asml.Remove(hp2);
  975. hp2.Free;
  976. p:=hp1;
  977. end;
  978. end;
  979. end;
  980. end;
  981. end;
  982. function TX86AsmOptimizer.OptPass1VOP(const p : tai) : boolean;
  983. var
  984. TmpUsedRegs : TAllUsedRegs;
  985. hp1 : tai;
  986. begin
  987. result:=false;
  988. { replace
  989. V<Op>X %mreg1,%mreg2,%mreg3
  990. VMovX %mreg3,%mreg4
  991. dealloc %mreg3
  992. by
  993. V<Op>X %mreg1,%mreg2,%mreg4
  994. ?
  995. }
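{ concrete instance (illustrative): "vaddsd %xmm1,%xmm2,%xmm3; vmovapd %xmm3,%xmm4"
  with %xmm3 deallocated afterwards becomes "vaddsd %xmm1,%xmm2,%xmm4" }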
  996. if GetNextInstruction(p,hp1) and
  997. { we mix single and double operations here because we assume that the compiler
  998. generates vmovapd only after double operations and vmovaps only after single operations }
  999. MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
  1000. MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
  1001. (taicpu(hp1).oper[1]^.typ=top_reg) then
  1002. begin
  1003. CopyUsedRegs(TmpUsedRegs);
  1004. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1005. if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)
  1006. ) then
  1007. begin
  1008. taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
  1009. DebugMsg('PeepHole Optimization VOpVmov2VOp done',p);
  1010. asml.Remove(hp1);
  1011. hp1.Free;
  1012. result:=true;
  1013. end;
  1014. end;
  1015. end;
  1016. function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  1017. var
  1018. hp1, hp2: tai;
  1019. TmpUsedRegs : TAllUsedRegs;
  1020. GetNextIntruction_p : Boolean;
  1021. begin
  1022. Result:=false;
  1023. { remove mov reg1,reg1? }
  1024. if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
  1025. begin
  1026. GetNextInstruction(p, hp1);
  1027. DebugMsg('PeepHole Optimization Mov2Nop done',p);
  1028. asml.remove(p);
  1029. p.free;
  1030. p:=hp1;
  1031. Result:=true;
  1032. exit;
  1033. end;
  1034. GetNextIntruction_p:=GetNextInstruction(p, hp1);
  1035. if GetNextIntruction_p and
  1036. MatchInstruction(hp1,A_AND,[]) and
  1037. (taicpu(p).oper[1]^.typ = top_reg) and
  1038. MatchOpType(taicpu(hp1),top_const,top_reg) and
  1039. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
  1040. case taicpu(p).opsize Of
  1041. S_L:
  1042. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  1043. begin
  1044. DebugMsg('PeepHole Optimization MovAnd2Mov done',p);
  1045. asml.remove(hp1);
  1046. hp1.free;
  1047. Result:=true;
  1048. exit;
  1049. end;
  1050. end
  1051. else if GetNextIntruction_p and
  1052. MatchInstruction(hp1,A_MOV,[]) and
  1053. (taicpu(p).oper[1]^.typ = top_reg) and
  1054. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
  1055. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  1056. begin
  1057. CopyUsedRegs(TmpUsedRegs);
  1058. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  1059. { we have
  1060. mov x, %treg
  1061. mov %treg, y
  1062. }
  1063. if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
  1064. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1065. { we've got
  1066. mov x, %treg
  1067. mov %treg, y
  1068. where %treg is not used afterwards }
  1069. case taicpu(p).oper[0]^.typ Of
  1070. top_reg:
  1071. begin
  1072. { change
  1073. mov %reg, %treg
  1074. mov %treg, y
  1075. to
  1076. mov %reg, y
  1077. }
  1078. if taicpu(hp1).oper[1]^.typ=top_reg then
  1079. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1080. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  1081. DebugMsg('PeepHole Optimization MovMov2Mov 2 done',p);
  1082. asml.remove(hp1);
  1083. hp1.free;
  1084. ReleaseUsedRegs(TmpUsedRegs);
  1085. Result:=true;
  1086. Exit;
  1087. end;
  1088. top_ref:
  1089. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  1090. begin
  1091. { change
  1092. mov mem, %treg
  1093. mov %treg, %reg
  1094. to
  1095. mov mem, %reg
  1096. }
  1097. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  1098. DebugMsg('PeepHole Optimization MovMov2Mov 3 done',p);
  1099. asml.remove(hp1);
  1100. hp1.free;
  1101. ReleaseUsedRegs(TmpUsedRegs);
  1102. Result:=true;
  1103. Exit;
  1104. end;
  1105. end;
  1106. ReleaseUsedRegs(TmpUsedRegs);
  1107. end
  1108. else
  1109. { Change
  1110. mov %reg1, %reg2
  1111. xxx %reg2, ???
  1112. to
  1113. mov %reg1, %reg2
  1114. xxx %reg1, ???
  1115. to avoid a write/read penalty
  1116. }
  1117. if MatchOpType(taicpu(p),top_reg,top_reg) and
  1118. GetNextInstruction(p,hp1) and
  1119. (tai(hp1).typ = ait_instruction) and
  1120. (taicpu(hp1).ops >= 1) and
  1121. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  1122. { we have
  1123. mov %reg1, %reg2
  1124. XXX %reg2, ???
  1125. }
  1126. begin
  1127. if ((taicpu(hp1).opcode = A_OR) or
  1128. (taicpu(hp1).opcode = A_TEST)) and
  1129. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1130. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  1131. { we have
  1132. mov %reg1, %reg2
  1133. test/or %reg2, %reg2
  1134. }
  1135. begin
  1136. CopyUsedRegs(TmpUsedRegs);
  1137. { reg1 will be used after the first instruction,
  1138. so update the allocation info }
  1139. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1140. if GetNextInstruction(hp1, hp2) and
  1141. (hp2.typ = ait_instruction) and
  1142. taicpu(hp2).is_jmp and
  1143. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  1144. { change
  1145. mov %reg1, %reg2
  1146. test/or %reg2, %reg2
  1147. jxx
  1148. to
  1149. test %reg1, %reg1
  1150. jxx
  1151. }
  1152. begin
  1153. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1154. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1155. DebugMsg('PeepHole Optimization MovTestJxx2TestMov done',p);
  1156. asml.remove(p);
  1157. p.free;
  1158. p := hp1;
  1159. ReleaseUsedRegs(TmpUsedRegs);
  1160. Exit;
  1161. end
  1162. else
  1163. { change
  1164. mov %reg1, %reg2
  1165. test/or %reg2, %reg2
  1166. to
  1167. mov %reg1, %reg2
  1168. test/or %reg1, %reg1
  1169. }
  1170. begin
  1171. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1172. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1173. DebugMsg('PeepHole Optimization MovTestJxx2MovTestJxx done',p);
  1174. end;
  1175. ReleaseUsedRegs(TmpUsedRegs);
  1176. end
  1177. end
  1178. else
  1179. { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  1180. x >= RetOffset) as it doesn't do anything (it writes either to a
  1181. parameter or to the temporary storage room for the function
  1182. result)
  1183. }
  1184. if GetNextIntruction_p and
  1185. (tai(hp1).typ = ait_instruction) then
  1186. begin
  1187. if IsExitCode(hp1) and
  1188. MatchOpType(taicpu(p),top_reg,top_ref) and
  1189. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  1190. not(assigned(current_procinfo.procdef.funcretsym) and
  1191. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1192. (taicpu(p).oper[1]^.ref^.index = NR_NO) then
  1193. begin
  1194. asml.remove(p);
  1195. p.free;
  1196. p:=hp1;
  1197. DebugMsg('Peephole removed deadstore before leave/ret',p);
  1198. RemoveLastDeallocForFuncRes(p);
  1199. exit;
  1200. end
  1201. { change
  1202. mov reg1, mem1
  1203. test/cmp x, mem1
  1204. to
  1205. mov reg1, mem1
  1206. test/cmp x, reg1
  1207. }
  1208. else if MatchOpType(taicpu(p),top_reg,top_ref) and
  1209. MatchInstruction(hp1,A_CMP,A_TEST,[taicpu(p).opsize]) and
  1210. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1211. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1212. begin
  1213. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  1214. DebugMsg('Peephole MovTestCmp2MovTestCmp 1',hp1);
  1215. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1216. end;
  1217. end;
  1218. { Next instruction is also a MOV ? }
  1219. if GetNextIntruction_p and
  1220. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  1221. begin
  1222. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  1223. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  1224. { mov reg1, mem1 or mov mem1, reg1
  1225. mov mem2, reg2 mov reg2, mem2}
  1226. begin
  1227. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  1228. { mov reg1, mem1 or mov mem1, reg1
  1229. mov mem2, reg1 mov reg2, mem1}
  1230. begin
  1231. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1232. { Removes the second statement from
  1233. mov reg1, mem1/reg2
  1234. mov mem1/reg2, reg1 }
  1235. begin
  1236. if taicpu(p).oper[0]^.typ=top_reg then
  1237. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1238. DebugMsg('PeepHole Optimization MovMov2Mov 1',p);
  1239. asml.remove(hp1);
  1240. hp1.free;
  1241. Result:=true;
  1242. exit;
  1243. end
  1244. else
  1245. begin
  1246. CopyUsedRegs(TmpUsedRegs);
  1247. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1248. if (taicpu(p).oper[1]^.typ = top_ref) and
  1249. { mov reg1, mem1
  1250. mov mem2, reg1 }
  1251. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  1252. GetNextInstruction(hp1, hp2) and
  1253. MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
  1254. OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  1255. OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
  1256. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  1257. { change to
  1258. mov reg1, mem1 mov reg1, mem1
  1259. mov mem2, reg1 cmp reg1, mem2
  1260. cmp mem1, reg1
  1261. }
  1262. begin
  1263. asml.remove(hp2);
  1264. hp2.free;
  1265. taicpu(hp1).opcode := A_CMP;
  1266. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  1267. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1268. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  1269. DebugMsg('Peephole Optimization MovMovCmp2MovCmp done',hp1);
  1270. end;
  1271. ReleaseUsedRegs(TmpUsedRegs);
  1272. end;
  1273. end
  1274. else if (taicpu(p).oper[1]^.typ=top_ref) and
  1275. OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1276. begin
  1277. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  1278. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1279. DebugMsg('PeepHole Optimization MovMov2MovMov1 done',p);
  1280. end
  1281. else
  1282. begin
  1283. CopyUsedRegs(TmpUsedRegs);
  1284. if GetNextInstruction(hp1, hp2) and
  1285. MatchOpType(taicpu(p),top_ref,top_reg) and
  1286. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  1287. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1288. MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
  1289. MatchOpType(taicpu(hp2),top_ref,top_reg) and
  1290. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1291. if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
  1292. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  1293. { mov mem1, %reg1
  1294. mov %reg1, mem2
  1295. mov mem2, reg2
  1296. to:
  1297. mov mem1, reg2
  1298. mov reg2, mem2}
  1299. begin
  1300. AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  1301. DebugMsg('PeepHole Optimization MovMovMov2MovMov 1 done',p);
  1302. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  1303. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1304. asml.remove(hp2);
  1305. hp2.free;
  1306. end
  1307. {$ifdef i386}
  1308. { this is enabled for i386 only, as the rules to create the reg sets below
  1309. are too complicated for x86-64, so this makes this code too error prone
  1310. on x86-64
  1311. }
  1312. else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  1313. not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
  1314. not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
  1315. { mov mem1, reg1 mov mem1, reg1
  1316. mov reg1, mem2 mov reg1, mem2
  1317. mov mem2, reg2 mov mem2, reg1
  1318. to: to:
  1319. mov mem1, reg1 mov mem1, reg1
  1320. mov mem1, reg2 mov reg1, mem2
  1321. mov reg1, mem2
  1322. or (if mem1 depends on reg1
  1323. and/or if mem2 depends on reg2)
  1324. to:
  1325. mov mem1, reg1
  1326. mov reg1, mem2
  1327. mov reg1, reg2
  1328. }
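{ illustrative instance of the first form (i386, arbitrary registers/offsets):
    movl -8(%ebp),%eax          movl -8(%ebp),%eax
    movl %eax,(%edi)      -->   movl -8(%ebp),%edx
    movl (%edi),%edx            movl %eax,(%edi)
  i.e. the reload through (%edi) is replaced by a second load from mem1 }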
  1329. begin
  1330. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  1331. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  1332. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  1333. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  1334. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1335. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1336. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1337. AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  1338. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  1339. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1340. AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  1341. end
  1342. else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  1343. begin
  1344. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  1345. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1346. end
  1347. else
  1348. begin
  1349. asml.remove(hp2);
  1350. hp2.free;
  1351. end
  1352. {$endif i386}
  1353. ;
  1354. ReleaseUsedRegs(TmpUsedRegs);
  1355. end;
  1356. end
  1357. (* { movl [mem1],reg1
  1358. movl [mem1],reg2
  1359. to
  1360. movl [mem1],reg1
  1361. movl reg1,reg2
  1362. }
  1363. else if (taicpu(p).oper[0]^.typ = top_ref) and
  1364. (taicpu(p).oper[1]^.typ = top_reg) and
  1365. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1366. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1367. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1368. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  1369. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  1370. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  1371. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  1372. else*)
  1373. { movl const1,[mem1]
  1374. movl [mem1],reg1
  1375. to
  1376. movl const1,reg1
  1377. movl reg1,[mem1]
  1378. }
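{ illustrative instance (arbitrary constant/offset/register, AT&T operand order):
    movl $42,-4(%ebp)           movl $42,%eax
    movl -4(%ebp),%eax    -->   movl %eax,-4(%ebp)
  so the register load no longer depends on the preceding store }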
  1379. else if MatchOpType(Taicpu(p),top_const,top_ref) and
  1380. MatchOpType(Taicpu(hp1),top_ref,top_reg) and
  1381. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1382. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  1383. not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
  1384. begin
  1385. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1386. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1387. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  1388. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  1389. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  1390. DebugMsg('PeepHole Optimization,MovMov2MovMov 1',p);
  1391. end
  1392. end
  1393. else if (taicpu(p).oper[1]^.typ = top_reg) and
  1394. GetNextIntruction_p and
  1395. (hp1.typ = ait_instruction) and
  1396. GetNextInstruction(hp1, hp2) and
  1397. MatchInstruction(hp2,A_MOV,[]) and
  1398. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  1399. (taicpu(hp2).oper[0]^.typ=top_reg) and
  1400. (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
  1401. (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
  1402. ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
  1403. IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
  1404. ) then
  1405. { change mov reg/ref, reg2
  1406. add/sub/or/... reg3/$const, reg2
  1407. mov reg2, reg/ref
  1408. to add/sub/or/... reg3/$const, reg/ref }
  1409. begin
  1410. CopyUsedRegs(TmpUsedRegs);
  1411. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1412. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1413. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  1414. begin
  1415. { by example:
  1416. movswl %si,%eax movswl %si,%eax p
  1417. decl %eax addl %edx,%eax hp1
  1418. movw %ax,%si movw %ax,%si hp2
  1419. ->
  1420. movswl %si,%eax movswl %si,%eax p
  1421. decw %eax addw %edx,%eax hp1
  1422. movw %ax,%si movw %ax,%si hp2
  1423. }
  1424. DebugMsg('Peephole Optimization MovOpMov2Op ('+
  1425. std_op2str[taicpu(p).opcode]+gas_opsize2str[taicpu(p).opsize]+' '+
  1426. std_op2str[taicpu(hp1).opcode]+gas_opsize2str[taicpu(hp1).opsize]+' '+
  1427. std_op2str[taicpu(hp2).opcode]+gas_opsize2str[taicpu(hp2).opsize],p);
  1428. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1429. {
  1430. ->
  1431. movswl %si,%eax movswl %si,%eax p
  1432. decw %si addw %dx,%si hp1
  1433. movw %ax,%si movw %ax,%si hp2
  1434. }
  1435. case taicpu(hp1).ops of
  1436. 1:
  1437. begin
  1438. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  1439. if taicpu(hp1).oper[0]^.typ=top_reg then
  1440. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1441. end;
  1442. 2:
  1443. begin
  1444. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  1445. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  1446. (taicpu(hp1).opcode<>A_SHL) and
  1447. (taicpu(hp1).opcode<>A_SHR) and
  1448. (taicpu(hp1).opcode<>A_SAR) then
  1449. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1450. end;
  1451. else
  1452. internalerror(2008042701);
  1453. end;
  1454. {
  1455. ->
  1456. decw %si addw %dx,%si p
  1457. }
  1458. asml.remove(p);
  1459. asml.remove(hp2);
  1460. p.Free;
  1461. hp2.Free;
  1462. p := hp1;
  1463. end;
  1464. ReleaseUsedRegs(TmpUsedRegs);
  1465. end
  1466. else if GetNextIntruction_p and
  1467. MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  1468. GetNextInstruction(hp1, hp2) and
  1469. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  1470. MatchOperand(Taicpu(p).oper[0]^,0) and
  1471. (Taicpu(p).oper[1]^.typ = top_reg) and
  1472. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  1473. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  1474. { mov reg1,0
  1475. bts reg1,operand1 --> mov reg1,operand2
  1476. or reg1,operand2 bts reg1,operand1}
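{ illustrative instance, written in AT&T operand order (the pattern above is
  given in dest,src order; registers are arbitrary):
    movl $0,%eax                movl %edx,%eax
    btsl %ecx,%eax        -->   btsl %ecx,%eax
    orl %edx,%eax
  i.e. start from operand2 instead of 0 and drop the or }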
  1477. begin
  1478. Taicpu(hp2).opcode:=A_MOV;
  1479. asml.remove(hp1);
  1480. insertllitem(hp2,hp2.next,hp1);
  1481. asml.remove(p);
  1482. p.free;
  1483. p:=hp1;
  1484. end
  1485. else if GetNextIntruction_p and
  1486. MatchInstruction(hp1,A_LEA,[S_L]) and
  1487. MatchOpType(Taicpu(p),top_ref,top_reg) and
  1488. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  1489. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  1490. ) or
  1491. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  1492. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  1493. )
  1494. ) then
  1495. { mov reg1,ref
  1496. lea reg2,[reg1,reg2]
  1497. to
  1498. add reg2,ref}
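{ illustrative instance (arbitrary registers/offset, AT&T operand order),
  assuming %eax is not used after the lea:
    movl 12(%ebp),%eax    -->   addl 12(%ebp),%edx
    leal (%edx,%eax),%edx }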
  1499. begin
  1500. CopyUsedRegs(TmpUsedRegs);
  1501. { reg1 may not be used afterwards }
  1502. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1503. begin
  1504. Taicpu(hp1).opcode:=A_ADD;
  1505. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  1506. DebugMsg('Peephole MovLea2Add done',hp1);
  1507. asml.remove(p);
  1508. p.free;
  1509. p:=hp1;
  1510. end;
  1511. ReleaseUsedRegs(TmpUsedRegs);
  1512. end;
  1513. end;
  1514. function TX86AsmOptimizer.OptPass1MOVXX(var p : tai) : boolean;
  1515. var
  1516. hp1 : tai;
  1517. begin
  1518. Result:=false;
  1519. if taicpu(p).ops <> 2 then
  1520. exit;
  1521. if GetNextInstruction(p,hp1) and
  1522. MatchInstruction(hp1,taicpu(p).opcode,[taicpu(p).opsize]) and
  1523. (taicpu(hp1).ops = 2) then
  1524. begin
  1525. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  1526. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  1527. { movXX reg1, mem1 or movXX mem1, reg1
  1528. movXX mem2, reg2 movXX reg2, mem2}
  1529. begin
  1530. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  1531. { movXX reg1, mem1 or movXX mem1, reg1
  1532. movXX mem2, reg1 movXX reg2, mem1}
  1533. begin
  1534. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1535. begin
  1536. { Removes the second statement from
  1537. movXX reg1, mem1/reg2
  1538. movXX mem1/reg2, reg1
  1539. }
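{ illustrative instance (arbitrary registers, any of the movXX opcodes):
    movss %xmm0,(%eax)
    movss (%eax),%xmm0
  the second movss only reloads the value just stored, so it is removed }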
  1540. if taicpu(p).oper[0]^.typ=top_reg then
  1541. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1542. { Removes the first statement from
  1543. movXX mem1/reg1, reg2
  1544. movXX reg2, mem1/reg1
  1545. when reg2 is not used afterwards }
  1546. if (taicpu(p).oper[1]^.typ=top_reg) and
  1547. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)) then
  1548. begin
  1549. asml.remove(p);
  1550. p.free;
  1551. GetNextInstruction(hp1,p);
  1552. DebugMsg('PeepHole Optimization MovXXMovXX2Nop 1 done',p);
  1553. end
  1554. else
  1555. DebugMsg('PeepHole Optimization MovXXMovXX2MoVXX 1 done',p);
  1556. asml.remove(hp1);
  1557. hp1.free;
  1558. Result:=true;
  1559. exit;
  1560. end
  1561. end;
  1562. end;
  1563. end;
  1564. end;
  1565. function TX86AsmOptimizer.OptPass1OP(const p : tai) : boolean;
  1566. var
  1567. TmpUsedRegs : TAllUsedRegs;
  1568. hp1 : tai;
  1569. begin
  1570. result:=false;
  1571. { replace
  1572. <Op>X %mreg1,%mreg2 // Op in [ADD,MUL]
  1573. MovX %mreg2,%mreg1
  1574. dealloc %mreg2
  1575. by
  1576. <Op>X %mreg2,%mreg1
  1577. ?
  1578. }
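{ illustrative instance (arbitrary registers), assuming %xmm1 is
  deallocated after the move:
    addsd %xmm0,%xmm1     -->   addsd %xmm1,%xmm0
    movapd %xmm1,%xmm0
  valid because the handled operations are commutative }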
  1579. if GetNextInstruction(p,hp1) and
  1580. { we mix single and double operations here because we assume that the compiler
  1581. generates vmovapd only after double operations and vmovaps only after single operations }
  1582. MatchInstruction(hp1,A_MOVAPD,A_MOVAPS,[S_NO]) and
  1583. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  1584. MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
  1585. (taicpu(p).oper[0]^.typ=top_reg) then
  1586. begin
  1587. CopyUsedRegs(TmpUsedRegs);
  1588. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1589. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)
  1590. ) then
  1591. begin
  1592. taicpu(p).loadoper(0,taicpu(hp1).oper[0]^);
  1593. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  1594. DebugMsg('PeepHole Optimization OpMov2Op done',p);
  1595. asml.Remove(hp1);
  1596. hp1.Free;
  1597. result:=true;
  1598. end;
  1599. end;
  1600. end;
  1601. function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
  1602. var
  1603. TmpUsedRegs : TAllUsedRegs;
  1604. hp1,hp2: tai;
  1605. begin
  1606. Result:=false;
  1607. if MatchOpType(taicpu(p),top_reg,top_reg) and
  1608. GetNextInstruction(p, hp1) and
  1609. MatchInstruction(hp1,A_MOV,A_MOVZX,A_MOVSX,[]) and
  1610. MatchOpType(taicpu(hp1),top_ref,top_reg) and
  1611. ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg)
  1612. or
  1613. (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)
  1614. ) and
  1615. (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
  1616. { mov reg1, reg2
  1617. mov/zx/sx (reg2, ..), reg2 to mov/zx/sx (reg1, ..), reg2}
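{ illustrative instance (arbitrary registers, AT&T operand order):
    movl %esi,%eax        -->   movzbl (%esi),%eax
    movzbl (%eax),%eax
  the address can use reg1 directly because reg2 is overwritten anyway }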
  1618. begin
  1619. if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
  1620. taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
  1621. if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
  1622. taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
  1623. DebugMsg('PeepHole Optimization MovMovXX2MoVXX 1 done',p);
  1624. asml.remove(p);
  1625. p.free;
  1626. p := hp1;
  1627. Result:=true;
  1628. exit;
  1629. end
  1630. else if (taicpu(p).oper[0]^.typ = top_ref) and
  1631. GetNextInstruction(p,hp1) and
  1632. (hp1.typ = ait_instruction) and
  1633. { while the GetNextInstruction(hp1,hp2) call could be factored out,
  1634. doing it separately in both branches allows the cheap checks with a
  1635. low hit probability to be done first }
  1636. ((IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  1637. GetNextInstruction(hp1,hp2) and
  1638. MatchInstruction(hp2,A_MOV,[])
  1639. ) or
  1640. ((taicpu(hp1).opcode=A_LEA) and
  1641. GetNextInstruction(hp1,hp2) and
  1642. MatchInstruction(hp2,A_MOV,[]) and
  1643. ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
  1644. (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
  1645. ) or
  1646. (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,
  1647. taicpu(p).oper[1]^.reg) and
  1648. (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg)) or
  1649. (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_NO)) or
  1650. (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,NR_NO,taicpu(p).oper[1]^.reg))
  1651. ) and
  1652. ((MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^)) or not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)))
  1653. )
  1654. ) and
  1655. MatchOperand(taicpu(hp1).oper[taicpu(hp1).ops-1]^,taicpu(hp2).oper[0]^) and
  1656. (taicpu(hp2).oper[1]^.typ = top_ref) then
  1657. begin
  1658. CopyUsedRegs(TmpUsedRegs);
  1659. UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
  1660. if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
  1661. not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2, TmpUsedRegs))) then
  1662. { change mov (ref), reg
  1663. add/sub/or/... reg2/$const, reg
  1664. mov reg, (ref)
  1665. # release reg
  1666. to add/sub/or/... reg2/$const, (ref) }
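{ illustrative instance (arbitrary registers), assuming %eax is
  released after the final mov:
    movl (%ebx),%eax
    addl %edx,%eax        -->   addl %edx,(%ebx)
    movl %eax,(%ebx) }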
  1667. begin
  1668. case taicpu(hp1).opcode of
  1669. A_INC,A_DEC,A_NOT,A_NEG :
  1670. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  1671. A_LEA :
  1672. begin
  1673. taicpu(hp1).opcode:=A_ADD;
  1674. if (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.index<>NR_NO) then
  1675. taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
  1676. else if (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.base<>NR_NO) then
  1677. taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base)
  1678. else
  1679. taicpu(hp1).loadconst(0,taicpu(hp1).oper[0]^.ref^.offset);
  1680. taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
  1681. DebugMsg('Peephole FoldLea done',hp1);
  1682. end
  1683. else
  1684. taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
  1685. end;
  1686. asml.remove(p);
  1687. asml.remove(hp2);
  1688. p.free;
  1689. hp2.free;
  1690. p := hp1
  1691. end;
  1692. ReleaseUsedRegs(TmpUsedRegs);
  1693. end;
  1694. end;
  1695. function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
  1696. var
  1697. TmpUsedRegs : TAllUsedRegs;
  1698. hp1 : tai;
  1699. begin
  1700. Result:=false;
  1701. if (taicpu(p).ops >= 2) and
  1702. ((taicpu(p).oper[0]^.typ = top_const) or
  1703. ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
  1704. (taicpu(p).oper[1]^.typ = top_reg) and
  1705. ((taicpu(p).ops = 2) or
  1706. ((taicpu(p).oper[2]^.typ = top_reg) and
  1707. (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
  1708. GetLastInstruction(p,hp1) and
  1709. MatchInstruction(hp1,A_MOV,[]) and
  1710. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  1711. ((taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) or
  1712. ((taicpu(hp1).opsize=S_L) and (taicpu(p).opsize=S_Q) and SuperRegistersEqual(taicpu(hp1).oper[1]^.reg,taicpu(p).oper[1]^.reg))) then
  1713. begin
  1714. CopyUsedRegs(TmpUsedRegs);
  1715. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,p,TmpUsedRegs)) then
  1716. { change
  1717. mov reg1,reg2
  1718. imul y,reg2 to imul y,reg1,reg2 }
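{ illustrative instance (arbitrary registers/constant):
    movl %esi,%eax        -->   imull $100,%esi,%eax
    imull $100,%eax
  both compute %eax := %esi * 100 }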
  1719. begin
  1720. taicpu(p).ops := 3;
  1721. taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
  1722. taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
  1723. DebugMsg('Peephole MovImul2Imul done',p);
  1724. asml.remove(hp1);
  1725. hp1.free;
  1726. result:=true;
  1727. end;
  1728. ReleaseUsedRegs(TmpUsedRegs);
  1729. end;
  1730. end;
  1731. function TX86AsmOptimizer.OptPass2Jmp(var p : tai) : boolean;
  1732. var
  1733. hp1 : tai;
  1734. begin
  1735. {
  1736. change
  1737. jmp .L1
  1738. ...
  1739. .L1:
  1740. ret
  1741. into
  1742. ret
  1743. }
  1744. result:=false;
  1745. if (taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full) and (taicpu(p).oper[0]^.ref^.base=NR_NO) and
  1746. (taicpu(p).oper[0]^.ref^.index=NR_NO) then
  1747. begin
  1748. hp1:=getlabelwithsym(tasmlabel(taicpu(p).oper[0]^.ref^.symbol));
  1749. if (taicpu(p).condition=C_None) and assigned(hp1) and SkipLabels(hp1,hp1) and
  1750. MatchInstruction(hp1,A_RET,[S_NO]) then
  1751. begin
  1752. tasmlabel(taicpu(p).oper[0]^.ref^.symbol).decrefs;
  1753. taicpu(p).opcode:=A_RET;
  1754. taicpu(p).is_jmp:=false;
  1755. taicpu(p).ops:=taicpu(hp1).ops;
  1756. case taicpu(hp1).ops of
  1757. 0:
  1758. taicpu(p).clearop(0);
  1759. 1:
  1760. taicpu(p).loadconst(0,taicpu(hp1).oper[0]^.val);
  1761. else
  1762. internalerror(2016041301);
  1763. end;
  1764. result:=true;
  1765. end;
  1766. end;
  1767. end;
  1768. function CanBeCMOV(p : tai) : boolean;
  1769. begin
  1770. CanBeCMOV:=assigned(p) and
  1771. MatchInstruction(p,A_MOV,[S_W,S_L,S_Q]) and
  1772. { we can't use cmov ref,reg because
  1773. ref could point to an invalid location and cmov would still fault on it,
  1774. even when the condition is false and the mov would not have been executed (FK)
  1775. or ((taicpu(p).oper[0]^.typ = top_ref) and
  1776. (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
  1777. }
  1778. MatchOpType(taicpu(p),top_reg,top_reg);
  1779. end;
  1780. function TX86AsmOptimizer.OptPass2Jcc(var p : tai) : boolean;
  1781. var
  1782. hp1,hp2,hp3: tai;
  1783. carryadd_opcode : TAsmOp;
  1784. l : Longint;
  1785. condition : TAsmCond;
  1786. begin
  1787. { jb @@1 cmc
  1788. inc/dec operand --> adc/sbb operand,0
  1789. @@1:
  1790. ... and ...
  1791. jnb @@1
  1792. inc/dec operand --> adc/sbb operand,0
  1793. @@1: }
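{ illustrative instance (arbitrary register):
    jb @@1                      cmc
    incl %eax             -->   adcl $0,%eax
  @@1:
  and for jnb no cmc is needed:
    jnb @@1
    incl %eax             -->   adcl $0,%eax
  @@1: }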
  1794. result:=false;
  1795. if GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
  1796. GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
  1797. (Tasmlabel(Taicpu(p).oper[0]^.ref^.symbol)=Tai_label(hp2).labsym) then
  1798. begin
  1799. carryadd_opcode:=A_NONE;
  1800. if Taicpu(p).condition in [C_NAE,C_B] then
  1801. begin
  1802. if Taicpu(hp1).opcode=A_INC then
  1803. carryadd_opcode:=A_ADC;
  1804. if Taicpu(hp1).opcode=A_DEC then
  1805. carryadd_opcode:=A_SBB;
  1806. if carryadd_opcode<>A_NONE then
  1807. begin
  1808. Taicpu(p).clearop(0);
  1809. Taicpu(p).ops:=0;
  1810. Taicpu(p).is_jmp:=false;
  1811. Taicpu(p).opcode:=A_CMC;
  1812. Taicpu(p).condition:=C_NONE;
  1813. Taicpu(hp1).ops:=2;
  1814. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  1815. Taicpu(hp1).loadconst(0,0);
  1816. Taicpu(hp1).opcode:=carryadd_opcode;
  1817. result:=true;
  1818. exit;
  1819. end;
  1820. end;
  1821. if Taicpu(p).condition in [C_AE,C_NB] then
  1822. begin
  1823. if Taicpu(hp1).opcode=A_INC then
  1824. carryadd_opcode:=A_ADC;
  1825. if Taicpu(hp1).opcode=A_DEC then
  1826. carryadd_opcode:=A_SBB;
  1827. if carryadd_opcode<>A_NONE then
  1828. begin
  1829. asml.remove(p);
  1830. p.free;
  1831. Taicpu(hp1).ops:=2;
  1832. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  1833. Taicpu(hp1).loadconst(0,0);
  1834. Taicpu(hp1).opcode:=carryadd_opcode;
  1835. p:=hp1;
  1836. result:=true;
  1837. exit;
  1838. end;
  1839. end;
  1840. end;
  1841. {$ifndef i8086}
  1842. if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
  1843. begin
  1844. { check for
  1845. jCC xxx
  1846. <several movs>
  1847. xxx:
  1848. }
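{ illustrative instance (arbitrary registers/condition):
    jne xxx
    movl %edx,%eax        -->   cmove %edx,%eax
  xxx:
  the mov is executed exactly when the inverse condition holds }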
  1849. l:=0;
  1850. GetNextInstruction(p, hp1);
  1851. while assigned(hp1) and
  1852. CanBeCMOV(hp1) and
  1853. { stop on labels }
  1854. not(hp1.typ=ait_label) do
  1855. begin
  1856. inc(l);
  1857. GetNextInstruction(hp1,hp1);
  1858. end;
  1859. if assigned(hp1) then
  1860. begin
  1861. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  1862. begin
  1863. if (l<=4) and (l>0) then
  1864. begin
  1865. condition:=inverse_cond(taicpu(p).condition);
  1866. hp2:=p;
  1867. GetNextInstruction(p,hp1);
  1868. p:=hp1;
  1869. repeat
  1870. taicpu(hp1).opcode:=A_CMOVcc;
  1871. taicpu(hp1).condition:=condition;
  1872. GetNextInstruction(hp1,hp1);
  1873. until not(assigned(hp1)) or
  1874. not(CanBeCMOV(hp1));
  1875. { do not remove the jump yet, else GetNextInstruction could
  1876. skip the label if the jump being removed was its only
  1877. remaining reference }
  1878. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  1879. { if the label refs. reach zero, remove any alignment before the label }
  1880. if (hp1.typ=ait_align) and (tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).getrefs=0) then
  1881. begin
  1882. asml.Remove(hp1);
  1883. hp1.Free;
  1884. end;
  1885. asml.remove(hp2);
  1886. hp2.free;
  1887. result:=true;
  1888. exit;
  1889. end;
  1890. end
  1891. else
  1892. begin
  1893. { check further for
  1894. jCC xxx
  1895. <several movs 1>
  1896. jmp yyy
  1897. xxx:
  1898. <several movs 2>
  1899. yyy:
  1900. }
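{ illustrative instance (arbitrary registers/condition):
    je xxx
    movl %ebx,%eax              cmovne %ebx,%eax
    jmp yyy               -->   cmove %ecx,%eax
  xxx:
    movl %ecx,%eax
  yyy: }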
  1901. { hp2 points to jmp yyy }
  1902. hp2:=hp1;
  1903. { skip hp1 to xxx }
  1904. GetNextInstruction(hp1, hp1);
  1905. if assigned(hp2) and
  1906. assigned(hp1) and
  1907. (l<=3) and
  1908. (hp2.typ=ait_instruction) and
  1909. (taicpu(hp2).is_jmp) and
  1910. (taicpu(hp2).condition=C_None) and
  1911. { real label and jump, no further references to the
  1912. label are allowed }
  1913. (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=1) and
  1914. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  1915. begin
  1916. l:=0;
  1917. { skip hp1 to <several moves 2> }
  1918. GetNextInstruction(hp1, hp1);
  1919. while assigned(hp1) and
  1920. CanBeCMOV(hp1) do
  1921. begin
  1922. inc(l);
  1923. GetNextInstruction(hp1, hp1);
  1924. end;
  1925. { hp1 points to yyy: }
  1926. if assigned(hp1) and
  1927. FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
  1928. begin
  1929. condition:=inverse_cond(taicpu(p).condition);
  1930. GetNextInstruction(p,hp1);
  1931. hp3:=p;
  1932. p:=hp1;
  1933. repeat
  1934. taicpu(hp1).opcode:=A_CMOVcc;
  1935. taicpu(hp1).condition:=condition;
  1936. GetNextInstruction(hp1,hp1);
  1937. until not(assigned(hp1)) or
  1938. not(CanBeCMOV(hp1));
  1939. { hp2 is still at jmp yyy }
  1940. GetNextInstruction(hp2,hp1);
  1941. { hp2 is now at xxx: }
  1942. condition:=inverse_cond(condition);
  1943. GetNextInstruction(hp1,hp1);
  1944. { hp1 is now at <several movs 2> }
  1945. repeat
  1946. taicpu(hp1).opcode:=A_CMOVcc;
  1947. taicpu(hp1).condition:=condition;
  1948. GetNextInstruction(hp1,hp1);
  1949. until not(assigned(hp1)) or
  1950. not(CanBeCMOV(hp1));
  1951. {
  1952. asml.remove(hp1.next)
  1953. hp1.next.free;
  1954. asml.remove(hp1);
  1955. hp1.free;
  1956. }
  1957. { remove jCC }
  1958. tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
  1959. asml.remove(hp3);
  1960. hp3.free;
  1961. { remove jmp }
  1962. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  1963. asml.remove(hp2);
  1964. hp2.free;
  1965. result:=true;
  1966. exit;
  1967. end;
  1968. end;
  1969. end;
  1970. end;
  1971. end;
  1972. {$endif i8086}
  1973. end;
  1974. function TX86AsmOptimizer.OptPass1Movx(var p : tai) : boolean;
  1975. var
  1976. hp1,hp2: tai;
  1977. begin
  1978. result:=false;
  1979. if (taicpu(p).oper[1]^.typ = top_reg) and
  1980. GetNextInstruction(p,hp1) and
  1981. (hp1.typ = ait_instruction) and
  1982. IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  1983. GetNextInstruction(hp1,hp2) and
  1984. MatchInstruction(hp2,A_MOV,[]) and
  1985. (taicpu(hp2).oper[0]^.typ = top_reg) and
  1986. OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
  1987. {$ifdef i386}
  1988. { not all registers have byte size sub registers on i386 }
  1989. ((taicpu(hp2).opsize<>S_B) or (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX])) and
  1990. {$endif i386}
  1991. (((taicpu(hp1).ops=2) and
  1992. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
  1993. ((taicpu(hp1).ops=1) and
  1994. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
  1995. not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then
  1996. begin
  1997. { change movsX/movzX reg/ref, reg2
  1998. add/sub/or/... reg3/$const, reg2
  1999. mov reg2 reg/ref
  2000. to add/sub/or/... reg3/$const, reg/ref }
  2001. { by example:
  2002. movswl %si,%eax movswl %si,%eax p
  2003. decl %eax addl %edx,%eax hp1
  2004. movw %ax,%si movw %ax,%si hp2
  2005. ->
  2006. movswl %si,%eax movswl %si,%eax p
  2007. decw %eax addw %edx,%eax hp1
  2008. movw %ax,%si movw %ax,%si hp2
  2009. }
  2010. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  2011. {
  2012. ->
  2013. movswl %si,%eax movswl %si,%eax p
  2014. decw %si addw %dx,%si hp1
  2015. movw %ax,%si movw %ax,%si hp2
  2016. }
  2017. case taicpu(hp1).ops of
  2018. 1:
  2019. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  2020. 2:
  2021. begin
  2022. taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
  2023. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  2024. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  2025. end;
  2026. else
  2027. internalerror(2008042701);
  2028. end;
  2029. {
  2030. ->
  2031. decw %si addw %dx,%si p
  2032. }
  2033. DebugMsg('PeepHole Optimization,var3',p);
  2034. asml.remove(p);
  2035. asml.remove(hp2);
  2036. p.free;
  2037. hp2.free;
  2038. p:=hp1;
  2039. end
  2040. { removes superfluous And's after movzx's }
  2041. else if taicpu(p).opcode=A_MOVZX then
  2042. begin
  2043. if (taicpu(p).oper[1]^.typ = top_reg) and
  2044. GetNextInstruction(p, hp1) and
  2045. (tai(hp1).typ = ait_instruction) and
  2046. (taicpu(hp1).opcode = A_AND) and
  2047. (taicpu(hp1).oper[0]^.typ = top_const) and
  2048. (taicpu(hp1).oper[1]^.typ = top_reg) and
  2049. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  2050. begin
  2051. case taicpu(p).opsize Of
  2052. S_BL, S_BW{$ifdef x86_64}, S_BQ{$endif x86_64}:
  2053. if (taicpu(hp1).oper[0]^.val = $ff) then
  2054. begin
  2055. DebugMsg('PeepHole Optimization,var4',p);
  2056. asml.remove(hp1);
  2057. hp1.free;
  2058. end;
  2059. S_WL{$ifdef x86_64}, S_WQ{$endif x86_64}:
  2060. if (taicpu(hp1).oper[0]^.val = $ffff) then
  2061. begin
  2062. DebugMsg('PeepHole Optimization,var5',p);
  2063. asml.remove(hp1);
  2064. hp1.free;
  2065. end;
  2066. {$ifdef x86_64}
  2067. S_LQ:
  2068. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  2069. begin
  2070. if (cs_asm_source in current_settings.globalswitches) then
  2071. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var6')),p);
  2072. asml.remove(hp1);
  2073. hp1.Free;
  2074. end;
  2075. {$endif x86_64}
  2076. end;
  2077. end;
  2078. { changes some movzx constructs to faster synonyms (all examples
  2079. are given with eax/ax, but are also valid for other registers)}
  2080. if (taicpu(p).oper[1]^.typ = top_reg) then
  2081. if (taicpu(p).oper[0]^.typ = top_reg) then
  2082. case taicpu(p).opsize of
  2083. S_BW:
  2084. begin
  2085. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  2086. not(cs_opt_size in current_settings.optimizerswitches) then
  2087. {Change "movzbw %al, %ax" to "andw $0xff, %ax"}
  2088. begin
  2089. taicpu(p).opcode := A_AND;
  2090. taicpu(p).changeopsize(S_W);
  2091. taicpu(p).loadConst(0,$ff);
  2092. DebugMsg('PeepHole Optimization,var7',p);
  2093. end
  2094. else if GetNextInstruction(p, hp1) and
  2095. (tai(hp1).typ = ait_instruction) and
  2096. (taicpu(hp1).opcode = A_AND) and
  2097. (taicpu(hp1).oper[0]^.typ = top_const) and
  2098. (taicpu(hp1).oper[1]^.typ = top_reg) and
  2099. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  2100. { Change "movzbw %reg1, %reg2; andw $const1, %reg2"
  2101. to "movw %reg1, %reg2; andw $(const1 and $ff), %reg2"}
  2102. begin
  2103. DebugMsg('PeepHole Optimization,var8',p);
  2104. taicpu(p).opcode := A_MOV;
  2105. taicpu(p).changeopsize(S_W);
  2106. setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
  2107. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  2108. end;
  2109. end;
  2110. S_BL:
  2111. begin
  2112. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  2113. not(cs_opt_size in current_settings.optimizerswitches) then
  2114. { Change "movzbl %al, %eax" to "andl $0xff, %eax" }
  2115. begin
  2116. taicpu(p).opcode := A_AND;
  2117. taicpu(p).changeopsize(S_L);
  2118. taicpu(p).loadConst(0,$ff)
  2119. end
  2120. else if GetNextInstruction(p, hp1) and
  2121. (tai(hp1).typ = ait_instruction) and
  2122. (taicpu(hp1).opcode = A_AND) and
  2123. (taicpu(hp1).oper[0]^.typ = top_const) and
  2124. (taicpu(hp1).oper[1]^.typ = top_reg) and
  2125. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  2126. { Change "movzbl %reg1, %reg2; andl $const1, %reg2"
  2127. to "movl %reg1, %reg2; andl $(const1 and $ff), %reg2"}
  2128. begin
  2129. DebugMsg('PeepHole Optimization,var10',p);
  2130. taicpu(p).opcode := A_MOV;
  2131. taicpu(p).changeopsize(S_L);
  2132. { do not use R_SUBWHOLE
  2133. as movl %rdx,%eax
  2134. is invalid in assembler PM }
  2135. setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
  2136. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  2137. end
  2138. end;
  2139. {$ifndef i8086}
  2140. S_WL:
  2141. begin
  2142. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  2143. not(cs_opt_size in current_settings.optimizerswitches) then
  2144. { Change "movzwl %ax, %eax" to "andl $0xffff, %eax" }
  2145. begin
  2146. DebugMsg('PeepHole Optimization,var11',p);
  2147. taicpu(p).opcode := A_AND;
  2148. taicpu(p).changeopsize(S_L);
  2149. taicpu(p).loadConst(0,$ffff);
  2150. end
  2151. else if GetNextInstruction(p, hp1) and
  2152. (tai(hp1).typ = ait_instruction) and
  2153. (taicpu(hp1).opcode = A_AND) and
  2154. (taicpu(hp1).oper[0]^.typ = top_const) and
  2155. (taicpu(hp1).oper[1]^.typ = top_reg) and
  2156. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  2157. { Change "movzwl %reg1, %reg2; andl $const1, %reg2"
  2158. to "movl %reg1, %reg2; andl $(const1 and $ffff), %reg2"}
  2159. begin
  2160. DebugMsg('PeepHole Optimization,var12',p);
  2161. taicpu(p).opcode := A_MOV;
  2162. taicpu(p).changeopsize(S_L);
  2163. { do not use R_SUBWHOLE
  2164. as movl %rdx,%eax
  2165. is invalid in assembler PM }
  2166. setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
  2167. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  2168. end;
  2169. end;
  2170. {$endif i8086}
  2171. end
  2172. else if (taicpu(p).oper[0]^.typ = top_ref) then
  2173. begin
  2174. if GetNextInstruction(p, hp1) and
  2175. (tai(hp1).typ = ait_instruction) and
  2176. (taicpu(hp1).opcode = A_AND) and
  2177. MatchOpType(taicpu(hp1),top_const,top_reg) and
  2178. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  2179. begin
  2180. taicpu(p).opcode := A_MOV;
  2181. case taicpu(p).opsize Of
  2182. S_BL:
  2183. begin
  2184. DebugMsg('PeepHole Optimization,var13',p);
  2185. taicpu(p).changeopsize(S_L);
  2186. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  2187. end;
  2188. S_WL:
  2189. begin
  2190. DebugMsg('PeepHole Optimization,var14',p);
  2191. taicpu(p).changeopsize(S_L);
  2192. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  2193. end;
  2194. S_BW:
  2195. begin
  2196. DebugMsg('PeepHole Optimization,var15',p);
  2197. taicpu(p).changeopsize(S_W);
  2198. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  2199. end;
  2200. {$ifdef x86_64}
  2201. S_BQ:
  2202. begin
  2203. DebugMsg('PeepHole Optimization,var16',p);
  2204. taicpu(p).changeopsize(S_Q);
  2205. taicpu(hp1).loadConst(
  2206. 0, taicpu(hp1).oper[0]^.val and $ff);
  2207. end;
  2208. S_WQ:
  2209. begin
  2210. DebugMsg('PeepHole Optimization,var17',p);
  2211. taicpu(p).changeopsize(S_Q);
  2212. taicpu(hp1).loadConst(0, taicpu(hp1).oper[0]^.val and $ffff);
  2213. end;
  2214. S_LQ:
  2215. begin
  2216. DebugMsg('PeepHole Optimization,var18',p);
  2217. taicpu(p).changeopsize(S_Q);
  2218. taicpu(hp1).loadConst(
  2219. 0, taicpu(hp1).oper[0]^.val and $ffffffff);
  2220. end;
  2221. {$endif x86_64}
  2222. else
  2223. Internalerror(2017050704)
  2224. end;
  2225. end;
  2226. end;
  2227. end;
  2228. end;
  2229. function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
  2230. var
  2231. hp1 : tai;
  2232. begin
  2233. Result:=false;
  2234. if not(GetNextInstruction(p, hp1)) then
  2235. exit;
  2236. if MatchOpType(taicpu(p),top_const,top_reg) and
  2237. MatchInstruction(hp1,A_AND,[]) and
  2238. MatchOpType(taicpu(hp1),top_const,top_reg) and
  2239. (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
  2240. { the second register must contain the first one, so compare their subreg types }
  2241. (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
  2242. (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
  2243. { change
  2244. and const1, reg
  2245. and const2, reg
  2246. to
  2247. and (const1 and const2), reg
  2248. }
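{ illustrative instance (arbitrary register/constants):
    andl $0xff00,%eax     -->   andl $0xf00,%eax
    andl $0x0ff0,%eax
  i.e. a single and with (const1 and const2) }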
  2249. begin
  2250. taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
  2251. DebugMsg('Peephole AndAnd2And done',hp1);
  2252. asml.remove(p);
  2253. p.Free;
  2254. p:=hp1;
  2255. Result:=true;
  2256. exit;
  2257. end
  2258. else if MatchOpType(taicpu(p),top_const,top_reg) and
  2259. MatchInstruction(hp1,A_MOVZX,[]) and
  2260. (taicpu(hp1).oper[0]^.typ = top_reg) and
  2261. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  2262. (getsubreg(taicpu(hp1).oper[0]^.reg)=getsubreg(taicpu(hp1).oper[1]^.reg)) and
  2263. (((taicpu(p).opsize=S_W) and
  2264. (taicpu(hp1).opsize=S_BW)) or
  2265. ((taicpu(p).opsize=S_L) and
  2266. (taicpu(hp1).opsize in [S_WL,S_BL]))
  2267. {$ifdef x86_64}
  2268. or
  2269. ((taicpu(p).opsize=S_Q) and
  2270. (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
  2271. {$endif x86_64}
  2272. ) then
  2273. begin
  2274. if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
  2275. ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
  2276. ) or
  2277. (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
  2278. ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
  2279. {$ifdef x86_64}
  2280. or
  2281. (((taicpu(hp1).opsize)=S_LQ) and
  2282. ((taicpu(p).oper[0]^.val and $ffffffff)=taicpu(p).oper[0]^.val)
  2283. )
  2284. {$endif x86_64}
  2285. then
  2286. begin
  2287. DebugMsg('Peephole AndMovzToAnd done',p);
  2288. asml.remove(hp1);
  2289. hp1.free;
  2290. end;
  2291. end
  2292. else if MatchOpType(taicpu(p),top_const,top_reg) and
  2293. MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
  2294. (taicpu(hp1).oper[0]^.typ = top_reg) and
  2295. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  2296. (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
  2297. (((taicpu(p).opsize=S_W) and
  2298. (taicpu(hp1).opsize=S_BW)) or
  2299. ((taicpu(p).opsize=S_L) and
  2300. (taicpu(hp1).opsize in [S_WL,S_BL]))
  2301. {$ifdef x86_64}
  2302. or
  2303. ((taicpu(p).opsize=S_Q) and
  2304. (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
  2305. {$endif x86_64}
  2306. ) then
  2307. begin
  2308. if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
  2309. ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
  2310. ) or
  2311. (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
  2312. ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
  2313. {$ifdef x86_64}
  2314. or
  2315. (((taicpu(hp1).opsize)=S_LQ) and
  2316. ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
  2317. )
  2318. {$endif x86_64}
  2319. then
  2320. begin
  2321. DebugMsg('PeepHole Optimization,AndMovsxToAnd',p);
  2322. asml.remove(hp1);
  2323. hp1.free;
  2324. end;
  2325. end
  2326. else if (taicpu(p).oper[1]^.typ = top_reg) and
  2327. (hp1.typ = ait_instruction) and
  2328. (taicpu(hp1).is_jmp) and
  2329. (taicpu(hp1).opcode<>A_JMP) and
  2330. not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
  2331. { change
  2332. and x, reg
  2333. jxx
  2334. to
  2335. test x, reg
  2336. jxx
  2337. if reg is deallocated before the
  2338. jump, but only if it's a conditional jump (PFV)
  2339. }
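{ illustrative instance (arbitrary register/constant), assuming %eax
  is deallocated before the jump:
    andl $4,%eax                testl $4,%eax
    jne @@1               -->   jne @@1 }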
  2340. taicpu(p).opcode := A_TEST;
  2341. end;
  2342. procedure TX86AsmOptimizer.PostPeepholeOptMov(const p : tai);
  2343. begin
  2344. if MatchOperand(taicpu(p).oper[0]^,0) and
  2345. (taicpu(p).oper[1]^.typ = Top_Reg) and
  2346. not(RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
  2347. { change "mov $0, %reg" into "xor %reg, %reg" }
  2348. begin
  2349. taicpu(p).opcode := A_XOR;
  2350. taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
  2351. end;
  2352. end;
  2353. end.