{ aoptx86.pas (193 KB) — excerpt; a block of concatenated source-viewer line
  numbers (extraction artifact) was removed here. }
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. {$define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
  27. type
  { x86-specific peephole optimizer.  Extends the target-independent
    TAsmOptimizer with register-usage analysis helpers and one handler per
    opcode (group) for the pre-, pass-1, pass-2 and post-peephole stages.
    Only the declarations are visible here; the bodies live in the
    implementation section. }
  TX86AsmOptimizer = class(TAsmOptimizer)
    function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
    function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
    { true if instruction hp reads (any part of) reg, including implicit
      operands and flag sub-registers }
    function RegReadByInstruction(reg : TRegister; hp : tai) : boolean;
    function RegInInstruction(Reg: TRegister; p1: tai): Boolean;override;
    { advances Next to the next instruction; at -O3 it additionally skips
      instructions that do not mention reg, stopping at non-instructions
      and call/jump opcodes }
    function GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
  protected
    { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
    function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
    { checks whether reading the value in reg1 depends on the value of reg2. This
      is very similar to SuperRegisterEquals, except it takes into account that
      R_SUBH and R_SUBL are independendent (e.g. reading from AL does not
      depend on the value in AH). }
    function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
    { emits a debug message for instruction p (only effective with
      DEBUG_AOPTCPU — see SPeepholeOptimization below) }
    procedure DebugMsg(const s : string; p : tai);inline;
    class function IsExitCode(p : tai) : boolean;
    class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean;
    procedure RemoveLastDeallocForFuncRes(p : tai);
    function DoSubAddOpt(var p : tai) : Boolean;
    { pre-peephole stage: run before the regular optimization passes }
    function PrePeepholeOptSxx(var p : tai) : boolean;
    function PrePeepholeOptIMUL(var p : tai) : boolean;
    { pass-1 handlers; each returns true when it changed the instruction
      stream at p — bodies not visible in this excerpt }
    function OptPass1AND(var p : tai) : boolean;
    function OptPass1_V_MOVAP(var p : tai) : boolean;
    function OptPass1VOP(var p : tai) : boolean;
    function OptPass1MOV(var p : tai) : boolean;
    function OptPass1Movx(var p : tai) : boolean;
    function OptPass1MOVXX(var p : tai) : boolean;
    function OptPass1OP(var p : tai) : boolean;
    function OptPass1LEA(var p : tai) : boolean;
    function OptPass1Sub(var p : tai) : boolean;
    function OptPass1SHLSAL(var p : tai) : boolean;
    function OptPass1SETcc(var p : tai) : boolean;
    function OptPass1FSTP(var p : tai) : boolean;
    function OptPass1FLD(var p : tai) : boolean;
    function OptPass1Cmp(var p : tai) : boolean;
    { pass-2 handlers }
    function OptPass2MOV(var p : tai) : boolean;
    function OptPass2Imul(var p : tai) : boolean;
    function OptPass2Jmp(var p : tai) : boolean;
    function OptPass2Jcc(var p : tai) : boolean;
    function OptPass2Lea(var p: tai): Boolean;
    { post-peephole stage }
    function PostPeepholeOptMov(var p : tai) : Boolean;
{$ifdef x86_64} { These post-peephole optimisations only affect 64-bit registers. [Kit] }
    function PostPeepholeOptMovzx(var p : tai) : Boolean;
    function PostPeepholeOptXor(var p : tai) : Boolean;
{$endif}
    function PostPeepholeOptCmp(var p : tai) : Boolean;
    function PostPeepholeOptTestOr(var p : tai) : Boolean;
    function PostPeepholeOptCall(var p : tai) : Boolean;
    function PostPeepholeOptLea(var p : tai) : Boolean;
    procedure OptReferences;
  end;
  { true if instr is an assembler instruction with the given opcode (or one of
    the given opcodes) and, when opsize is non-empty, an operand size contained
    in opsize (an empty opsize set acts as a wildcard) }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;
  { true if oper is a register operand equal to reg }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  { true if oper is a constant operand with value a }
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  { true if both operands have the same type and equal contents }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  { true if the two (non-volatile) references address the same location }
  function RefsEqual(const r1, r2: treference): boolean;
  { like MatchReferenceWithOffset, but additionally requires offset=0 }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  { returns true, if ref is a reference using only the registers passed as base and index
    and having an offset }
  function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  91. implementation
  92. uses
  93. cutils,verbose,
  94. globals,
  95. cpuinfo,
  96. procinfo,
  97. aasmbase,
  98. aoptutils,
  99. symconst,symsym,
  100. cgx86,
  101. itcpugas;
{$ifdef DEBUG_AOPTCPU}
  const
    { prefix for the optimizer's debug messages }
    SPeepholeOptimization: shortstring = 'Peephole Optimization: ';
{$else DEBUG_AOPTCPU}
  { Empty strings help the optimizer to remove string concatenations that won't
    ever appear to the user on release builds. [Kit] }
  const
    SPeepholeOptimization = '';
{$endif DEBUG_AOPTCPU}
  { true if instr is an instruction with opcode op; an empty opsize set
    matches any operand size }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
    begin
      { anything that is not an instruction can never match }
      if instr.typ<>ait_instruction then
        begin
          result:=false;
          exit;
        end;
      result:=(taicpu(instr).opcode=op) and
        ((opsize=[]) or (taicpu(instr).opsize in opsize));
    end;
  { true if instr is an instruction with opcode op1 or op2; an empty opsize
    set matches any operand size }
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
    var
      actual : TAsmOp;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      actual:=taicpu(instr).opcode;
      if (actual<>op1) and (actual<>op2) then
        exit;
      result:=(opsize=[]) or (taicpu(instr).opsize in opsize);
    end;
  { true if instr is an instruction with opcode op1, op2 or op3; an empty
    opsize set matches any operand size }
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
    var
      actual : TAsmOp;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      actual:=taicpu(instr).opcode;
      if (actual<>op1) and (actual<>op2) and (actual<>op3) then
        exit;
      result:=(opsize=[]) or (taicpu(instr).opsize in opsize);
    end;
  { true if instr is an instruction whose opcode appears in ops; an empty
    opsize set matches any operand size }
  function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
    const opsize : topsizes) : boolean;
    var
      op : TAsmOp;
    begin
      result:=false;
      { the type and size checks do not depend on the loop variable, so
        they can be performed once up front }
      if instr.typ<>ait_instruction then
        exit;
      if (opsize<>[]) and not(taicpu(instr).opsize in opsize) then
        exit;
      for op in ops do
        if taicpu(instr).opcode=op then
          begin
            result:=true;
            exit;
          end;
    end;
  { true if oper is a register operand holding exactly reg }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ=top_reg then
        result:=(oper.reg=reg)
      else
        result:=false;
    end;
  { true if oper is a constant operand with value a }
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
    begin
      if oper.typ=top_const then
        result:=(oper.val=a)
      else
        result:=false;
    end;
  { true if both operands have the same type and compare equal; operand
    types other than const/reg/ref are considered an internal error }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
    begin
      result:=false;
      if oper1.typ<>oper2.typ then
        exit;
      case oper1.typ of
        top_const:
          result:=(oper1.val=oper2.val);
        top_reg:
          result:=(oper1.reg=oper2.reg);
        top_ref:
          result:=RefsEqual(oper1.ref^,oper2.ref^);
        else
          internalerror(2013102801);
      end;
    end;
  { true if r1 and r2 address the same location: both must be non-volatile
    and agree in every addressing component }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      RefsEqual:=
        (r1.volatility=[]) and
        (r2.volatility=[]) and
        (r1.base=r2.base) and
        (r1.index=r2.index) and
        (r1.scalefactor=r2.scalefactor) and
        (r1.offset=r2.offset) and
        (r1.segment=r2.segment) and
        (r1.symbol=r2.symbol) and
        (r1.relsymbol=r2.relsymbol) and
        (r1.refaddr=r2.refaddr);
    end;
  { true if ref addresses memory with a zero displacement through exactly the
    given base and index registers (NR_INVALID acts as a wildcard), with no
    scaling beyond 1, no segment override, no symbolic part and no
    volatility.  This is MatchReferenceWithOffset plus the offset=0
    requirement; delegating keeps the two predicates from drifting apart. }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
    begin
      Result:=(ref.offset=0) and
        MatchReferenceWithOffset(ref,base,index);
    end;
  { true if ref uses only the registers passed as base and index
    (NR_INVALID acts as a wildcard), with no scaling beyond 1, no segment
    override, no symbolic part and no volatility; any offset is allowed }
  function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
    begin
      Result:=false;
      { reject anything that is not a plain base/index reference }
      if not(ref.scalefactor in [0,1]) or
         (ref.segment<>NR_NO) or
         (ref.symbol<>nil) or
         (ref.relsymbol<>nil) or
         (ref.volatility<>[]) then
        exit;
      Result:=((base=NR_INVALID) or (ref.base=base)) and
        ((index=NR_INVALID) or (ref.index=index));
    end;
  { true if p may read the CPU flags: labels are treated conservatively as
    readers (control may merge there), instructions are checked against the
    instruction property table, everything else does not read flags }
  function InstrReadsFlags(p: tai): boolean;
    begin
      if p.typ=ait_label then
        begin
          InstrReadsFlags:=true;
          exit;
        end;
      if (p.typ=ait_instruction) and
         (InsProp[taicpu(p).opcode].Ch*
          [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
           Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
           Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc,Ch_All]<>[]) then
        begin
          InstrReadsFlags:=true;
          exit;
        end;
      InstrReadsFlags:=false;
    end;
  { Advances Next from Current to the next interesting instruction and
    returns whether one was found.  Below -O3 this is plain
    GetNextInstruction; at -O3 instructions that do not involve reg are
    skipped, but the scan never looks past a non-instruction item or a
    call/jump. }
  function TX86AsmOptimizer.GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      repeat
        Result:=GetNextInstruction(Next,Next);
        if not Result then
          break;
        { stop conditions, evaluated in the original short-circuit order }
        if not(cs_opt_level3 in current_settings.optimizerswitches) or
           (Next.typ<>ait_instruction) or
           RegInInstruction(reg,Next) or
           is_calljmp(taicpu(Next).opcode) then
          break;
      until false;
    end;
  { On x86 an instruction "loads from" a register exactly when it reads it,
    so this override simply forwards to RegReadByInstruction. }
  function TX86AsmOptimizer.InstructionLoadsFromReg(const reg: TRegister;const hp: tai): boolean;
    begin
      Result:=RegReadByInstruction(reg,hp);
    end;
  245. function TX86AsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
  246. var
  247. p: taicpu;
  248. opcount: longint;
  249. begin
  250. RegReadByInstruction := false;
  251. if hp.typ <> ait_instruction then
  252. exit;
  253. p := taicpu(hp);
  254. case p.opcode of
  255. A_CALL:
  256. regreadbyinstruction := true;
  257. A_IMUL:
  258. case p.ops of
  259. 1:
  260. regReadByInstruction := RegInOp(reg,p.oper[0]^) or
  261. (
  262. ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
  263. ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
  264. );
  265. 2,3:
  266. regReadByInstruction :=
  267. reginop(reg,p.oper[0]^) or
  268. reginop(reg,p.oper[1]^);
  269. end;
  270. A_MUL:
  271. begin
  272. regReadByInstruction := RegInOp(reg,p.oper[0]^) or
  273. (
  274. ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
  275. ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
  276. );
  277. end;
  278. A_IDIV,A_DIV:
  279. begin
  280. regReadByInstruction := RegInOp(reg,p.oper[0]^) or
  281. (
  282. (getregtype(reg)=R_INTREGISTER) and
  283. (
  284. (getsupreg(reg)=RS_EAX) or ((getsupreg(reg)=RS_EDX) and (p.opsize<>S_B))
  285. )
  286. );
  287. end;
  288. else
  289. begin
  290. if (p.opcode=A_LEA) and is_segment_reg(reg) then
  291. begin
  292. RegReadByInstruction := false;
  293. exit;
  294. end;
  295. for opcount := 0 to p.ops-1 do
  296. if (p.oper[opCount]^.typ = top_ref) and
  297. RegInRef(reg,p.oper[opcount]^.ref^) then
  298. begin
  299. RegReadByInstruction := true;
  300. exit
  301. end;
  302. { special handling for SSE MOVSD }
  303. if (p.opcode=A_MOVSD) and (p.ops>0) then
  304. begin
  305. if p.ops<>2 then
  306. internalerror(2017042702);
  307. regReadByInstruction := reginop(reg,p.oper[0]^) or
  308. (
  309. (p.oper[1]^.typ=top_reg) and (p.oper[0]^.typ=top_reg) and reginop(reg, p.oper[1]^)
  310. );
  311. exit;
  312. end;
  313. with insprop[p.opcode] do
  314. begin
  315. if getregtype(reg)=R_INTREGISTER then
  316. begin
  317. case getsupreg(reg) of
  318. RS_EAX:
  319. if [Ch_REAX,Ch_RWEAX,Ch_MEAX]*Ch<>[] then
  320. begin
  321. RegReadByInstruction := true;
  322. exit
  323. end;
  324. RS_ECX:
  325. if [Ch_RECX,Ch_RWECX,Ch_MECX]*Ch<>[] then
  326. begin
  327. RegReadByInstruction := true;
  328. exit
  329. end;
  330. RS_EDX:
  331. if [Ch_REDX,Ch_RWEDX,Ch_MEDX]*Ch<>[] then
  332. begin
  333. RegReadByInstruction := true;
  334. exit
  335. end;
  336. RS_EBX:
  337. if [Ch_REBX,Ch_RWEBX,Ch_MEBX]*Ch<>[] then
  338. begin
  339. RegReadByInstruction := true;
  340. exit
  341. end;
  342. RS_ESP:
  343. if [Ch_RESP,Ch_RWESP,Ch_MESP]*Ch<>[] then
  344. begin
  345. RegReadByInstruction := true;
  346. exit
  347. end;
  348. RS_EBP:
  349. if [Ch_REBP,Ch_RWEBP,Ch_MEBP]*Ch<>[] then
  350. begin
  351. RegReadByInstruction := true;
  352. exit
  353. end;
  354. RS_ESI:
  355. if [Ch_RESI,Ch_RWESI,Ch_MESI]*Ch<>[] then
  356. begin
  357. RegReadByInstruction := true;
  358. exit
  359. end;
  360. RS_EDI:
  361. if [Ch_REDI,Ch_RWEDI,Ch_MEDI]*Ch<>[] then
  362. begin
  363. RegReadByInstruction := true;
  364. exit
  365. end;
  366. end;
  367. end;
  368. if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
  369. begin
  370. if (Ch_RFLAGScc in Ch) and not(getsubreg(reg) in [R_SUBW,R_SUBD,R_SUBQ]) then
  371. begin
  372. case p.condition of
  373. C_A,C_NBE, { CF=0 and ZF=0 }
  374. C_BE,C_NA: { CF=1 or ZF=1 }
  375. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY,R_SUBFLAGZERO];
  376. C_AE,C_NB,C_NC, { CF=0 }
  377. C_B,C_NAE,C_C: { CF=1 }
  378. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY];
  379. C_NE,C_NZ, { ZF=0 }
  380. C_E,C_Z: { ZF=1 }
  381. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO];
  382. C_G,C_NLE, { ZF=0 and SF=OF }
  383. C_LE,C_NG: { ZF=1 or SF<>OF }
  384. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO,R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
  385. C_GE,C_NL, { SF=OF }
  386. C_L,C_NGE: { SF<>OF }
  387. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
  388. C_NO, { OF=0 }
  389. C_O: { OF=1 }
  390. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGOVERFLOW];
  391. C_NP,C_PO, { PF=0 }
  392. C_P,C_PE: { PF=1 }
  393. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGPARITY];
  394. C_NS, { SF=0 }
  395. C_S: { SF=1 }
  396. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN];
  397. else
  398. internalerror(2017042701);
  399. end;
  400. if RegReadByInstruction then
  401. exit;
  402. end;
  403. case getsubreg(reg) of
  404. R_SUBW,R_SUBD,R_SUBQ:
  405. RegReadByInstruction :=
  406. [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
  407. Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
  408. Ch_RDirFlag,Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc]*Ch<>[];
  409. R_SUBFLAGCARRY:
  410. RegReadByInstruction:=[Ch_RCarryFlag,Ch_RWCarryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  411. R_SUBFLAGPARITY:
  412. RegReadByInstruction:=[Ch_RParityFlag,Ch_RWParityFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  413. R_SUBFLAGAUXILIARY:
  414. RegReadByInstruction:=[Ch_RAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  415. R_SUBFLAGZERO:
  416. RegReadByInstruction:=[Ch_RZeroFlag,Ch_RWZeroFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  417. R_SUBFLAGSIGN:
  418. RegReadByInstruction:=[Ch_RSignFlag,Ch_RWSignFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  419. R_SUBFLAGOVERFLOW:
  420. RegReadByInstruction:=[Ch_ROverflowFlag,Ch_RWOverflowFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  421. R_SUBFLAGINTERRUPT:
  422. RegReadByInstruction:=[Ch_RFlags,Ch_RWFlags]*Ch<>[];
  423. R_SUBFLAGDIRECTION:
  424. RegReadByInstruction:=[Ch_RDirFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  425. else
  426. internalerror(2017042601);
  427. end;
  428. exit;
  429. end;
  430. if (Ch_NoReadIfEqualRegs in Ch) and (p.ops=2) and
  431. (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
  432. (p.oper[0]^.reg=p.oper[1]^.reg) then
  433. exit;
  434. if ([CH_RWOP1,CH_ROP1,CH_MOP1]*Ch<>[]) and reginop(reg,p.oper[0]^) then
  435. begin
  436. RegReadByInstruction := true;
  437. exit
  438. end;
  439. if ([Ch_RWOP2,Ch_ROP2,Ch_MOP2]*Ch<>[]) and reginop(reg,p.oper[1]^) then
  440. begin
  441. RegReadByInstruction := true;
  442. exit
  443. end;
  444. if ([Ch_RWOP3,Ch_ROP3,Ch_MOP3]*Ch<>[]) and reginop(reg,p.oper[2]^) then
  445. begin
  446. RegReadByInstruction := true;
  447. exit
  448. end;
  449. if ([Ch_RWOP4,Ch_ROP4,Ch_MOP4]*Ch<>[]) and reginop(reg,p.oper[3]^) then
  450. begin
  451. RegReadByInstruction := true;
  452. exit
  453. end;
  454. end;
  455. end;
  456. end;
  457. end;
{ Returns True if instruction p1 reads, writes or modifies Reg (any kind of
  use counts).  Implicit uses are looked up in the instruction property
  table (insprop); anything not covered there is delegated to the generic
  operand scan of the inherited implementation. }
function TX86AsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
  begin
    result:=false;
    if p1.typ<>ait_instruction then
      exit;
    { Ch_All marks instructions whose register usage is unknown/total }
    if (Ch_All in insprop[taicpu(p1).opcode].Ch) then
      exit(true);
    if (getregtype(reg)=R_INTREGISTER) and
      { change information for xmm movsd are not correct }
      ((taicpu(p1).opcode<>A_MOVSD) or (taicpu(p1).ops=0)) then
      begin
        { per-superregister implicit read/write/modify flags }
        case getsupreg(reg) of
          { RS_EAX = RS_RAX on x86-64 }
          RS_EAX:
            result:=([Ch_REAX,Ch_RRAX,Ch_WEAX,Ch_WRAX,Ch_RWEAX,Ch_RWRAX,Ch_MEAX,Ch_MRAX]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_ECX:
            result:=([Ch_RECX,Ch_RRCX,Ch_WECX,Ch_WRCX,Ch_RWECX,Ch_RWRCX,Ch_MECX,Ch_MRCX]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_EDX:
            result:=([Ch_REDX,Ch_RRDX,Ch_WEDX,Ch_WRDX,Ch_RWEDX,Ch_RWRDX,Ch_MEDX,Ch_MRDX]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_EBX:
            result:=([Ch_REBX,Ch_RRBX,Ch_WEBX,Ch_WRBX,Ch_RWEBX,Ch_RWRBX,Ch_MEBX,Ch_MRBX]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_ESP:
            result:=([Ch_RESP,Ch_RRSP,Ch_WESP,Ch_WRSP,Ch_RWESP,Ch_RWRSP,Ch_MESP,Ch_MRSP]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_EBP:
            result:=([Ch_REBP,Ch_RRBP,Ch_WEBP,Ch_WRBP,Ch_RWEBP,Ch_RWRBP,Ch_MEBP,Ch_MRBP]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_ESI:
            { NOTE(review): Ch_RMemEDI in the ESI set looks inconsistent with
              the name - confirm against the insprop table definitions }
            result:=([Ch_RESI,Ch_RRSI,Ch_WESI,Ch_WRSI,Ch_RWESI,Ch_RWRSI,Ch_MESI,Ch_MRSI,Ch_RMemEDI]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_EDI:
            result:=([Ch_REDI,Ch_RRDI,Ch_WEDI,Ch_WRDI,Ch_RWEDI,Ch_RWRDI,Ch_MEDI,Ch_MRDI,Ch_WMemEDI]*insprop[taicpu(p1).opcode].Ch)<>[];
          else
            ;
        end;
        if result then
          exit;
      end
    else if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
      begin
        { whole-flags usage covers every sub-flag }
        if ([Ch_RFlags,Ch_WFlags,Ch_RWFlags,Ch_RFLAGScc]*insprop[taicpu(p1).opcode].Ch)<>[] then
          exit(true);
        { otherwise check only the individual flag bit asked for }
        case getsubreg(reg) of
          R_SUBFLAGCARRY:
            Result:=([Ch_RCarryFlag,Ch_RWCarryFlag,Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGPARITY:
            Result:=([Ch_RParityFlag,Ch_RWParityFlag,Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGAUXILIARY:
            Result:=([Ch_RAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGZERO:
            Result:=([Ch_RZeroFlag,Ch_RWZeroFlag,Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGSIGN:
            Result:=([Ch_RSignFlag,Ch_RWSignFlag,Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGOVERFLOW:
            Result:=([Ch_ROverflowFlag,Ch_RWOverflowFlag,Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGINTERRUPT:
            Result:=([Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGDIRECTION:
            Result:=([Ch_RDirFlag,Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*insprop[taicpu(p1).opcode].Ch)<>[];
          else
            ;
        end;
        if result then
          exit;
      end
    else if (getregtype(reg)=R_FPUREGISTER) and (Ch_FPU in insprop[taicpu(p1).opcode].Ch) then
      exit(true);
    { fall back to scanning the explicit operands }
    Result:=inherited RegInInstruction(Reg, p1);
  end;
  524. {$ifdef DEBUG_AOPTCPU}
{ Inserts debug message s as an assembler comment immediately before p }
procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
  begin
    asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  end;

{ Debug helper: renders an integer for use in optimizer messages }
function debug_tostr(i: tcgint): string; inline;
  begin
    Result := tostr(i);
  end;

{ Debug helper: renders a register in AT&T style, e.g. %eax }
function debug_regname(r: TRegister): string; inline;
  begin
    Result := '%' + std_regname(r);
  end;
  537. { Debug output function - creates a string representation of an operator }
  538. function debug_operstr(oper: TOper): string;
  539. begin
  540. case oper.typ of
  541. top_const:
  542. Result := '$' + debug_tostr(oper.val);
  543. top_reg:
  544. Result := debug_regname(oper.reg);
  545. top_ref:
  546. begin
  547. if oper.ref^.offset <> 0 then
  548. Result := debug_tostr(oper.ref^.offset) + '('
  549. else
  550. Result := '(';
  551. if (oper.ref^.base <> NR_INVALID) and (oper.ref^.base <> NR_NO) then
  552. begin
  553. Result := Result + debug_regname(oper.ref^.base);
  554. if (oper.ref^.index <> NR_INVALID) and (oper.ref^.index <> NR_NO) then
  555. Result := Result + ',' + debug_regname(oper.ref^.index);
  556. end
  557. else
  558. if (oper.ref^.index <> NR_INVALID) and (oper.ref^.index <> NR_NO) then
  559. Result := Result + debug_regname(oper.ref^.index);
  560. if (oper.ref^.scalefactor > 1) then
  561. Result := Result + ',' + debug_tostr(oper.ref^.scalefactor) + ')'
  562. else
  563. Result := Result + ')';
  564. end;
  565. else
  566. Result := '[UNKNOWN]';
  567. end;
  568. end;
{ Debug helper: opcode to its GAS mnemonic }
function debug_op2str(opcode: tasmop): string; inline;
  begin
    Result := std_op2str[opcode];
  end;

{ Debug helper: operand size to its GAS suffix string }
function debug_opsize2str(opsize: topsize): string; inline;
  begin
    Result := gas_opsize2str[opsize];
  end;
  577. {$else DEBUG_AOPTCPU}
{ Stub versions for builds without DEBUG_AOPTCPU: the debug helpers
  compile to no-ops/empty strings so call sites need no conditionals }
procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  begin
  end;

function debug_tostr(i: tcgint): string; inline;
  begin
    Result := '';
  end;

function debug_regname(r: TRegister): string; inline;
  begin
    Result := '';
  end;

function debug_operstr(oper: TOper): string; inline;
  begin
    Result := '';
  end;

function debug_op2str(opcode: tasmop): string; inline;
  begin
    Result := '';
  end;

function debug_opsize2str(opsize: topsize): string; inline;
  begin
    Result := '';
  end;
  601. {$endif DEBUG_AOPTCPU}
  602. function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
  603. begin
  604. if not SuperRegistersEqual(reg1,reg2) then
  605. exit(false);
  606. if getregtype(reg1)<>R_INTREGISTER then
  607. exit(true); {because SuperRegisterEqual is true}
  608. case getsubreg(reg1) of
  609. { A write to R_SUBL doesn't change R_SUBH and if reg2 is R_SUBW or
  610. higher, it preserves the high bits, so the new value depends on
  611. reg2's previous value. In other words, it is equivalent to doing:
  612. reg2 := (reg2 and $ffffff00) or byte(reg1); }
  613. R_SUBL:
  614. exit(getsubreg(reg2)=R_SUBL);
  615. { A write to R_SUBH doesn't change R_SUBL and if reg2 is R_SUBW or
  616. higher, it actually does a:
  617. reg2 := (reg2 and $ffff00ff) or (reg1 and $ff00); }
  618. R_SUBH:
  619. exit(getsubreg(reg2)=R_SUBH);
  620. { If reg2 is R_SUBD or larger, a write to R_SUBW preserves the high 16
  621. bits of reg2:
  622. reg2 := (reg2 and $ffff0000) or word(reg1); }
  623. R_SUBW:
  624. exit(getsubreg(reg2) in [R_SUBL,R_SUBH,R_SUBW]);
  625. { a write to R_SUBD always overwrites every other subregister,
  626. because it clears the high 32 bits of R_SUBQ on x86_64 }
  627. R_SUBD,
  628. R_SUBQ:
  629. exit(true);
  630. else
  631. internalerror(2017042801);
  632. end;
  633. end;
  634. function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
  635. begin
  636. if not SuperRegistersEqual(reg1,reg2) then
  637. exit(false);
  638. if getregtype(reg1)<>R_INTREGISTER then
  639. exit(true); {because SuperRegisterEqual is true}
  640. case getsubreg(reg1) of
  641. R_SUBL:
  642. exit(getsubreg(reg2)<>R_SUBH);
  643. R_SUBH:
  644. exit(getsubreg(reg2)<>R_SUBL);
  645. R_SUBW,
  646. R_SUBD,
  647. R_SUBQ:
  648. exit(true);
  649. else
  650. internalerror(2017042802);
  651. end;
  652. end;
{ changes the code sequence
    shr/sar const1, x
    shl const2, x
  to
  either "sar/and", "shl/and" or just "and" depending on const1 and const2.
  The and-mask clears exactly the bits the original shift pair zeroed. }
function TX86AsmOptimizer.PrePeepholeOptSxx(var p : tai) : boolean;
  var
    hp1 : tai;
    l : TCGInt;
  begin
    result:=false;
    if GetNextInstruction(p, hp1) and
      MatchInstruction(hp1,A_SHL,[]) and
      (taicpu(p).oper[0]^.typ = top_const) and
      (taicpu(hp1).oper[0]^.typ = top_const) and
      (taicpu(hp1).opsize = taicpu(p).opsize) and
      (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
      OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
      begin
        if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
          not(cs_opt_size in current_settings.optimizerswitches) then
          begin
            { shr/sar const1, %reg
              shl const2, %reg
              with const1 > const2 }
            { keep the right shift, reduced by const2, and turn the shl
              into an and that clears the low const2 bits }
            taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
            taicpu(hp1).opcode := A_AND;
            l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
            case taicpu(p).opsize Of
              S_B: taicpu(hp1).loadConst(0,l Xor $ff);
              S_W: taicpu(hp1).loadConst(0,l Xor $ffff);
              S_L: taicpu(hp1).loadConst(0,l Xor tcgint($ffffffff));
              S_Q: taicpu(hp1).loadConst(0,l Xor tcgint($ffffffffffffffff));
              else
                Internalerror(2017050703)
            end;
          end
        else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
          not(cs_opt_size in current_settings.optimizerswitches) then
          begin
            { shr/sar const1, %reg
              shl const2, %reg
              with const1 < const2 }
            { keep the left shift, reduced by const1, and turn the shr/sar
              into an and that clears the low const1 bits }
            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
            taicpu(p).opcode := A_AND;
            l := (1 shl (taicpu(p).oper[0]^.val))-1;
            case taicpu(p).opsize Of
              S_B: taicpu(p).loadConst(0,l Xor $ff);
              S_W: taicpu(p).loadConst(0,l Xor $ffff);
              S_L: taicpu(p).loadConst(0,l Xor tcgint($ffffffff));
              S_Q: taicpu(p).loadConst(0,l Xor tcgint($ffffffffffffffff));
              else
                Internalerror(2017050702)
            end;
          end
        else if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
          begin
            { shr/sar const1, %reg
              shl const2, %reg
              with const1 = const2 }
            { the shifts cancel out except for the cleared low bits;
              a single and suffices and the shl is removed }
            taicpu(p).opcode := A_AND;
            l := (1 shl (taicpu(p).oper[0]^.val))-1;
            case taicpu(p).opsize Of
              S_B: taicpu(p).loadConst(0,l Xor $ff);
              S_W: taicpu(p).loadConst(0,l Xor $ffff);
              S_L: taicpu(p).loadConst(0,l Xor tcgint($ffffffff));
              S_Q: taicpu(p).loadConst(0,l Xor tcgint($ffffffffffffffff));
              else
                Internalerror(2017050701)
            end;
            asml.remove(hp1);
            hp1.free;
          end;
      end;
  end;
{ Rewrites "imul $const,%reg[,%reg]": multiplications by 1 become a nop
  or a plain mov, and constants with exactly two set bits become a
  lea (+ optional shl) pair. }
function TX86AsmOptimizer.PrePeepholeOptIMUL(var p : tai) : boolean;
  var
    opsize : topsize;
    hp1 : tai;
    tmpref : treference;
    ShiftValue : Cardinal;
    BaseValue : TCGInt;
  begin
    result:=false;
    opsize:=taicpu(p).opsize;
    { changes certain "imul const, %reg"'s to lea sequences }
    if (MatchOpType(taicpu(p),top_const,top_reg) or
      MatchOpType(taicpu(p),top_const,top_reg,top_reg)) and
      (opsize in [S_L{$ifdef x86_64},S_Q{$endif x86_64}]) then
      if (taicpu(p).oper[0]^.val = 1) then
        if (taicpu(p).ops = 2) then
          { remove "imul $1, reg" }
          begin
            hp1 := tai(p.Next);
            DebugMsg(SPeepholeOptimization + 'Imul2Nop done',p);
            RemoveCurrentP(p);
            result:=true;
          end
        else
          { change "imul $1, reg1, reg2" to "mov reg1, reg2" }
          begin
            hp1 := taicpu.Op_Reg_Reg(A_MOV, opsize, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
            InsertLLItem(p.previous, p.next, hp1);
            DebugMsg(SPeepholeOptimization + 'Imul2Mov done',p);
            p.free;
            p := hp1;
          end
      { skip the rewrite when a following jo/jno relies on imul's
        overflow flag, which lea/shl do not produce }
      else if ((taicpu(p).ops <= 2) or
        (taicpu(p).oper[2]^.typ = Top_Reg)) and
        not(cs_opt_size in current_settings.optimizerswitches) and
        (not(GetNextInstruction(p, hp1)) or
         not((tai(hp1).typ = ait_instruction) and
             ((taicpu(hp1).opcode=A_Jcc) and
              (taicpu(hp1).condition in [C_O,C_NO])))) then
        begin
          {
            imul X, reg1, reg2 to
              lea (reg1,reg1,Y), reg2
              shl ZZ,reg2
            imul XX, reg1 to
              lea (reg1,reg1,YY), reg1
              shl ZZ,reg2
            This optimization makes sense for pretty much every x86, except the VIA Nano3000: it has IMUL latency 2, lea/shl pair as well,
            it does not exist as a separate optimization target in FPC though.
            This optimization can be applied as long as only two bits are set in the constant and those two bits are separated by
            at most two zeros
          }
          reference_reset(tmpref,1,[]);
          if (PopCnt(QWord(taicpu(p).oper[0]^.val))=2) and (BsrQWord(taicpu(p).oper[0]^.val)-BsfQWord(taicpu(p).oper[0]^.val)<=3) then
            begin
              ShiftValue:=BsfQWord(taicpu(p).oper[0]^.val);
              BaseValue:=taicpu(p).oper[0]^.val shr ShiftValue;
              TmpRef.base := taicpu(p).oper[1]^.reg;
              TmpRef.index := taicpu(p).oper[1]^.reg;
              { with two set bits at most three positions apart, the odd
                factor after shifting out the low zeros must be 3, 5 or 9 }
              if not(BaseValue in [3,5,9]) then
                Internalerror(2018110101);
              TmpRef.ScaleFactor := BaseValue-1;
              if (taicpu(p).ops = 2) then
                hp1 := taicpu.op_ref_reg(A_LEA, opsize, TmpRef, taicpu(p).oper[1]^.reg)
              else
                hp1 := taicpu.op_ref_reg(A_LEA, opsize, TmpRef, taicpu(p).oper[2]^.reg);
              AsmL.InsertAfter(hp1,p);
              DebugMsg(SPeepholeOptimization + 'Imul2LeaShl done',p);
              taicpu(hp1).fileinfo:=taicpu(p).fileinfo;
              RemoveCurrentP(p);
              if ShiftValue>0 then
                AsmL.InsertAfter(taicpu.op_const_reg(A_SHL, opsize, ShiftValue, taicpu(hp1).oper[1]^.reg),hp1);
            end;
        end;
  end;
  803. function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  804. var
  805. p: taicpu;
  806. begin
  807. if not assigned(hp) or
  808. (hp.typ <> ait_instruction) then
  809. begin
  810. Result := false;
  811. exit;
  812. end;
  813. p := taicpu(hp);
  814. if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
  815. with insprop[p.opcode] do
  816. begin
  817. case getsubreg(reg) of
  818. R_SUBW,R_SUBD,R_SUBQ:
  819. Result:=
  820. RegLoadedWithNewValue(NR_CARRYFLAG,hp) and
  821. RegLoadedWithNewValue(NR_PARITYFLAG,hp) and
  822. RegLoadedWithNewValue(NR_AUXILIARYFLAG,hp) and
  823. RegLoadedWithNewValue(NR_ZEROFLAG,hp) and
  824. RegLoadedWithNewValue(NR_SIGNFLAG,hp) and
  825. RegLoadedWithNewValue(NR_OVERFLOWFLAG,hp);
  826. R_SUBFLAGCARRY:
  827. Result:=[Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag,Ch_WFlags]*Ch<>[];
  828. R_SUBFLAGPARITY:
  829. Result:=[Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag,Ch_WFlags]*Ch<>[];
  830. R_SUBFLAGAUXILIARY:
  831. Result:=[Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag,Ch_WFlags]*Ch<>[];
  832. R_SUBFLAGZERO:
  833. Result:=[Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag,Ch_WFlags]*Ch<>[];
  834. R_SUBFLAGSIGN:
  835. Result:=[Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag,Ch_WFlags]*Ch<>[];
  836. R_SUBFLAGOVERFLOW:
  837. Result:=[Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag,Ch_WFlags]*Ch<>[];
  838. R_SUBFLAGINTERRUPT:
  839. Result:=[Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*Ch<>[];
  840. R_SUBFLAGDIRECTION:
  841. Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*Ch<>[];
  842. else
  843. begin
  844. writeln(getsubreg(reg));
  845. internalerror(2017050501);
  846. end;
  847. end;
  848. exit;
  849. end;
  850. Result :=
  851. (((p.opcode = A_MOV) or
  852. (p.opcode = A_MOVZX) or
  853. (p.opcode = A_MOVSX) or
  854. (p.opcode = A_LEA) or
  855. (p.opcode = A_VMOVSS) or
  856. (p.opcode = A_VMOVSD) or
  857. (p.opcode = A_VMOVAPD) or
  858. (p.opcode = A_VMOVAPS) or
  859. (p.opcode = A_VMOVQ) or
  860. (p.opcode = A_MOVSS) or
  861. (p.opcode = A_MOVSD) or
  862. (p.opcode = A_MOVQ) or
  863. (p.opcode = A_MOVAPD) or
  864. (p.opcode = A_MOVAPS) or
  865. {$ifndef x86_64}
  866. (p.opcode = A_LDS) or
  867. (p.opcode = A_LES) or
  868. {$endif not x86_64}
  869. (p.opcode = A_LFS) or
  870. (p.opcode = A_LGS) or
  871. (p.opcode = A_LSS)) and
  872. (p.ops=2) and { A_MOVSD can have zero operands, so this check is needed }
  873. (p.oper[1]^.typ = top_reg) and
  874. (Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)) and
  875. ((p.oper[0]^.typ = top_const) or
  876. ((p.oper[0]^.typ = top_reg) and
  877. not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
  878. ((p.oper[0]^.typ = top_ref) and
  879. not RegInRef(reg,p.oper[0]^.ref^)))) or
  880. ((p.opcode = A_POP) and
  881. (Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg))) or
  882. ((p.opcode = A_IMUL) and
  883. (p.ops=3) and
  884. (Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg,reg)) and
  885. (((p.oper[1]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[1]^.reg,reg))) or
  886. ((p.oper[1]^.typ=top_ref) and not(RegInRef(reg,p.oper[1]^.ref^))))) or
  887. ((((p.opcode = A_IMUL) or
  888. (p.opcode = A_MUL)) and
  889. (p.ops=1)) and
  890. (((p.oper[0]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
  891. ((p.oper[0]^.typ=top_ref) and not(RegInRef(reg,p.oper[0]^.ref^)))) and
  892. (((p.opsize=S_B) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
  893. ((p.opsize=S_W) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
  894. ((p.opsize=S_L) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg))
  895. {$ifdef x86_64}
  896. or ((p.opsize=S_Q) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg))
  897. {$endif x86_64}
  898. )) or
  899. ((p.opcode = A_CWD) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
  900. ((p.opcode = A_CDQ) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg)) or
  901. {$ifdef x86_64}
  902. ((p.opcode = A_CQO) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg)) or
  903. {$endif x86_64}
  904. ((p.opcode = A_CBW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
  905. {$ifndef x86_64}
  906. ((p.opcode = A_LDS) and (reg=NR_DS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  907. ((p.opcode = A_LES) and (reg=NR_ES) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  908. {$endif not x86_64}
  909. ((p.opcode = A_LFS) and (reg=NR_FS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  910. ((p.opcode = A_LGS) and (reg=NR_GS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  911. ((p.opcode = A_LSS) and (reg=NR_SS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  912. {$ifndef x86_64}
  913. ((p.opcode = A_AAM) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
  914. {$endif not x86_64}
  915. ((p.opcode = A_LAHF) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
  916. ((p.opcode = A_LODSB) and Reg1WriteOverwritesReg2Entirely(NR_AL,reg)) or
  917. ((p.opcode = A_LODSW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg)) or
  918. ((p.opcode = A_LODSD) and Reg1WriteOverwritesReg2Entirely(NR_EAX,reg)) or
  919. {$ifdef x86_64}
  920. ((p.opcode = A_LODSQ) and Reg1WriteOverwritesReg2Entirely(NR_RAX,reg)) or
  921. {$endif x86_64}
  922. ((p.opcode = A_SETcc) and (p.oper[0]^.typ=top_reg) and Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
  923. (((p.opcode = A_FSTSW) or
  924. (p.opcode = A_FNSTSW)) and
  925. (p.oper[0]^.typ=top_reg) and
  926. Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
  927. (((p.opcode = A_XOR) or (p.opcode = A_SUB) or (p.opcode = A_SBB)) and
  928. (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
  929. (p.oper[0]^.reg=p.oper[1]^.reg) and
  930. Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg));
  931. end;
{ Returns True if p starts a recognised function-exit sequence:
    ret
    leave; ret
    lea x(%sp),%sp; ret
    mov %fp,%sp | lea x(%fp),%sp; pop %fp; ret
  A leading NOP is skipped first; note that p is advanced past it. }
class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
  var
    hp2,hp3 : tai;
  begin
    { some x86-64 issue a NOP before the real exit code }
    if MatchInstruction(p,A_NOP,[]) then
      GetNextInstruction(p,p);
    result:=assigned(p) and (p.typ=ait_instruction) and
      ((taicpu(p).opcode = A_RET) or
       { leave; ret }
       ((taicpu(p).opcode=A_LEAVE) and
        GetNextInstruction(p,hp2) and
        MatchInstruction(hp2,A_RET,[S_NO])
       ) or
       { stack-pointer adjustment via lea, then ret }
       (((taicpu(p).opcode=A_LEA) and
         MatchOpType(taicpu(p),top_ref,top_reg) and
         (taicpu(p).oper[0]^.ref^.base=NR_STACK_POINTER_REG) and
         (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)
        ) and
        GetNextInstruction(p,hp2) and
        MatchInstruction(hp2,A_RET,[S_NO])
       ) or
       { restore stack pointer from frame pointer, pop it, then ret }
       ((((taicpu(p).opcode=A_MOV) and
          MatchOpType(taicpu(p),top_reg,top_reg) and
          (taicpu(p).oper[0]^.reg=current_procinfo.framepointer) and
          (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)) or
         ((taicpu(p).opcode=A_LEA) and
          MatchOpType(taicpu(p),top_ref,top_reg) and
          (taicpu(p).oper[0]^.ref^.base=current_procinfo.framepointer) and
          (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)
         )
        ) and
        GetNextInstruction(p,hp2) and
        MatchInstruction(hp2,A_POP,[reg2opsize(current_procinfo.framepointer)]) and
        MatchOpType(taicpu(hp2),top_reg) and
        (taicpu(hp2).oper[0]^.reg=current_procinfo.framepointer) and
        GetNextInstruction(hp2,hp3) and
        MatchInstruction(hp3,A_RET,[S_NO])
       )
      );
  end;
  972. class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
  973. begin
  974. isFoldableArithOp := False;
  975. case hp1.opcode of
  976. A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
  977. isFoldableArithOp :=
  978. ((taicpu(hp1).oper[0]^.typ = top_const) or
  979. ((taicpu(hp1).oper[0]^.typ = top_reg) and
  980. (taicpu(hp1).oper[0]^.reg <> reg))) and
  981. (taicpu(hp1).oper[1]^.typ = top_reg) and
  982. (taicpu(hp1).oper[1]^.reg = reg);
  983. A_INC,A_DEC,A_NEG,A_NOT:
  984. isFoldableArithOp :=
  985. (taicpu(hp1).oper[0]^.typ = top_reg) and
  986. (taicpu(hp1).oper[0]^.reg = reg);
  987. else
  988. ;
  989. end;
  990. end;
{ Removes the last deallocation marker of the function-result
  register(s) before p, so the result register stays live through the
  exit code.  EAX is handled for all returning types; EDX additionally
  for 8-byte ordinal results (int64/qword on 32-bit targets). }
procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);

  { walks backwards from p and deletes the first ra_dealloc of supreg,
    giving up once an instruction that uses the register is crossed }
  procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
    var
      hp2: tai;
    begin
      hp2 := p;
      repeat
        hp2 := tai(hp2.previous);
        if assigned(hp2) and
          (hp2.typ = ait_regalloc) and
          (tai_regalloc(hp2).ratype=ra_dealloc) and
          (getregtype(tai_regalloc(hp2).reg) = R_INTREGISTER) and
          (getsupreg(tai_regalloc(hp2).reg) = supreg) then
          begin
            asml.remove(hp2);
            hp2.free;
            break;
          end;
      until not(assigned(hp2)) or regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),hp2);
    end;

  begin
    case current_procinfo.procdef.returndef.typ of
      arraydef,recorddef,pointerdef,
      stringdef,enumdef,procdef,objectdef,errordef,
      filedef,setdef,procvardef,
      classrefdef,forwarddef:
        DoRemoveLastDeallocForFuncRes(RS_EAX);
      orddef:
        if current_procinfo.procdef.returndef.size <> 0 then
          begin
            DoRemoveLastDeallocForFuncRes(RS_EAX);
            { for int64/qword }
            if current_procinfo.procdef.returndef.size = 8 then
              DoRemoveLastDeallocForFuncRes(RS_EDX);
          end;
      else
        ;
    end;
  end;
{ Pass-1 optimizations for (v)movap* with two register operands:
  removes self-moves, merges move chains, folds moves around FMA
  instructions and around scalar addsX/subsX/mulsX/divsX sequences. }
function TX86AsmOptimizer.OptPass1_V_MOVAP(var p : tai) : boolean;
  var
    hp1,hp2 : tai;
  begin
    result:=false;
    if MatchOpType(taicpu(p),top_reg,top_reg) then
      begin
        { vmova* reg1,reg1
          =>
          <nop> }
        if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
          begin
            { NOTE(review): the GetNextInstruction result is ignored here;
              presumably hp1 becomes nil at the list end - confirm callers
              cope with p=nil }
            GetNextInstruction(p,hp1);
            asml.Remove(p);
            p.Free;
            p:=hp1;
            result:=true;
          end
        else if GetNextInstruction(p,hp1) then
          begin
            if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
              MatchOpType(taicpu(hp1),top_reg,top_reg) and
              MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
              begin
                { vmova* reg1,reg2
                  vmova* reg2,reg3
                  dealloc reg2
                  =>
                  vmova* reg1,reg3 }
                TransferUsedRegs(TmpUsedRegs);
                UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                  begin
                    taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                    asml.Remove(hp1);
                    hp1.Free;
                    result:=true;
                  end
                { special case:
                  vmova* reg1,reg2
                  vmova* reg2,reg1
                  =>
                  vmova* reg1,reg2 }
                else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
                  begin
                    asml.Remove(hp1);
                    hp1.Free;
                    result:=true;
                  end
              end
            else if MatchInstruction(hp1,[A_VFMADDPD,
                                          A_VFMADD132PD,
                                          A_VFMADD132PS,
                                          A_VFMADD132SD,
                                          A_VFMADD132SS,
                                          A_VFMADD213PD,
                                          A_VFMADD213PS,
                                          A_VFMADD213SD,
                                          A_VFMADD213SS,
                                          A_VFMADD231PD,
                                          A_VFMADD231PS,
                                          A_VFMADD231SD,
                                          A_VFMADD231SS,
                                          A_VFMADDSUB132PD,
                                          A_VFMADDSUB132PS,
                                          A_VFMADDSUB213PD,
                                          A_VFMADDSUB213PS,
                                          A_VFMADDSUB231PD,
                                          A_VFMADDSUB231PS,
                                          A_VFMSUB132PD,
                                          A_VFMSUB132PS,
                                          A_VFMSUB132SD,
                                          A_VFMSUB132SS,
                                          A_VFMSUB213PD,
                                          A_VFMSUB213PS,
                                          A_VFMSUB213SD,
                                          A_VFMSUB213SS,
                                          A_VFMSUB231PD,
                                          A_VFMSUB231PS,
                                          A_VFMSUB231SD,
                                          A_VFMSUB231SS,
                                          A_VFMSUBADD132PD,
                                          A_VFMSUBADD132PS,
                                          A_VFMSUBADD213PD,
                                          A_VFMSUBADD213PS,
                                          A_VFMSUBADD231PD,
                                          A_VFMSUBADD231PS,
                                          A_VFNMADD132PD,
                                          A_VFNMADD132PS,
                                          A_VFNMADD132SD,
                                          A_VFNMADD132SS,
                                          A_VFNMADD213PD,
                                          A_VFNMADD213PS,
                                          A_VFNMADD213SD,
                                          A_VFNMADD213SS,
                                          A_VFNMADD231PD,
                                          A_VFNMADD231PS,
                                          A_VFNMADD231SD,
                                          A_VFNMADD231SS,
                                          A_VFNMSUB132PD,
                                          A_VFNMSUB132PS,
                                          A_VFNMSUB132SD,
                                          A_VFNMSUB132SS,
                                          A_VFNMSUB213PD,
                                          A_VFNMSUB213PS,
                                          A_VFNMSUB213SD,
                                          A_VFNMSUB213SS,
                                          A_VFNMSUB231PD,
                                          A_VFNMSUB231PS,
                                          A_VFNMSUB231SD,
                                          A_VFNMSUB231SS],[S_NO]) and
              { we mix single and double operations here because we assume that the compiler
                generates vmovapd only after double operations and vmovaps only after single operations }
              MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
              GetNextInstruction(hp1,hp2) and
              MatchInstruction(hp2,[A_VMOVAPD,A_VMOVAPS,A_MOVAPD,A_MOVAPS],[S_NO]) and
              MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
              begin
                { fold the moves into the FMA's accumulator operand }
                TransferUsedRegs(TmpUsedRegs);
                UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs))
                 then
                  begin
                    taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
                    asml.Remove(p);
                    p.Free;
                    asml.Remove(hp2);
                    hp2.Free;
                    { NOTE(review): result is not set to true here although
                      the list was changed - confirm whether intentional }
                    p:=hp1;
                  end;
              end
            else if (hp1.typ = ait_instruction) and
              GetNextInstruction(hp1, hp2) and
              MatchInstruction(hp2,taicpu(p).opcode,[]) and
              OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
              MatchOpType(taicpu(hp2),top_reg,top_reg) and
              MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
              (((taicpu(p).opcode=A_MOVAPS) and
                ((taicpu(hp1).opcode=A_ADDSS) or (taicpu(hp1).opcode=A_SUBSS) or
                 (taicpu(hp1).opcode=A_MULSS) or (taicpu(hp1).opcode=A_DIVSS))) or
               ((taicpu(p).opcode=A_MOVAPD) and
                ((taicpu(hp1).opcode=A_ADDSD) or (taicpu(hp1).opcode=A_SUBSD) or
                 (taicpu(hp1).opcode=A_MULSD) or (taicpu(hp1).opcode=A_DIVSD)))
              ) then
              { change
                movapX reg,reg2
                addsX/subsX/... reg3, reg2
                movapX reg2,reg
                to
                addsX/subsX/... reg3,reg
              }
              begin
                TransferUsedRegs(TmpUsedRegs);
                UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
                  begin
                    DebugMsg(SPeepholeOptimization + 'MovapXOpMovapX2Op ('+
                      debug_op2str(taicpu(p).opcode)+' '+
                      debug_op2str(taicpu(hp1).opcode)+' '+
                      debug_op2str(taicpu(hp2).opcode)+') done',p);
                    { we cannot eliminate the first move if
                      the operations uses the same register for source and dest }
                    if not(OpsEqual(taicpu(hp1).oper[1]^,taicpu(hp1).oper[0]^)) then
                      begin
                        asml.remove(p);
                        p.Free;
                      end;
                    taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
                    asml.remove(hp2);
                    hp2.Free;
                    p:=hp1;
                    result:=true;
                  end;
              end;
          end;
      end;
  end;
{ replace
    V<Op>X %mreg1,%mreg2,%mreg3
    VMovX %mreg3,%mreg4
    dealloc %mreg3
  by
    V<Op>X %mreg1,%mreg2,%mreg4
  ?
}
function TX86AsmOptimizer.OptPass1VOP(var p : tai) : boolean;
  var
    hp1 : tai;
  begin
    result:=false;
    if GetNextInstruction(p,hp1) and
      { we mix single and double operations here because we assume that the compiler
        generates vmovapd only after double operations and vmovaps only after single operations }
      MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
      MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
      (taicpu(hp1).oper[1]^.typ=top_reg) then
      begin
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.next));
        { the intermediate register must die at the vmov for the
          retargeting to be safe }
        if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)
         ) then
          begin
            taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
            DebugMsg(SPeepholeOptimization + 'VOpVmov2VOp done',p);
            asml.Remove(hp1);
            hp1.Free;
            result:=true;
          end;
      end;
  end;
{ First-pass peephole optimizations applied when the current instruction p
  is a MOV.  Each section below matches a short instruction pattern that
  begins at p and rewrites it into a cheaper equivalent.  Returns True when
  the instruction list was changed in a way that warrants re-running the
  pass at the (possibly advanced) p. }
function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  var
    hp1, hp2: tai;
    { cached "does p have a successor instruction?" result; hp1 is that
      successor and is reused by all the two-instruction patterns below }
    GetNextInstruction_p: Boolean;
    PreMessage, RegName1, RegName2, InputVal, MaskNum: string;
    NewSize: topsize;
  begin
    Result:=false;
    GetNextInstruction_p:=GetNextInstruction(p, hp1);
    { remove mov reg1,reg1? }
    if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^)
      then
      begin
        DebugMsg(SPeepholeOptimization + 'Mov2Nop done',p);
        { take care of the register (de)allocs following p }
        UpdateUsedRegs(tai(p.next));
        asml.remove(p);
        p.free;
        p:=hp1;
        Result:=true;
        exit;
      end;
    { mov x,%reg followed by and $const,%reg }
    if GetNextInstruction_p and
      MatchInstruction(hp1,A_AND,[]) and
      (taicpu(p).oper[1]^.typ = top_reg) and
      MatchOpType(taicpu(hp1),top_const,top_reg) then
      begin
        if MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
          begin
            { same register and same size: the and is a no-op if the mask
              covers the full width }
            case taicpu(p).opsize of
              S_L:
                if (taicpu(hp1).oper[0]^.val = $ffffffff) then
                  begin
                    { Optimize out:
                        mov x, %reg
                        and ffffffffh, %reg
                    }
                    DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 1 done',p);
                    asml.remove(hp1);
                    hp1.free;
                    Result:=true;
                    exit;
                  end;
              S_Q: { TODO: Confirm if this is even possible }
                if (taicpu(hp1).oper[0]^.val = $ffffffffffffffff) then
                  begin
                    { Optimize out:
                        mov x, %reg
                        and ffffffffffffffffh, %reg
                    }
                    DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 2 done',p);
                    asml.remove(hp1);
                    hp1.free;
                    Result:=true;
                    exit;
                  end;
              else
                ;
            end;
          end
        else if (taicpu(p).oper[1]^.typ = top_reg) and (taicpu(hp1).oper[1]^.typ = top_reg) and
          (taicpu(p).oper[0]^.typ <> top_const) and { MOVZX only supports registers and memory, not immediates (use MOV for that!) }
          (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))
          then
          { same super-register but different sub-register sizes: a
            full-width mask turns mov+and into a single movzx }
          begin
            InputVal := debug_operstr(taicpu(p).oper[0]^);
            MaskNum := debug_tostr(taicpu(hp1).oper[0]^.val);
            case taicpu(p).opsize of
              S_B:
                if (taicpu(hp1).oper[0]^.val = $ff) then
                  begin
                    { Convert:
                        movb x, %regl        movb x, %regl
                        andw ffh, %regw      andl ffh, %regd
                      To:
                        movzbw x, %regd      movzbl x, %regd
                      (Identical registers, just different sizes)
                    }
                    RegName1 := debug_regname(taicpu(p).oper[1]^.reg); { 8-bit register name }
                    RegName2 := debug_regname(taicpu(hp1).oper[1]^.reg); { 16/32-bit register name }
                    case taicpu(hp1).opsize of
                      S_W: NewSize := S_BW;
                      S_L: NewSize := S_BL;
{$ifdef x86_64}
                      S_Q: NewSize := S_BQ;
{$endif x86_64}
                      else
                        InternalError(2018011510);
                    end;
                  end
                else
                  NewSize := S_NO;
              S_W:
                if (taicpu(hp1).oper[0]^.val = $ffff) then
                  begin
                    { Convert:
                        movw x, %regw
                        andl ffffh, %regd
                      To:
                        movzwl x, %regd
                      (Identical registers, just different sizes)
                    }
                    RegName1 := debug_regname(taicpu(p).oper[1]^.reg); { 16-bit register name }
                    RegName2 := debug_regname(taicpu(hp1).oper[1]^.reg); { 32-bit register name }
                    case taicpu(hp1).opsize of
                      S_L: NewSize := S_WL;
{$ifdef x86_64}
                      S_Q: NewSize := S_WQ;
{$endif x86_64}
                      else
                        InternalError(2018011511);
                    end;
                  end
                else
                  NewSize := S_NO;
              else
                NewSize := S_NO;
            end;
            if NewSize <> S_NO then
              begin
                PreMessage := 'mov' + debug_opsize2str(taicpu(p).opsize) + ' ' + InputVal + ',' + RegName1;
                { The actual optimization }
                taicpu(p).opcode := A_MOVZX;
                taicpu(p).changeopsize(NewSize);
                taicpu(p).oper[1]^ := taicpu(hp1).oper[1]^;
                { Safeguard if "and" is followed by a conditional command }
                TransferUsedRegs(TmpUsedRegs);
                UpdateUsedRegs(TmpUsedRegs,tai(p.next));
                if (RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp1, TmpUsedRegs)) then
                  begin
                    { At this point, the "and" command is effectively equivalent to
                      "test %reg,%reg". This will be handled separately by the
                      Peephole Optimizer. [Kit] }
                    DebugMsg(SPeepholeOptimization + PreMessage +
                      ' -> movz' + debug_opsize2str(NewSize) + ' ' + InputVal + ',' + RegName2, p);
                  end
                else
                  begin
                    DebugMsg(SPeepholeOptimization + PreMessage + '; and' + debug_opsize2str(taicpu(hp1).opsize) + ' $' + MaskNum + ',' + RegName2 +
                      ' -> movz' + debug_opsize2str(NewSize) + ' ' + InputVal + ',' + RegName2, p);
                    asml.Remove(hp1);
                    hp1.Free;
                  end;
                Result := True;
                Exit;
              end;
          end;
      end;
    { Next instruction is also a MOV ? }
    if GetNextInstruction_p and
      MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
      begin
        if (taicpu(p).oper[1]^.typ = top_reg) and
          MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
          begin
            TransferUsedRegs(TmpUsedRegs);
            UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
            { we have
                mov x, %treg
                mov %treg, y
            }
            if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
              not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
              { we've got
                  mov x, %treg
                  mov %treg, y
                with %treg is not used after }
              case taicpu(p).oper[0]^.typ Of
                top_reg:
                  begin
                    { change
                        mov %reg, %treg
                        mov %treg, y
                      to
                        mov %reg, y
                    }
                    if taicpu(hp1).oper[1]^.typ=top_reg then
                      AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
                    taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
                    DebugMsg(SPeepholeOptimization + 'MovMov2Mov 2 done',p);
                    asml.remove(hp1);
                    hp1.free;
                    Result:=true;
                    Exit;
                  end;
                top_const:
                  begin
                    { change
                        mov const, %treg
                        mov %treg, y
                      to
                        mov const, y
                    }
                    { a const destined for memory must fit in a signed
                      32-bit immediate }
                    if (taicpu(hp1).oper[1]^.typ=top_reg) or
                      ((taicpu(p).oper[0]^.val>=low(longint)) and (taicpu(p).oper[0]^.val<=high(longint))) then
                      begin
                        if taicpu(hp1).oper[1]^.typ=top_reg then
                          AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
                        taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
                        DebugMsg(SPeepholeOptimization + 'MovMov2Mov 5 done',p);
                        asml.remove(hp1);
                        hp1.free;
                        Result:=true;
                        Exit;
                      end;
                  end;
                top_ref:
                  if (taicpu(hp1).oper[1]^.typ = top_reg) then
                    begin
                      { change
                          mov mem, %treg
                          mov %treg, %reg
                        to
                          mov mem, %reg
                      }
                      taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                      DebugMsg(SPeepholeOptimization + 'MovMov2Mov 3 done',p);
                      asml.remove(hp1);
                      hp1.free;
                      Result:=true;
                      Exit;
                    end;
                else
                  ;
              end;
          end;
        if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
          (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
          { mov reg1, mem1     or     mov mem1, reg1
            mov mem2, reg2            mov reg2, mem2}
          begin
            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
              { mov reg1, mem1     or     mov mem1, reg1
                mov mem2, reg1            mov reg2, mem1}
              begin
                if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
                  { Removes the second statement from
                      mov reg1, mem1/reg2
                      mov mem1/reg2, reg1 }
                  begin
                    if taicpu(p).oper[0]^.typ=top_reg then
                      AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                    DebugMsg(SPeepholeOptimization + 'MovMov2Mov 1',p);
                    asml.remove(hp1);
                    hp1.free;
                    Result:=true;
                    exit;
                  end
                else
                  begin
                    TransferUsedRegs(TmpUsedRegs);
                    UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                    if (taicpu(p).oper[1]^.typ = top_ref) and
                      { mov reg1, mem1
                        mov mem2, reg1 }
                      (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
                      GetNextInstruction(hp1, hp2) and
                      MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
                      OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
                      OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
                      not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
                      { change to
                          mov reg1, mem1        mov reg1, mem1
                          mov mem2, reg1        cmp reg1, mem2
                          cmp mem1, reg1
                      }
                      begin
                        asml.remove(hp2);
                        hp2.free;
                        taicpu(hp1).opcode := A_CMP;
                        taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
                        taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
                        AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
                        DebugMsg(SPeepholeOptimization + 'MovMovCmp2MovCmp done',hp1);
                        { NOTE(review): Result is left False here although the
                          list changed - confirm whether a re-run is wanted }
                      end;
                  end;
              end
            else if (taicpu(p).oper[1]^.typ=top_ref) and
              OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
              { the second mov reads the memory just written: read the
                source register instead }
              begin
                AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
                taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
                DebugMsg(SPeepholeOptimization + 'MovMov2MovMov1 done',p);
              end
            else
              begin
                TransferUsedRegs(TmpUsedRegs);
                if GetNextInstruction(hp1, hp2) and
                  MatchOpType(taicpu(p),top_ref,top_reg) and
                  MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
                  (taicpu(hp1).oper[1]^.typ = top_ref) and
                  MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
                  MatchOpType(taicpu(hp2),top_ref,top_reg) and
                  RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
                  if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
                    not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
                    { mov mem1, %reg1
                      mov %reg1, mem2
                      mov mem2, reg2
                      to:
                      mov mem1, reg2
                      mov reg2, mem2}
                    begin
                      AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
                      DebugMsg(SPeepholeOptimization + 'MovMovMov2MovMov 1 done',p);
                      taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
                      taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
                      asml.remove(hp2);
                      hp2.free;
                    end
{$ifdef i386}
                  { this is enabled for i386 only, as the rules to create the reg sets below
                    are too complicated for x86-64, so this makes this code too error prone
                    on x86-64
                  }
                  else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
                    not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
                    not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
                    { mov mem1, reg1         mov mem1, reg1
                      mov reg1, mem2         mov reg1, mem2
                      mov mem2, reg2         mov mem2, reg1
                      to:                    to:
                      mov mem1, reg1         mov mem1, reg1
                      mov mem1, reg2         mov reg1, mem2
                      mov reg1, mem2
                      or (if mem1 depends on reg1
                      and/or if mem2 depends on reg2)
                      to:
                      mov mem1, reg1
                      mov reg1, mem2
                      mov reg1, reg2
                    }
                    begin
                      taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
                      taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
                      taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
                      taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
                      AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
                      { extend the allocation of any general-purpose register
                        used inside the reference over the rewritten range }
                      if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
                        (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
                        AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
                      if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
                        (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
                        AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
                    end
                  else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
                    begin
                      taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
                      AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
                    end
                  else
                    begin
                      asml.remove(hp2);
                      hp2.free;
                    end
{$endif i386}
                  ;
              end;
          end;
        (* { movl [mem1],reg1
             movl [mem1],reg2
             to
             movl [mem1],reg1
             movl reg1,reg2
           }
          else if (taicpu(p).oper[0]^.typ = top_ref) and
            (taicpu(p).oper[1]^.typ = top_reg) and
            (taicpu(hp1).oper[0]^.typ = top_ref) and
            (taicpu(hp1).oper[1]^.typ = top_reg) and
            (taicpu(p).opsize = taicpu(hp1).opsize) and
            RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
            (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
            (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
            taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
          else*)
        { movl const1,[mem1]
          movl [mem1],reg1
          to
          movl const1,reg1
          movl reg1,[mem1]
        }
        if MatchOpType(Taicpu(p),top_const,top_ref) and
          MatchOpType(Taicpu(hp1),top_ref,top_reg) and
          (taicpu(p).opsize = taicpu(hp1).opsize) and
          RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
          not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
          begin
            AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
            taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
            taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
            taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
            taicpu(hp1).fileinfo := taicpu(p).fileinfo;
            DebugMsg(SPeepholeOptimization + 'MovMov2MovMov 1',p);
            Result:=true;
            exit;
          end;
        {
          mov* x,reg1
          mov* y,reg1
          to
          mov* y,reg1
        }
        if (taicpu(p).oper[1]^.typ=top_reg) and
          MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
          not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[0]^)) then
          { the first write is dead: the second mov overwrites reg1
            without reading it }
          begin
            DebugMsg(SPeepholeOptimization + 'MovMov2Mov 4 done',p);
            { take care of the register (de)allocs following p }
            UpdateUsedRegs(tai(p.next));
            asml.remove(p);
            p.free;
            p:=hp1;
            Result:=true;
            exit;
          end;
      end;
    { search further than the next instruction for a mov }
    if (cs_opt_level3 in current_settings.optimizerswitches) and
      { check as much as possible before the expensive GetNextInstructionUsingReg call }
      (taicpu(p).oper[1]^.typ = top_reg) and
      (taicpu(p).oper[0]^.typ in [top_reg,top_const]) and
      { we work with hp2 here, so hp1 can be still used later on when
        checking for GetNextInstruction_p }
      GetNextInstructionUsingReg(p,hp2,taicpu(p).oper[1]^.reg) and
      MatchInstruction(hp2,A_MOV,[]) and
      MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
      ((taicpu(p).oper[0]^.typ=top_const) or
       ((taicpu(p).oper[0]^.typ=top_reg) and
        not(RegUsedBetween(taicpu(p).oper[0]^.reg, p, hp2))
       )
      ) then
      begin
        TransferUsedRegs(TmpUsedRegs);
        { we have
            mov x, %treg
            mov %treg, y
        }
        if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp2).oper[1]^)) and
          not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp2, TmpUsedRegs)) then
          { we've got
              mov x, %treg
              mov %treg, y
            with %treg is not used after }
          case taicpu(p).oper[0]^.typ Of
            top_reg:
              begin
                { change
                    mov %reg, %treg
                    mov %treg, y
                  to
                    mov %reg, y
                }
                AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp2,usedregs);
                taicpu(hp2).loadOper(0,taicpu(p).oper[0]^);
                DebugMsg(SPeepholeOptimization + 'MovMov2Mov 6 done',p);
                { take care of the register (de)allocs following p }
                UpdateUsedRegs(tai(p.next));
                asml.remove(p);
                p.free;
                p:=hp1;
                Result:=true;
                Exit;
              end;
            top_const:
              begin
                { change
                    mov const, %treg
                    mov %treg, y
                  to
                    mov const, y
                }
                { a const destined for memory must fit in a signed
                  32-bit immediate }
                if (taicpu(hp2).oper[1]^.typ=top_reg) or
                  ((taicpu(p).oper[0]^.val>=low(longint)) and (taicpu(p).oper[0]^.val<=high(longint))) then
                  begin
                    taicpu(hp2).loadOper(0,taicpu(p).oper[0]^);
                    DebugMsg(SPeepholeOptimization + 'MovMov2Mov 7 done',p);
                    { take care of the register (de)allocs following p }
                    UpdateUsedRegs(tai(p.next));
                    asml.remove(p);
                    p.free;
                    p:=hp1;
                    Result:=true;
                    Exit;
                  end;
              end;
            else
              Internalerror(2019103001);
          end;
      end;
    { Change
        mov %reg1, %reg2
        xxx %reg2, ???
      to
        mov %reg1, %reg2
        xxx %reg1, ???
      to avoid a write/read penalty
    }
    if GetNextInstruction_p and
      MatchOpType(taicpu(p),top_reg,top_reg) and
      MatchInstruction(hp1,A_OR,A_AND,A_TEST,[]) and
      MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
      (taicpu(hp1).oper[1]^.typ = top_reg) and
      (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
      { we have
          mov %reg1, %reg2
          test/or/and %reg2, %reg2
      }
      begin
        TransferUsedRegs(TmpUsedRegs);
        { reg1 will be used after the first instruction,
          so update the allocation info }
        AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
        if GetNextInstruction(hp1, hp2) and
          (hp2.typ = ait_instruction) and
          taicpu(hp2).is_jmp and
          not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
          { change
              mov %reg1, %reg2
              test/or/and %reg2, %reg2
              jxx
            to
              test %reg1, %reg1
              jxx
          }
          begin
            taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
            taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
            DebugMsg(SPeepholeOptimization + 'MovTestJxx2TestMov done',p);
            asml.remove(p);
            p.free;
            p := hp1;
            { NOTE(review): Result stays False even though p was removed
              and re-seated - confirm this is intended }
            Exit;
          end
        else
          { change
              mov %reg1, %reg2
              test/or/and %reg2, %reg2
            to
              mov %reg1, %reg2
              test/or/and %reg1, %reg1
          }
          begin
            taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
            taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
            DebugMsg(SPeepholeOptimization + 'MovTestJxx2MovTestJxx done',p);
          end;
      end;
    { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
      x >= RetOffset) as it doesn't do anything (it writes either to a
      parameter or to the temporary storage room for the function
      result)
    }
    if GetNextInstruction_p and
      IsExitCode(hp1) and
      MatchOpType(taicpu(p),top_reg,top_ref) and
      (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
      not(assigned(current_procinfo.procdef.funcretsym) and
        (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
      (taicpu(p).oper[1]^.ref^.index = NR_NO) then
      begin
        asml.remove(p);
        p.free;
        p:=hp1;
        DebugMsg(SPeepholeOptimization + 'removed deadstore before leave/ret',p);
        RemoveLastDeallocForFuncRes(p);
        Result:=true;
        exit;
      end;
    { mov reg,mem followed by test/cmp against the same mem: compare
      against the register instead to avoid the memory read }
    if GetNextInstruction_p and
      MatchOpType(taicpu(p),top_reg,top_ref) and
      MatchInstruction(hp1,A_CMP,A_TEST,[taicpu(p).opsize]) and
      (taicpu(hp1).oper[1]^.typ = top_ref) and
      RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
      begin
        { change
            mov reg1, mem1
            test/cmp x, mem1
          to
            mov reg1, mem1
            test/cmp x, reg1
        }
        taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
        DebugMsg(SPeepholeOptimization + 'MovTestCmp2MovTestCmp 1',hp1);
        AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
        { NOTE(review): Result stays False here although hp1 was
          rewritten - confirm whether a re-run is wanted }
        exit;
      end;
    { mov x,reg2 / arith-op on reg2 / mov reg2,y }
    if GetNextInstruction_p and
      (taicpu(p).oper[1]^.typ = top_reg) and
      (hp1.typ = ait_instruction) and
      GetNextInstruction(hp1, hp2) and
      MatchInstruction(hp2,A_MOV,[]) and
      (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
      (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
       ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and (taicpu(hp2).opsize=S_L) and
        IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
      ) then
      begin
        if OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
          (taicpu(hp2).oper[0]^.typ=top_reg) then
          { change movsX/movzX reg/ref, reg2
                   add/sub/or/... reg3/$const, reg2
                   mov reg2 reg/ref
                   dealloc reg2
            to
                   add/sub/or/... reg3/$const, reg/ref }
          begin
            TransferUsedRegs(TmpUsedRegs);
            UpdateUsedRegs(TmpUsedRegs, tai(p.next));
            UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
            If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
              begin
                { by example:
                    movswl  %si,%eax        movswl  %si,%eax      p
                    decl    %eax            addl    %edx,%eax     hp1
                    movw    %ax,%si         movw    %ax,%si       hp2
                  ->
                    movswl  %si,%eax        movswl  %si,%eax      p
                    decw    %eax            addw    %edx,%eax     hp1
                    movw    %ax,%si         movw    %ax,%si       hp2
                }
                DebugMsg(SPeepholeOptimization + 'MovOpMov2Op ('+
                      debug_op2str(taicpu(p).opcode)+debug_opsize2str(taicpu(p).opsize)+' '+
                      debug_op2str(taicpu(hp1).opcode)+debug_opsize2str(taicpu(hp1).opsize)+' '+
                      debug_op2str(taicpu(hp2).opcode)+debug_opsize2str(taicpu(hp2).opsize),p);
                taicpu(hp1).changeopsize(taicpu(hp2).opsize);
                {
                  ->
                    movswl  %si,%eax        movswl  %si,%eax      p
                    decw    %si             addw    %dx,%si       hp1
                    movw    %ax,%si         movw    %ax,%si       hp2
                }
                case taicpu(hp1).ops of
                  1:
                    begin
                      taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
                      if taicpu(hp1).oper[0]^.typ=top_reg then
                        setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
                    end;
                  2:
                    begin
                      taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
                      { shift counts keep their own operand size }
                      if (taicpu(hp1).oper[0]^.typ=top_reg) and
                        (taicpu(hp1).opcode<>A_SHL) and
                        (taicpu(hp1).opcode<>A_SHR) and
                        (taicpu(hp1).opcode<>A_SAR) then
                        setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
                    end;
                  else
                    internalerror(2008042701);
                end;
                {
                  ->
                    decw    %si             addw    %dx,%si       p
                }
                asml.remove(hp2);
                hp2.Free;
                RemoveCurrentP(p);
                Result:=True;
                Exit;
              end;
          end;
        if MatchOpType(taicpu(hp2),top_reg,top_reg) and
          not(SuperRegistersEqual(taicpu(hp1).oper[0]^.reg,taicpu(hp2).oper[1]^.reg)) and
          ((topsize2memsize[taicpu(hp1).opsize]<= topsize2memsize[taicpu(hp2).opsize]) or
           { opsize matters for these opcodes, we could probably work around this, but it is not worth the effort }
           ((taicpu(hp1).opcode<>A_SHL) and (taicpu(hp1).opcode<>A_SHR) and (taicpu(hp1).opcode<>A_SAR))
          )
{$ifdef i386}
          { byte registers of esi, edi, ebp, esp are not available on i386 }
          and ((taicpu(hp2).opsize<>S_B) or not(getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_ESI,RS_EDI,RS_EBP,RS_ESP]))
          and ((taicpu(hp2).opsize<>S_B) or not(getsupreg(taicpu(p).oper[0]^.reg) in [RS_ESI,RS_EDI,RS_EBP,RS_ESP]))
{$endif i386}
          then
          { change movsX/movzX reg/ref, reg2
                   add/sub/or/... regX/$const, reg2
                   mov reg2, reg3
                   dealloc reg2
            to
                   movsX/movzX reg/ref, reg3
                   add/sub/or/... reg3/$const, reg3
          }
          begin
            TransferUsedRegs(TmpUsedRegs);
            UpdateUsedRegs(TmpUsedRegs, tai(p.next));
            UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
            If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
              begin
                { by example:
                    movswl  %si,%eax        movswl  %si,%eax      p
                    decl    %eax            addl    %edx,%eax     hp1
                    movw    %ax,%si         movw    %ax,%si       hp2
                  ->
                    movswl  %si,%eax        movswl  %si,%eax      p
                    decw    %eax            addw    %edx,%eax     hp1
                    movw    %ax,%si         movw    %ax,%si       hp2
                }
                DebugMsg(SPeepholeOptimization + 'MovOpMov2MovOp ('+
                      debug_op2str(taicpu(p).opcode)+debug_opsize2str(taicpu(p).opsize)+' '+
                      debug_op2str(taicpu(hp1).opcode)+debug_opsize2str(taicpu(hp1).opsize)+' '+
                      debug_op2str(taicpu(hp2).opcode)+debug_opsize2str(taicpu(hp2).opsize)+')',p);
                { limit size of constants as well to avoid assembler errors, but
                  check opsize to avoid overflow when left shifting the 1 }
                if (taicpu(p).oper[0]^.typ=top_const) and (topsize2memsize[taicpu(hp2).opsize]<=4) then
                  taicpu(p).oper[0]^.val:=taicpu(p).oper[0]^.val and ((qword(1) shl (topsize2memsize[taicpu(hp2).opsize]*8))-1);
                taicpu(hp1).changeopsize(taicpu(hp2).opsize);
                taicpu(p).changeopsize(taicpu(hp2).opsize);
                if taicpu(p).oper[0]^.typ=top_reg then
                  setsubreg(taicpu(p).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
                taicpu(p).loadoper(1, taicpu(hp2).oper[1]^);
                AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,usedregs);
                {
                  ->
                    movswl  %si,%eax        movswl  %si,%eax      p
                    decw    %si             addw    %dx,%si       hp1
                    movw    %ax,%si         movw    %ax,%si       hp2
                }
                case taicpu(hp1).ops of
                  1:
                    begin
                      taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
                      if taicpu(hp1).oper[0]^.typ=top_reg then
                        setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
                    end;
                  2:
                    begin
                      taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
                      { shift counts keep their own operand size }
                      if (taicpu(hp1).oper[0]^.typ=top_reg) and
                        (taicpu(hp1).opcode<>A_SHL) and
                        (taicpu(hp1).opcode<>A_SHR) and
                        (taicpu(hp1).opcode<>A_SAR) then
                        setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
                    end;
                  else
                    internalerror(2018111801);
                end;
                {
                  ->
                    decw    %si             addw    %dx,%si       p
                }
                asml.remove(hp2);
                hp2.Free;
                { NOTE(review): unlike the MovOpMov2Op branch above, Result is
                  not set to True here - confirm whether that is intended }
              end;
          end;
      end;
    if GetNextInstruction_p and
      MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
      GetNextInstruction(hp1, hp2) and
      MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
      MatchOperand(Taicpu(p).oper[0]^,0) and
      (Taicpu(p).oper[1]^.typ = top_reg) and
      MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
      MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
      { mov reg1,0
        bts reg1,operand1             -->      mov reg1,operand2
        or  reg1,operand2                      bts reg1,operand1}
      begin
        Taicpu(hp2).opcode:=A_MOV;
        { re-insert the bts/btr after the (former) or, now a mov }
        asml.remove(hp1);
        insertllitem(hp2,hp2.next,hp1);
        asml.remove(p);
        p.free;
        p:=hp1;
        Result:=true;
        exit;
      end;
    if GetNextInstruction_p and
      MatchInstruction(hp1,A_LEA,[S_L]) and
      MatchOpType(Taicpu(p),top_ref,top_reg) and
      ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
        (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
       ) or
       (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
        (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
       )
      ) then
      { mov reg1,ref
        lea reg2,[reg1,reg2]
        to
        add reg2,ref}
      begin
        TransferUsedRegs(TmpUsedRegs);
        { reg1 may not be used afterwards }
        if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
          begin
            Taicpu(hp1).opcode:=A_ADD;
            Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
            DebugMsg(SPeepholeOptimization + 'MovLea2Add done',hp1);
            asml.remove(p);
            p.free;
            p:=hp1;
            result:=true;
            exit;
          end;
      end;
  end;
{ First-pass peephole optimization for the movXX family (two-operand moves
  handled generically by opcode): removes a second movXX that immediately
  copies back what the first one just moved. }
function TX86AsmOptimizer.OptPass1MOVXX(var p : tai) : boolean;
  var
    hp1 : tai;
  begin
    Result:=false;
    if taicpu(p).ops <> 2 then
      exit;
    { the follower must be the same opcode and size with two operands }
    if GetNextInstruction(p,hp1) and
      MatchInstruction(hp1,taicpu(p).opcode,[taicpu(p).opsize]) and
      (taicpu(hp1).ops = 2) then
      begin
        if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
          (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
          {  movXX reg1, mem1     or     movXX mem1, reg1
             movXX mem2, reg2            movXX reg2, mem2}
          begin
            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
              { movXX reg1, mem1     or     movXX mem1, reg1
                movXX mem2, reg1            movXX reg2, mem1}
              begin
                if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
                  begin
                    { Removes the second statement from
                        movXX reg1, mem1/reg2
                        movXX mem1/reg2, reg1
                    }
                    if taicpu(p).oper[0]^.typ=top_reg then
                      AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                    { Removes the second statement from
                        movXX mem1/reg1, reg2
                        movXX reg2, mem1/reg1
                    }
                    { if the copied-to register dies here, the first move is
                      dead as well: drop p and re-seat it past hp1 before
                      hp1 itself is removed below }
                    if (taicpu(p).oper[1]^.typ=top_reg) and
                      not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)) then
                      begin
                        asml.remove(p);
                        p.free;
                        GetNextInstruction(hp1,p);
                        DebugMsg(SPeepholeOptimization + 'MovXXMovXX2Nop 1 done',p);
                      end
                    else
                      DebugMsg(SPeepholeOptimization + 'MovXXMovXX2MoVXX 1 done',p);
                    { in both cases the second move is redundant }
                    asml.remove(hp1);
                    hp1.free;
                    Result:=true;
                    exit;
                  end
              end;
          end;
      end;
  end;
  2088. function TX86AsmOptimizer.OptPass1OP(var p : tai) : boolean;
  2089. var
  2090. hp1 : tai;
  2091. begin
  2092. result:=false;
  2093. { replace
  2094. <Op>X %mreg1,%mreg2 // Op in [ADD,MUL]
  2095. MovX %mreg2,%mreg1
  2096. dealloc %mreg2
  2097. by
  2098. <Op>X %mreg2,%mreg1
  2099. ?
  2100. }
  2101. if GetNextInstruction(p,hp1) and
  2102. { we mix single and double opperations here because we assume that the compiler
  2103. generates vmovapd only after double operations and vmovaps only after single operations }
  2104. MatchInstruction(hp1,A_MOVAPD,A_MOVAPS,[S_NO]) and
  2105. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  2106. MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
  2107. (taicpu(p).oper[0]^.typ=top_reg) then
  2108. begin
  2109. TransferUsedRegs(TmpUsedRegs);
  2110. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  2111. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  2112. begin
  2113. taicpu(p).loadoper(0,taicpu(hp1).oper[0]^);
  2114. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  2115. DebugMsg(SPeepholeOptimization + 'OpMov2Op done',p);
  2116. asml.Remove(hp1);
  2117. hp1.Free;
  2118. result:=true;
  2119. end;
  2120. end;
  2121. end;
{ Pass-1 peephole for LEA. Applies, in order:
  - drops useless segment prefixes;
  - Lea2Mov / Lea2Nop / Lea2Inc / Lea2Dec / Lea2Sub / Lea2Add for trivial
    address forms;
  - LeaMov2Lea: folds a following register-to-register move into the lea;
  - LeaLea2Lea: merges two chained offset-only leas;
  - LeaOp2Op: substitutes the lea's full address expression into a memory
    operand of the next instruction;
  - LeaCallLeaRet2Jmp: turns a stack-adjust/call/undo/ret sequence into a
    tail jump (level-4 only).
  Returns True (and may exit early) when the instruction list was changed. }
function TX86AsmOptimizer.OptPass1LEA(var p : tai) : boolean;
  var
    hp1, hp2, hp3: tai;
    l : ASizeInt;
    ref: Integer;
    saveref: treference;
  begin
    Result:=false;
    { removes seg register prefixes from LEA operations, as they
      don't do anything }
    taicpu(p).oper[0]^.ref^.Segment:=NR_NO;
    { changes "lea (%reg1), %reg2" into "mov %reg1, %reg2" }
    if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
      (taicpu(p).oper[0]^.ref^.index = NR_NO) and
      { do not mess with leas accessing the stack pointer }
      (taicpu(p).oper[1]^.reg <> NR_STACK_POINTER_REG) and
      (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
      begin
        if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
          (taicpu(p).oper[0]^.ref^.offset = 0) then
          begin
            hp1:=taicpu.op_reg_reg(A_MOV,taicpu(p).opsize,taicpu(p).oper[0]^.ref^.base,
              taicpu(p).oper[1]^.reg);
            InsertLLItem(p.previous,p.next, hp1);
            DebugMsg(SPeepholeOptimization + 'Lea2Mov done',hp1);
            p.free;
            p:=hp1;
            Result:=true;
            exit;
          end
        { "lea (%reg1),%reg1" with zero offset does nothing }
        else if (taicpu(p).oper[0]^.ref^.offset = 0) then
          begin
            DebugMsg(SPeepholeOptimization + 'Lea2Nop done',p);
            RemoveCurrentP(p);
            Result:=true;
            exit;
          end
        { continue to use lea to adjust the stack pointer,
          it is the recommended way, but only if not optimizing for size }
        else if (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
          (cs_opt_size in current_settings.optimizerswitches) then
          with taicpu(p).oper[0]^.ref^ do
            if (base = taicpu(p).oper[1]^.reg) then
              begin
                l:=offset;
                if (l=1) and UseIncDec then
                  begin
                    taicpu(p).opcode:=A_INC;
                    taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                    taicpu(p).ops:=1;
                    DebugMsg(SPeepholeOptimization + 'Lea2Inc done',p);
                  end
                else if (l=-1) and UseIncDec then
                  begin
                    taicpu(p).opcode:=A_DEC;
                    taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                    taicpu(p).ops:=1;
                    DebugMsg(SPeepholeOptimization + 'Lea2Dec done',p);
                  end
                else
                  begin
                    { low(longint) cannot be negated, keep ADD for it }
                    if (l<0) and (l<>-2147483648) then
                      begin
                        taicpu(p).opcode:=A_SUB;
                        taicpu(p).loadConst(0,-l);
                        DebugMsg(SPeepholeOptimization + 'Lea2Sub done',p);
                      end
                    else
                      begin
                        taicpu(p).opcode:=A_ADD;
                        taicpu(p).loadConst(0,l);
                        DebugMsg(SPeepholeOptimization + 'Lea2Add done',p);
                      end;
                  end;
                Result:=true;
                exit;
              end;
      end;
    { "lea x(reg1),reg2; mov reg2,reg3" -> "lea x(reg1),reg3" when reg2 dies }
    if GetNextInstruction(p,hp1) and
      MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) and
      MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
      MatchOpType(Taicpu(hp1),top_reg,top_reg) and
      (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) then
      begin
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.next));
        if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
          begin
            taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
            DebugMsg(SPeepholeOptimization + 'LeaMov2Lea done',p);
            asml.Remove(hp1);
            hp1.Free;
            result:=true;
          end;
      end;
    { changes
        lea offset1(regX), reg1
        lea offset2(reg1), reg1
      to
        lea offset1+offset2(regX), reg1 }
    if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[1]^.reg) and
      MatchInstruction(hp1,A_LEA,[S_L]) and
      MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
      (taicpu(hp1).oper[0]^.ref^.base=taicpu(p).oper[1]^.reg) and
      { first lea must be a plain base+offset so the merge is just an add }
      (taicpu(p).oper[0]^.ref^.index=NR_NO) and
      (taicpu(p).oper[0]^.ref^.relsymbol=nil) and
      (taicpu(p).oper[0]^.ref^.scalefactor in [0,1]) and
      (taicpu(p).oper[0]^.ref^.segment=NR_NO) and
      (taicpu(p).oper[0]^.ref^.symbol=nil) and
      (taicpu(p).oper[0]^.ref^.index=taicpu(hp1).oper[0]^.ref^.index) and
      (taicpu(p).oper[0]^.ref^.relsymbol=taicpu(hp1).oper[0]^.ref^.relsymbol) and
      (taicpu(p).oper[0]^.ref^.scalefactor=taicpu(hp1).oper[0]^.ref^.scalefactor) and
      (taicpu(p).oper[0]^.ref^.segment=taicpu(hp1).oper[0]^.ref^.segment) and
      (taicpu(p).oper[0]^.ref^.symbol=taicpu(hp1).oper[0]^.ref^.symbol) then
      begin
        DebugMsg(SPeepholeOptimization + 'LeaLea2Lea done',p);
        inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
        taicpu(hp1).oper[0]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
        RemoveCurrentP(p);
        result:=true;
        exit;
      end;
    { changes
        lea <ref1>, reg1
        <op> ...,<ref. with reg1>,...
      to
        <op> ...,<ref1>,... }
    if (taicpu(p).oper[1]^.reg<>current_procinfo.framepointer) and
      (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) and
      GetNextInstruction(p,hp1) and
      (hp1.typ=ait_instruction) and
      not(MatchInstruction(hp1,A_LEA,[])) then
      begin
        { find a reference which uses reg1 }
        if (taicpu(hp1).ops>=1) and (taicpu(hp1).oper[0]^.typ=top_ref) and RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[0]^) then
          ref:=0
        else if (taicpu(hp1).ops>=2) and (taicpu(hp1).oper[1]^.typ=top_ref) and RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^) then
          ref:=1
        else
          ref:=-1;
        if (ref<>-1) and
          { reg1 must be either the base or the index }
          ((taicpu(hp1).oper[ref]^.ref^.base=taicpu(p).oper[1]^.reg) xor (taicpu(hp1).oper[ref]^.ref^.index=taicpu(p).oper[1]^.reg)) then
          begin
            { reg1 can be removed from the reference; keep a copy so the
              change can be undone if the substitution turns out illegal }
            saveref:=taicpu(hp1).oper[ref]^.ref^;
            if taicpu(hp1).oper[ref]^.ref^.base=taicpu(p).oper[1]^.reg then
              taicpu(hp1).oper[ref]^.ref^.base:=NR_NO
            else if taicpu(hp1).oper[ref]^.ref^.index=taicpu(p).oper[1]^.reg then
              taicpu(hp1).oper[ref]^.ref^.index:=NR_NO
            else
              Internalerror(2019111201);
            { check if we can insert all data of the lea into the second instruction:
              each address component may only be supplied by one of the two }
            if ((taicpu(hp1).oper[ref]^.ref^.base=taicpu(p).oper[1]^.reg) or (taicpu(hp1).oper[ref]^.ref^.scalefactor in [0,1])) and
              ((taicpu(p).oper[0]^.ref^.base=NR_NO) or (taicpu(hp1).oper[ref]^.ref^.base=NR_NO)) and
              ((taicpu(p).oper[0]^.ref^.index=NR_NO) or (taicpu(hp1).oper[ref]^.ref^.index=NR_NO)) and
              ((taicpu(p).oper[0]^.ref^.symbol=nil) or (taicpu(hp1).oper[ref]^.ref^.symbol=nil)) and
              ((taicpu(p).oper[0]^.ref^.relsymbol=nil) or (taicpu(hp1).oper[ref]^.ref^.relsymbol=nil)) and
              ((taicpu(p).oper[0]^.ref^.scalefactor in [0,1]) or (taicpu(hp1).oper[ref]^.ref^.scalefactor in [0,1])) and
              (taicpu(p).oper[0]^.ref^.segment=NR_NO) and (taicpu(hp1).oper[ref]^.ref^.segment=NR_NO)
{$ifdef x86_64}
              { combined displacement must fit a 32 bit immediate, and
                RIP-relative addressing tolerates no other components }
              and (abs(taicpu(hp1).oper[ref]^.ref^.offset+taicpu(p).oper[0]^.ref^.offset)<=$7fffffff)
              and (((taicpu(p).oper[0]^.ref^.base<>NR_RIP) and (taicpu(p).oper[0]^.ref^.index<>NR_RIP)) or
                   ((taicpu(hp1).oper[ref]^.ref^.base=NR_NO) and (taicpu(hp1).oper[ref]^.ref^.index=NR_NO))
                  )
{$endif x86_64}
              then
              begin
                { reg1 might not be used by the second instruction after it is removed from the reference }
                if not(RegInInstruction(taicpu(p).oper[1]^.reg,taicpu(hp1))) then
                  begin
                    TransferUsedRegs(TmpUsedRegs);
                    UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                    { reg1 is not updated so it might not be used afterwards }
                    if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                      begin
                        DebugMsg(SPeepholeOptimization + 'LeaOp2Op done',p);
                        if taicpu(p).oper[0]^.ref^.base<>NR_NO then
                          taicpu(hp1).oper[ref]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
                        if taicpu(p).oper[0]^.ref^.index<>NR_NO then
                          taicpu(hp1).oper[ref]^.ref^.index:=taicpu(p).oper[0]^.ref^.index;
                        if taicpu(p).oper[0]^.ref^.symbol<>nil then
                          taicpu(hp1).oper[ref]^.ref^.symbol:=taicpu(p).oper[0]^.ref^.symbol;
                        if taicpu(p).oper[0]^.ref^.relsymbol<>nil then
                          taicpu(hp1).oper[ref]^.ref^.relsymbol:=taicpu(p).oper[0]^.ref^.relsymbol;
                        if not(taicpu(p).oper[0]^.ref^.scalefactor in [0,1]) then
                          taicpu(hp1).oper[ref]^.ref^.scalefactor:=taicpu(p).oper[0]^.ref^.scalefactor;
                        inc(taicpu(hp1).oper[ref]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
                        RemoveCurrentP(p);
                        result:=true;
                        exit;
                      end
                  end;
              end;
            { recover }
            taicpu(hp1).oper[ref]^.ref^:=saveref;
          end;
      end;
    { replace
        lea x(stackpointer),stackpointer
        call procname
        lea -x(stackpointer),stackpointer
        ret
      by
        jmp procname
      this should never hurt except when pic is used, not sure
      how to handle it then
      but do it only on level 4 because it destroys stack back traces
    }
    if (cs_opt_level4 in current_settings.optimizerswitches) and
      not(cs_create_pic in current_settings.moduleswitches) and
      (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG) and
      (taicpu(p).oper[0]^.ref^.base=NR_STACK_POINTER_REG) and
      (taicpu(p).oper[0]^.ref^.index=NR_NO) and
      (taicpu(p).oper[0]^.ref^.relsymbol=nil) and
      (taicpu(p).oper[0]^.ref^.scalefactor in [0,1]) and
      (taicpu(p).oper[0]^.ref^.segment=NR_NO) and
      (taicpu(p).oper[0]^.ref^.symbol=nil) and
      GetNextInstruction(p, hp1) and
      MatchInstruction(hp1,A_CALL,[S_NO]) and
      GetNextInstruction(hp1, hp2) and
      MatchInstruction(hp2,A_LEA,[taicpu(p).opsize]) and
      (taicpu(hp2).oper[1]^.reg=NR_STACK_POINTER_REG) and
      (taicpu(p).oper[0]^.ref^.base=taicpu(hp2).oper[0]^.ref^.base) and
      (taicpu(p).oper[0]^.ref^.index=taicpu(hp2).oper[0]^.ref^.index) and
      { second lea must undo exactly the first one's stack adjustment }
      (taicpu(p).oper[0]^.ref^.offset=-taicpu(hp2).oper[0]^.ref^.offset) and
      (taicpu(p).oper[0]^.ref^.relsymbol=taicpu(hp2).oper[0]^.ref^.relsymbol) and
      (taicpu(p).oper[0]^.ref^.scalefactor=taicpu(hp2).oper[0]^.ref^.scalefactor) and
      (taicpu(p).oper[0]^.ref^.segment=taicpu(hp2).oper[0]^.ref^.segment) and
      (taicpu(p).oper[0]^.ref^.symbol=taicpu(hp2).oper[0]^.ref^.symbol) and
      GetNextInstruction(hp2, hp3) and
      MatchInstruction(hp3,A_RET,[S_NO]) and
      (taicpu(hp3).ops=0) then
      begin
        DebugMsg(SPeepholeOptimization + 'LeaCallLeaRet2Jmp done',p);
        taicpu(hp1).opcode:=A_JMP;
        taicpu(hp1).is_jmp:=true;
        asml.remove(p);
        asml.remove(hp2);
        asml.remove(hp3);
        p.free;
        hp2.free;
        hp3.free;
        p:=hp1;
        Result:=true;
      end;
  end;
{ Helper for OptPass1Sub: p is expected to be "sub $const,op". Folds an
  immediately preceding DEC/SUB/ADD (same size, same destination) into
  p's constant. When an ADD cancels the SUB exactly, both instructions
  are removed; only in that case does the function return True, with p
  repositioned so the caller rescans from the right place. }
function TX86AsmOptimizer.DoSubAddOpt(var p: tai): Boolean;
  var
    hp1 : tai;
  begin
    DoSubAddOpt := False;
    if GetLastInstruction(p, hp1) and
      (hp1.typ = ait_instruction) and
      (taicpu(hp1).opsize = taicpu(p).opsize) then
      case taicpu(hp1).opcode Of
        A_DEC:
          { "dec op; sub $n,op" -> "sub $(n+1),op" }
          if (taicpu(hp1).oper[0]^.typ = top_reg) and
            MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
            begin
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+1);
              asml.remove(hp1);
              hp1.free;
            end;
        A_SUB:
          { "sub $m,op; sub $n,op" -> "sub $(n+m),op" }
          if MatchOpType(taicpu(hp1),top_const,top_reg) and
            MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) then
            begin
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+taicpu(hp1).oper[0]^.val);
              asml.remove(hp1);
              hp1.free;
            end;
        A_ADD:
          begin
            { "add $m,op; sub $n,op" -> "sub $(n-m),op" }
            if MatchOpType(taicpu(hp1),top_const,top_reg) and
              MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) then
              begin
                taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
                asml.remove(hp1);
                hp1.free;
                { add and sub cancelled out: drop the sub as well }
                if (taicpu(p).oper[0]^.val = 0) then
                  begin
                    hp1 := tai(p.next);
                    asml.remove(p);
                    p.free;
                    if not GetLastInstruction(hp1, p) then
                      p := hp1;
                    DoSubAddOpt := True;
                  end
              end;
          end;
        else
          ;
      end;
  end;
  2417. function TX86AsmOptimizer.OptPass1Sub(var p : tai) : boolean;
  2418. {$ifdef i386}
  2419. var
  2420. hp1 : tai;
  2421. {$endif i386}
  2422. begin
  2423. Result:=false;
  2424. { * change "subl $2, %esp; pushw x" to "pushl x"}
  2425. { * change "sub/add const1, reg" or "dec reg" followed by
  2426. "sub const2, reg" to one "sub ..., reg" }
  2427. if MatchOpType(taicpu(p),top_const,top_reg) then
  2428. begin
  2429. {$ifdef i386}
  2430. if (taicpu(p).oper[0]^.val = 2) and
  2431. (taicpu(p).oper[1]^.reg = NR_ESP) and
  2432. { Don't do the sub/push optimization if the sub }
  2433. { comes from setting up the stack frame (JM) }
  2434. (not(GetLastInstruction(p,hp1)) or
  2435. not(MatchInstruction(hp1,A_MOV,[S_L]) and
  2436. MatchOperand(taicpu(hp1).oper[0]^,NR_ESP) and
  2437. MatchOperand(taicpu(hp1).oper[0]^,NR_EBP))) then
  2438. begin
  2439. hp1 := tai(p.next);
  2440. while Assigned(hp1) and
  2441. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  2442. not RegReadByInstruction(NR_ESP,hp1) and
  2443. not RegModifiedByInstruction(NR_ESP,hp1) do
  2444. hp1 := tai(hp1.next);
  2445. if Assigned(hp1) and
  2446. MatchInstruction(hp1,A_PUSH,[S_W]) then
  2447. begin
  2448. taicpu(hp1).changeopsize(S_L);
  2449. if taicpu(hp1).oper[0]^.typ=top_reg then
  2450. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  2451. hp1 := tai(p.next);
  2452. asml.remove(p);
  2453. p.free;
  2454. p := hp1;
  2455. Result:=true;
  2456. exit;
  2457. end;
  2458. end;
  2459. {$endif i386}
  2460. if DoSubAddOpt(p) then
  2461. Result:=true;
  2462. end;
  2463. end;
{ Pass-1 peephole for SHL/SAL with a small constant (1..3) on a 32/64 bit
  register: absorbs following add/sub/inc/dec/lea instructions on the same
  register into one lea (TmpRef accumulates base, scale and offset).
  On pre-Pentium II 32 bit targets a lone "shl $1/2/3" is also rewritten
  as add/lea for scheduling reasons. }
function TX86AsmOptimizer.OptPass1SHLSAL(var p : tai) : boolean;
  var
    TmpBool1,TmpBool2 : Boolean;
    tmpref : treference;
    hp1,hp2: tai;
  begin
    Result:=false;
    if MatchOpType(taicpu(p),top_const,top_reg) and
      (taicpu(p).opsize in [S_L{$ifdef x86_64},S_Q{$endif x86_64}]) and
      (taicpu(p).oper[0]^.val <= 3) then
      { Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement }
      begin
        { should we check the next instruction? }
        TmpBool1 := True;
        { have we found an add/sub which could be
          integrated in the lea? }
        TmpBool2 := False;
        reference_reset(tmpref,2,[]);
        TmpRef.index := taicpu(p).oper[1]^.reg;
        TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
        { keep consuming instructions while they modify only our register
          and nothing afterwards reads the flags they would have set }
        while TmpBool1 and
          GetNextInstruction(p, hp1) and
          (tai(hp1).typ = ait_instruction) and
          ((((taicpu(hp1).opcode = A_ADD) or
          (taicpu(hp1).opcode = A_SUB)) and
          (taicpu(hp1).oper[1]^.typ = Top_Reg) and
          (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
          (((taicpu(hp1).opcode = A_INC) or
          (taicpu(hp1).opcode = A_DEC)) and
          (taicpu(hp1).oper[0]^.typ = Top_Reg) and
          (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg)) or
          ((taicpu(hp1).opcode = A_LEA) and
          (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) and
          (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg))) and
          (not GetNextInstruction(hp1,hp2) or
           not instrReadsFlags(hp2)) Do
          begin
            TmpBool1 := False;
            if taicpu(hp1).opcode=A_LEA then
              begin
                { fold a lea when the combined scale stays encodable (<=8)
                  and it adds no symbol/segment of its own }
                if (TmpRef.base = NR_NO) and
                  (taicpu(hp1).oper[0]^.ref^.symbol=nil) and
                  (taicpu(hp1).oper[0]^.ref^.relsymbol=nil) and
                  (taicpu(hp1).oper[0]^.ref^.segment=NR_NO) and
                  ((taicpu(hp1).oper[0]^.ref^.scalefactor=0) or
                  (taicpu(hp1).oper[0]^.ref^.scalefactor*tmpref.scalefactor<=8)) then
                  begin
                    TmpBool1 := True;
                    TmpBool2 := True;
                    inc(TmpRef.offset, taicpu(hp1).oper[0]^.ref^.offset);
                    if taicpu(hp1).oper[0]^.ref^.scalefactor<>0 then
                      tmpref.scalefactor:=tmpref.scalefactor*taicpu(hp1).oper[0]^.ref^.scalefactor;
                    TmpRef.base := taicpu(hp1).oper[0]^.ref^.base;
                    asml.remove(hp1);
                    hp1.free;
                  end
              end
            else if (taicpu(hp1).oper[0]^.typ = Top_Const) then
              begin
                { add/sub constant -> fold into the displacement }
                TmpBool1 := True;
                TmpBool2 := True;
                case taicpu(hp1).opcode of
                  A_ADD:
                    inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
                  A_SUB:
                    dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
                  else
                    internalerror(2019050536);
                end;
                asml.remove(hp1);
                hp1.free;
              end
            else
              { add reg (base slot still free) / inc / dec }
              if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
                (((taicpu(hp1).opcode = A_ADD) and
                (TmpRef.base = NR_NO)) or
                (taicpu(hp1).opcode = A_INC) or
                (taicpu(hp1).opcode = A_DEC)) then
                begin
                  TmpBool1 := True;
                  TmpBool2 := True;
                  case taicpu(hp1).opcode of
                    A_ADD:
                      TmpRef.base := taicpu(hp1).oper[0]^.reg;
                    A_INC:
                      inc(TmpRef.offset);
                    A_DEC:
                      dec(TmpRef.offset);
                    else
                      internalerror(2019050535);
                  end;
                  asml.remove(hp1);
                  hp1.free;
                end;
          end;
        if TmpBool2
{$ifndef x86_64}
          or
          ((current_settings.optimizecputype < cpu_Pentium2) and
          (taicpu(p).oper[0]^.val <= 3) and
          not(cs_opt_size in current_settings.optimizerswitches))
{$endif x86_64}
          then
          begin
            if not(TmpBool2) and
              (taicpu(p).oper[0]^.val=1) then
              begin
                hp1:=taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
                  taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
              end
            else
              hp1:=taicpu.op_ref_reg(A_LEA, taicpu(p).opsize, TmpRef,
                taicpu(p).oper[1]^.reg);
            DebugMsg(SPeepholeOptimization + 'ShlAddLeaSubIncDec2Lea',p);
            InsertLLItem(p.previous, p.next, hp1);
            p.free;
            p := hp1;
          end;
      end
{$ifndef x86_64}
    else if (current_settings.optimizecputype < cpu_Pentium2) and
      MatchOpType(taicpu(p),top_const,top_reg) then
      begin
        { changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
          but faster on a 486, and pairable in both U and V pipes on the Pentium
          (unlike shl, which is only pairable in the U pipe) }
        if taicpu(p).oper[0]^.val=1 then
          begin
            hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
              taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
            InsertLLItem(p.previous, p.next, hp1);
            p.free;
            p := hp1;
          end
        { changes "shl $2, %reg" to "lea (,%reg,4), %reg"
          "shl $3, %reg" to "lea (,%reg,8), %reg" }
        else if (taicpu(p).opsize = S_L) and
          (taicpu(p).oper[0]^.val<= 3) then
          begin
            reference_reset(tmpref,2,[]);
            TmpRef.index := taicpu(p).oper[1]^.reg;
            TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
            hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
            InsertLLItem(p.previous, p.next, hp1);
            p.free;
            p := hp1;
          end;
      end
{$endif x86_64}
    ;
  end;
{ Pass-1 peephole for SETcc: collapses "set(C) reg; test reg,reg; je/jne lbl"
  into a single conditional jump. SETcc does not modify the flags, so the
  Jcc can test the original condition (inverted for JE) directly; the TEST
  is always removed, and the SETcc too when its result register is dead. }
function TX86AsmOptimizer.OptPass1SETcc(var p: tai): boolean;
  var
    hp1,hp2,next: tai; SetC, JumpC: TAsmCond;
  begin
    Result:=false;
    if MatchOpType(taicpu(p),top_reg) and
      GetNextInstruction(p, hp1) and
      MatchInstruction(hp1, A_TEST, [S_B]) and
      MatchOpType(taicpu(hp1),top_reg,top_reg) and
      (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
      (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and
      GetNextInstruction(hp1, hp2) and
      MatchInstruction(hp2, A_Jcc, []) then
      { Change from:             To:
          set(C) %reg             j(~C) label
          test   %reg,%reg
          je     label
          set(C) %reg             j(C)  label
          test   %reg,%reg
          jne    label
      }
      begin
        next := tai(p.Next);
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, next);
        UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
        asml.Remove(hp1);
        hp1.Free;
        JumpC := taicpu(hp2).condition;
        if conditions_equal(JumpC, C_E) then
          SetC := inverse_cond(taicpu(p).condition)
        else if conditions_equal(JumpC, C_NE) then
          SetC := taicpu(p).condition
        else
          { NOTE(review): assumes only JE/JNE ever follow this pattern -
            any other condition aborts hard rather than being skipped }
          InternalError(2018061400);
        if SetC = C_NONE then
          InternalError(2018061401);
        taicpu(hp2).SetCondition(SetC);
        { drop the SETcc itself when nothing after the jump reads its result }
        if not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs) then
          begin
            asml.Remove(p);
            UpdateUsedRegs(next);
            p.Free;
            Result := True;
            p := hp2;
          end;
        DebugMsg(SPeepholeOptimization + 'SETcc/TEST/Jcc -> Jcc',p);
      end;
  end;
{ Pass-1 peephole for FSTP/FISTP: detects a store that is immediately
  reloaded from the same memory location (fstp f; fld f / fistp f; fild f).
  For extended precision stores to a dead local right before the function
  exit, both instructions are removed entirely. }
function TX86AsmOptimizer.OptPass1FSTP(var p: tai): boolean;
{ returns true if a "continue" should be done after this optimization }
  var
    hp1, hp2: tai;
  begin
    Result := false;
    if MatchOpType(taicpu(p),top_ref) and
      GetNextInstruction(p, hp1) and
      (hp1.typ = ait_instruction) and
      (((taicpu(hp1).opcode = A_FLD) and
      (taicpu(p).opcode = A_FSTP)) or
      ((taicpu(p).opcode = A_FISTP) and
      (taicpu(hp1).opcode = A_FILD))) and
      MatchOpType(taicpu(hp1),top_ref) and
      (taicpu(hp1).opsize = taicpu(p).opsize) and
      RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
      begin
        { replacing fstp f;fld f by fst f is only valid for extended because of rounding }
        if (taicpu(p).opsize=S_FX) and
          GetNextInstruction(hp1, hp2) and
          (hp2.typ = ait_instruction) and
          IsExitCode(hp2) and
          { only local temporaries below the function result may be dropped }
          (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
          not(assigned(current_procinfo.procdef.funcretsym) and
          (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
          (taicpu(p).oper[0]^.ref^.index = NR_NO) then
          begin
            { the value never leaves st0: drop both store and reload }
            asml.remove(p);
            asml.remove(hp1);
            p.free;
            hp1.free;
            p := hp2;
            RemoveLastDeallocForFuncRes(p);
            Result := true;
          end
        (* can't be done because the store operation rounds
        else
          { fst can't store an extended value! }
          if (taicpu(p).opsize <> S_FX) and
            (taicpu(p).opsize <> S_IQ) then
            begin
              if (taicpu(p).opcode = A_FSTP) then
                taicpu(p).opcode := A_FST
              else taicpu(p).opcode := A_FIST;
              asml.remove(hp1);
              hp1.free;
            end
        *)
      end;
  end;
{ Pass-1 peephole for FLD. Two shapes are handled:
  - "fld reg; fxxxp st,st1" becomes the non-popping "fxxx reg,st"
    (non-commutative operations have their direction reversed);
  - "fld mem" preceded by an fld/fst of the same (or another) memory
    location, followed by an fxxxp, is rewritten to operate on st or on
    the memory operand directly, avoiding the extra stack slot. }
function TX86AsmOptimizer.OptPass1FLD(var p : tai) : boolean;
  var
    hp1, hp2: tai;
  begin
    result:=false;
    if MatchOpType(taicpu(p),top_reg) and
      GetNextInstruction(p, hp1) and
      (hp1.typ = Ait_Instruction) and
      MatchOpType(taicpu(hp1),top_reg,top_reg) and
      (taicpu(hp1).oper[0]^.reg = NR_ST) and
      (taicpu(hp1).oper[1]^.reg = NR_ST1) then
      { change to
          fld      reg               fxxx reg,st
          fxxxp    st, st1 (hp1)
        Remark: non commutative operations must be reversed!
      }
      begin
        case taicpu(hp1).opcode Of
          A_FMULP,A_FADDP,
          A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
            begin
              { swap sub/div direction because the operand order changes }
              case taicpu(hp1).opcode Of
                A_FADDP: taicpu(hp1).opcode := A_FADD;
                A_FMULP: taicpu(hp1).opcode := A_FMUL;
                A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
                A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
                A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
                A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
                else
                  internalerror(2019050534);
              end;
              taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
              taicpu(hp1).oper[1]^.reg := NR_ST;
              asml.remove(p);
              p.free;
              p := hp1;
              Result:=true;
              exit;
            end;
          else
            ;
        end;
      end
    else
      if MatchOpType(taicpu(p),top_ref) and
        GetNextInstruction(p, hp2) and
        (hp2.typ = Ait_Instruction) and
        MatchOpType(taicpu(hp2),top_reg,top_reg) and
        (taicpu(p).opsize in [S_FS, S_FL]) and
        (taicpu(hp2).oper[0]^.reg = NR_ST) and
        (taicpu(hp2).oper[1]^.reg = NR_ST1) then
        if GetLastInstruction(p, hp1) and
          MatchInstruction(hp1,A_FLD,A_FST,[taicpu(p).opsize]) and
          MatchOpType(taicpu(hp1),top_ref) and
          RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
          if ((taicpu(hp2).opcode = A_FMULP) or
            (taicpu(hp2).opcode = A_FADDP)) then
            { change to
                fld/fst   mem1  (hp1)             fld/fst   mem1
                fld       mem1  (p)               fadd/
                faddp/                            fmul     st, st
                fmulp  st, st1 (hp2) }
            begin
              asml.remove(p);
              p.free;
              p := hp1;
              if (taicpu(hp2).opcode = A_FADDP) then
                taicpu(hp2).opcode := A_FADD
              else
                taicpu(hp2).opcode := A_FMUL;
              taicpu(hp2).oper[1]^.reg := NR_ST;
            end
          else
            { change to
                fld/fst  mem1 (hp1)               fld/fst  mem1
                fld      mem1 (p)                 fld      st}
            begin
              taicpu(p).changeopsize(S_FL);
              taicpu(p).loadreg(0,NR_ST);
            end
        else
          begin
            case taicpu(hp2).opcode Of
              A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
                { change to
                    fld/fst mem1 (hp1)      fld/fst mem1
                    fld     mem2 (p)        fxxx    mem2
                    fxxxp   st, st1 (hp2) }
                begin
                  { same direction reversal as above for the non-popping form }
                  case taicpu(hp2).opcode Of
                    A_FADDP: taicpu(p).opcode := A_FADD;
                    A_FMULP: taicpu(p).opcode := A_FMUL;
                    A_FSUBP: taicpu(p).opcode := A_FSUBR;
                    A_FSUBRP: taicpu(p).opcode := A_FSUB;
                    A_FDIVP: taicpu(p).opcode := A_FDIVR;
                    A_FDIVRP: taicpu(p).opcode := A_FDIV;
                    else
                      internalerror(2019050533);
                  end;
                  asml.remove(hp2);
                  hp2.free;
                end
              else
                ;
            end
          end
  end;
  2821. function TX86AsmOptimizer.OptPass1Cmp(var p: tai): boolean;
  2822. var
  2823. v: QWord;
  2824. hp1, hp2, hp3, hp4: tai;
  2825. begin
  2826. Result:=false;
  2827. { cmp register,$8000 neg register
  2828. je target --> jo target
  2829. .... only if register is deallocated before jump.}
  2830. case Taicpu(p).opsize of
  2831. S_B: v:=$80;
  2832. S_W: v:=$8000;
  2833. S_L: v:=qword($80000000);
  2834. S_Q : v:=qword($8000000000000000);
  2835. else
  2836. internalerror(2013112905);
  2837. end;
  2838. if MatchOpType(taicpu(p),Top_const,top_reg) and
  2839. (taicpu(p).oper[0]^.val=v) and
  2840. GetNextInstruction(p, hp1) and
  2841. MatchInstruction(hp1,A_Jcc,[]) and
  2842. (Taicpu(hp1).condition in [C_E,C_NE]) then
  2843. begin
  2844. TransferUsedRegs(TmpUsedRegs);
  2845. UpdateUsedRegs(TmpUsedRegs,tai(p.next));
  2846. if not(RegInUsedRegs(Taicpu(p).oper[1]^.reg, TmpUsedRegs)) then
  2847. begin
  2848. DebugMsg(SPeepholeOptimization + 'CmpJe2NegJo done',p);
  2849. Taicpu(p).opcode:=A_NEG;
  2850. Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
  2851. Taicpu(p).clearop(1);
  2852. Taicpu(p).ops:=1;
  2853. if Taicpu(hp1).condition=C_E then
  2854. Taicpu(hp1).condition:=C_O
  2855. else
  2856. Taicpu(hp1).condition:=C_NO;
  2857. Result:=true;
  2858. exit;
  2859. end;
  2860. end;
  2861. {
  2862. @@2: @@2:
  2863. .... ....
  2864. cmp operand1,0
  2865. jle/jbe @@1
  2866. dec operand1 --> sub operand1,1
  2867. jmp @@2 jge/jae @@2
  2868. @@1: @@1:
  2869. ... ....}
  2870. if (taicpu(p).oper[0]^.typ = top_const) and
  2871. (taicpu(p).oper[1]^.typ in [top_reg,top_ref]) and
  2872. (taicpu(p).oper[0]^.val = 0) and
  2873. GetNextInstruction(p, hp1) and
  2874. MatchInstruction(hp1,A_Jcc,[]) and
  2875. (taicpu(hp1).condition in [C_LE,C_BE]) and
  2876. GetNextInstruction(hp1,hp2) and
  2877. MatchInstruction(hp1,A_DEC,[]) and
  2878. OpsEqual(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
  2879. GetNextInstruction(hp2, hp3) and
  2880. MatchInstruction(hp1,A_JMP,[]) and
  2881. GetNextInstruction(hp3, hp4) and
  2882. FindLabel(tasmlabel(taicpu(hp1).oper[0]^.ref^.symbol),hp4) then
  2883. begin
  2884. DebugMsg(SPeepholeOptimization + 'CmpJxxDecJmp2SubJcc done',p);
  2885. taicpu(hp2).Opcode := A_SUB;
  2886. taicpu(hp2).loadoper(1,taicpu(hp2).oper[0]^);
  2887. taicpu(hp2).loadConst(0,1);
  2888. taicpu(hp2).ops:=2;
  2889. taicpu(hp3).Opcode := A_Jcc;
  2890. case taicpu(hp1).condition of
  2891. C_LE: taicpu(hp3).condition := C_GE;
  2892. C_BE: taicpu(hp3).condition := C_AE;
  2893. else
  2894. internalerror(2019050903);
  2895. end;
  2896. asml.remove(p);
  2897. asml.remove(hp1);
  2898. p.free;
  2899. hp1.free;
  2900. p := hp2;
  2901. Result:=true;
  2902. exit;
  2903. end;
  2904. end;
  2905. function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
  2906. var
  2907. hp1,hp2: tai;
  2908. {$ifdef x86_64}
  2909. hp3: tai;
  2910. {$endif x86_64}
  2911. begin
  2912. Result:=false;
  2913. if MatchOpType(taicpu(p),top_reg,top_reg) and
  2914. GetNextInstruction(p, hp1) and
  2915. {$ifdef x86_64}
  2916. MatchInstruction(hp1,A_MOVZX,A_MOVSX,A_MOVSXD,[]) and
  2917. {$else x86_64}
  2918. MatchInstruction(hp1,A_MOVZX,A_MOVSX,[]) and
  2919. {$endif x86_64}
  2920. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  2921. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  2922. { mov reg1, reg2 mov reg1, reg2
  2923. movzx/sx reg2, reg3 to movzx/sx reg1, reg3}
  2924. begin
  2925. taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
  2926. DebugMsg(SPeepholeOptimization + 'mov %reg1,%reg2; movzx/sx %reg2,%reg3 -> mov %reg1,%reg2;movzx/sx %reg1,%reg3',p);
  2927. { Don't remove the MOV command without first checking that reg2 isn't used afterwards,
  2928. or unless supreg(reg3) = supreg(reg2)). [Kit] }
  2929. TransferUsedRegs(TmpUsedRegs);
  2930. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  2931. if (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) or
  2932. not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)
  2933. then
  2934. begin
  2935. asml.remove(p);
  2936. p.free;
  2937. p := hp1;
  2938. Result:=true;
  2939. end;
  2940. exit;
  2941. end
  2942. else if MatchOpType(taicpu(p),top_reg,top_reg) and
  2943. GetNextInstruction(p, hp1) and
  2944. {$ifdef x86_64}
  2945. MatchInstruction(hp1,[A_MOV,A_MOVZX,A_MOVSX,A_MOVSXD],[]) and
  2946. {$else x86_64}
  2947. MatchInstruction(hp1,A_MOV,A_MOVZX,A_MOVSX,[]) and
  2948. {$endif x86_64}
  2949. MatchOpType(taicpu(hp1),top_ref,top_reg) and
  2950. ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg)
  2951. or
  2952. (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)
  2953. ) and
  2954. (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
  2955. { mov reg1, reg2
  2956. mov/zx/sx (reg2, ..), reg2 to mov/zx/sx (reg1, ..), reg2}
  2957. begin
  2958. if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
  2959. taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
  2960. if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
  2961. taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
  2962. DebugMsg(SPeepholeOptimization + 'MovMovXX2MoVXX 1 done',p);
  2963. asml.remove(p);
  2964. p.free;
  2965. p := hp1;
  2966. Result:=true;
  2967. exit;
  2968. end
  2969. else if (taicpu(p).oper[0]^.typ = top_ref) and
  2970. GetNextInstruction(p,hp1) and
  2971. (hp1.typ = ait_instruction) and
  2972. { while the GetNextInstruction(hp1,hp2) call could be factored out,
  2973. doing it separately in both branches allows to do the cheap checks
  2974. with low probability earlier }
  2975. ((IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  2976. GetNextInstruction(hp1,hp2) and
  2977. MatchInstruction(hp2,A_MOV,[])
  2978. ) or
  2979. ((taicpu(hp1).opcode=A_LEA) and
  2980. GetNextInstruction(hp1,hp2) and
  2981. MatchInstruction(hp2,A_MOV,[]) and
  2982. ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
  2983. (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
  2984. ) or
  2985. (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,
  2986. taicpu(p).oper[1]^.reg) and
  2987. (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg)) or
  2988. (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_NO)) or
  2989. (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,NR_NO,taicpu(p).oper[1]^.reg))
  2990. ) and
  2991. ((MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^)) or not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)))
  2992. )
  2993. ) and
  2994. MatchOperand(taicpu(hp1).oper[taicpu(hp1).ops-1]^,taicpu(hp2).oper[0]^) and
  2995. (taicpu(hp2).oper[1]^.typ = top_ref) then
  2996. begin
  2997. TransferUsedRegs(TmpUsedRegs);
  2998. UpdateUsedRegs(TmpUsedRegs,tai(p.next));
  2999. UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
  3000. if (RefsEqual(taicpu(hp2).oper[1]^.ref^,taicpu(p).oper[0]^.ref^) and
  3001. not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,TmpUsedRegs))) then
  3002. { change mov (ref), reg
  3003. add/sub/or/... reg2/$const, reg
  3004. mov reg, (ref)
  3005. # release reg
  3006. to add/sub/or/... reg2/$const, (ref) }
  3007. begin
  3008. case taicpu(hp1).opcode of
  3009. A_INC,A_DEC,A_NOT,A_NEG :
  3010. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  3011. A_LEA :
  3012. begin
  3013. taicpu(hp1).opcode:=A_ADD;
  3014. if (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.index<>NR_NO) then
  3015. taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
  3016. else if (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.base<>NR_NO) then
  3017. taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base)
  3018. else
  3019. taicpu(hp1).loadconst(0,taicpu(hp1).oper[0]^.ref^.offset);
  3020. taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
  3021. DebugMsg(SPeepholeOptimization + 'FoldLea done',hp1);
  3022. end
  3023. else
  3024. taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
  3025. end;
  3026. asml.remove(p);
  3027. asml.remove(hp2);
  3028. p.free;
  3029. hp2.free;
  3030. p := hp1
  3031. end;
  3032. Exit;
  3033. {$ifdef x86_64}
  3034. end
  3035. else if (taicpu(p).opsize = S_L) and
  3036. (taicpu(p).oper[1]^.typ = top_reg) and
  3037. (
  3038. GetNextInstruction(p, hp1) and
  3039. MatchInstruction(hp1, A_MOV,[]) and
  3040. (taicpu(hp1).opsize = S_L) and
  3041. (taicpu(hp1).oper[1]^.typ = top_reg)
  3042. ) and (
  3043. GetNextInstruction(hp1, hp2) and
  3044. (tai(hp2).typ=ait_instruction) and
  3045. (taicpu(hp2).opsize = S_Q) and
  3046. (
  3047. (
  3048. MatchInstruction(hp2, A_ADD,[]) and
  3049. (taicpu(hp2).opsize = S_Q) and
  3050. (taicpu(hp2).oper[0]^.typ = top_reg) and (taicpu(hp2).oper[1]^.typ = top_reg) and
  3051. (
  3052. (
  3053. (getsupreg(taicpu(hp2).oper[0]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) and
  3054. (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))
  3055. ) or (
  3056. (getsupreg(taicpu(hp2).oper[0]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
  3057. (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg))
  3058. )
  3059. )
  3060. ) or (
  3061. MatchInstruction(hp2, A_LEA,[]) and
  3062. (taicpu(hp2).oper[0]^.ref^.offset = 0) and
  3063. (taicpu(hp2).oper[0]^.ref^.scalefactor <= 1) and
  3064. (
  3065. (
  3066. (getsupreg(taicpu(hp2).oper[0]^.ref^.base) = getsupreg(taicpu(p).oper[1]^.reg)) and
  3067. (getsupreg(taicpu(hp2).oper[0]^.ref^.index) = getsupreg(taicpu(hp1).oper[1]^.reg))
  3068. ) or (
  3069. (getsupreg(taicpu(hp2).oper[0]^.ref^.base) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
  3070. (getsupreg(taicpu(hp2).oper[0]^.ref^.index) = getsupreg(taicpu(p).oper[1]^.reg))
  3071. )
  3072. ) and (
  3073. (
  3074. (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))
  3075. ) or (
  3076. (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg))
  3077. )
  3078. )
  3079. )
  3080. )
  3081. ) and (
  3082. GetNextInstruction(hp2, hp3) and
  3083. MatchInstruction(hp3, A_SHR,[]) and
  3084. (taicpu(hp3).opsize = S_Q) and
  3085. (taicpu(hp3).oper[0]^.typ = top_const) and (taicpu(hp2).oper[1]^.typ = top_reg) and
  3086. (taicpu(hp3).oper[0]^.val = 1) and
  3087. (taicpu(hp3).oper[1]^.reg = taicpu(hp2).oper[1]^.reg)
  3088. ) then
  3089. begin
  3090. { Change movl x, reg1d movl x, reg1d
  3091. movl y, reg2d movl y, reg2d
  3092. addq reg2q,reg1q or leaq (reg1q,reg2q),reg1q
  3093. shrq $1, reg1q shrq $1, reg1q
  3094. ( reg1d and reg2d can be switched around in the first two instructions )
  3095. To movl x, reg1d
  3096. addl y, reg1d
  3097. rcrl $1, reg1d
  3098. This corresponds to the common expression (x + y) shr 1, where
  3099. x and y are Cardinals (replacing "shr 1" with "div 2" produces
  3100. smaller code, but won't account for x + y causing an overflow). [Kit]
  3101. }
  3102. if (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) then
  3103. { Change first MOV command to have the same register as the final output }
  3104. taicpu(p).oper[1]^.reg := taicpu(hp1).oper[1]^.reg
  3105. else
  3106. taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  3107. { Change second MOV command to an ADD command. This is easier than
  3108. converting the existing command because it means we don't have to
  3109. touch 'y', which might be a complicated reference, and also the
  3110. fact that the third command might either be ADD or LEA. [Kit] }
  3111. taicpu(hp1).opcode := A_ADD;
  3112. { Delete old ADD/LEA instruction }
  3113. asml.remove(hp2);
  3114. hp2.free;
  3115. { Convert "shrq $1, reg1q" to "rcr $1, reg1d" }
  3116. taicpu(hp3).opcode := A_RCR;
  3117. taicpu(hp3).changeopsize(S_L);
  3118. setsubreg(taicpu(hp3).oper[1]^.reg, R_SUBD);
  3119. {$endif x86_64}
  3120. end;
  3121. end;
    { Folds a preceding register copy into a 2-operand IMUL by using the
      3-operand form:
          mov  reg1,reg2
          imul y,reg2      -->   imul y,reg1,reg2
      Valid when reg2 is dead after the IMUL (or the 3-operand form already
      targets the same register), so dropping the MOV cannot be observed. }
    function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
      var
        hp1 : tai;
      begin
        Result:=false;
        { p must be IMUL with a constant multiplier (or a full symbolic
          address) and a register destination; a 3-operand IMUL only
          qualifies when source and destination registers coincide, i.e.
          it behaves like the 2-operand form }
        if (taicpu(p).ops >= 2) and
           ((taicpu(p).oper[0]^.typ = top_const) or
            ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
           (taicpu(p).oper[1]^.typ = top_reg) and
           ((taicpu(p).ops = 2) or
            ((taicpu(p).oper[2]^.typ = top_reg) and
             (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
           { the instruction directly before p must be a reg-to-reg MOV
             whose destination is the register the IMUL works on }
           GetLastInstruction(p,hp1) and
           MatchInstruction(hp1,A_MOV,[]) and
           MatchOpType(taicpu(hp1),top_reg,top_reg) and
           (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
          begin
            { NOTE(review): unlike other branches in this unit, TmpUsedRegs
              is not advanced past p before the liveness query — presumably
              RegUsedAfterInstruction copes with that; confirm }
            TransferUsedRegs(TmpUsedRegs);
            if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,p,TmpUsedRegs)) or
              ((taicpu(p).ops = 3) and (taicpu(p).oper[1]^.reg=taicpu(p).oper[2]^.reg)) then
              { change
                  mov reg1,reg2
                  imul y,reg2 to imul y,reg1,reg2 }
              begin
                taicpu(p).ops := 3;
                { oper[2] is loaded from oper[1] BEFORE oper[1] is
                  overwritten with the MOV's source register }
                taicpu(p).loadreg(2,taicpu(p).oper[1]^.reg);
                taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
                DebugMsg(SPeepholeOptimization + 'MovImul2Imul done',p);
                asml.remove(hp1);
                hp1.free;
                result:=true;
              end;
          end;
      end;
  3156. function TX86AsmOptimizer.OptPass2Jmp(var p : tai) : boolean;
  3157. var
  3158. hp1 : tai;
  3159. begin
  3160. {
  3161. change
  3162. jmp .L1
  3163. ...
  3164. .L1:
  3165. ret
  3166. into
  3167. ret
  3168. }
  3169. result:=false;
  3170. if (taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full) and (taicpu(p).oper[0]^.ref^.base=NR_NO) and
  3171. (taicpu(p).oper[0]^.ref^.index=NR_NO) then
  3172. begin
  3173. hp1:=getlabelwithsym(tasmlabel(taicpu(p).oper[0]^.ref^.symbol));
  3174. if (taicpu(p).condition=C_None) and assigned(hp1) and SkipLabels(hp1,hp1) and
  3175. MatchInstruction(hp1,A_RET,[S_NO]) then
  3176. begin
  3177. tasmlabel(taicpu(p).oper[0]^.ref^.symbol).decrefs;
  3178. taicpu(p).opcode:=A_RET;
  3179. taicpu(p).is_jmp:=false;
  3180. taicpu(p).ops:=taicpu(hp1).ops;
  3181. case taicpu(hp1).ops of
  3182. 0:
  3183. taicpu(p).clearop(0);
  3184. 1:
  3185. taicpu(p).loadconst(0,taicpu(hp1).oper[0]^.val);
  3186. else
  3187. internalerror(2016041301);
  3188. end;
  3189. result:=true;
  3190. end;
  3191. end;
  3192. end;
  3193. function CanBeCMOV(p : tai) : boolean;
  3194. begin
  3195. CanBeCMOV:=assigned(p) and
  3196. MatchInstruction(p,A_MOV,[S_W,S_L,S_Q]) and
  3197. { we can't use cmov ref,reg because
  3198. ref could be nil and cmov still throws an exception
  3199. if ref=nil but the mov isn't done (FK)
  3200. or ((taicpu(p).oper[0]^.typ = top_ref) and
  3201. (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
  3202. }
  3203. (MatchOpType(taicpu(p),top_reg,top_reg) or
  3204. { allow references, but only pure symbols or got rel. addressing with RIP as based,
  3205. it is not expected that this can cause a seg. violation }
  3206. (MatchOpType(taicpu(p),top_ref,top_reg) and
  3207. (((taicpu(p).oper[0]^.ref^.base=NR_NO) and (taicpu(p).oper[0]^.ref^.refaddr=addr_no)){$ifdef x86_64} or
  3208. ((taicpu(p).oper[0]^.ref^.base=NR_RIP) and (taicpu(p).oper[0]^.ref^.refaddr=addr_pic)){$endif x86_64}
  3209. ) and
  3210. (taicpu(p).oper[0]^.ref^.index=NR_NO) and
  3211. (taicpu(p).oper[0]^.ref^.offset=0)
  3212. )
  3213. );
  3214. end;
    { Pass-2 optimisations rooted at a conditional jump:
        1. carry-based jumps around INC/DEC -> ADC/SBB with 0
        2. jcc over a jmp whose own target is a RET -> inverted jcc + ret
        3. jcc over a run of MOVs -> CMOVcc sequence (one- and two-arm
           if/else shapes), when the CPU supports CMOV }
    function TX86AsmOptimizer.OptPass2Jcc(var p : tai) : boolean;
      var
        hp1,hp2,hp3,hp4,hpmov2: tai;
        carryadd_opcode : TAsmOp;
        l : Longint;
        condition : TAsmCond;
        symbol: TAsmSymbol;
      begin
        result:=false;
        symbol:=nil;
        if GetNextInstruction(p,hp1) then
          begin
            symbol := TAsmLabel(taicpu(p).oper[0]^.ref^.symbol);
            { pattern: exactly one instruction is jumped over, and the jump
              target is the label directly following it }
            if (hp1.typ=ait_instruction) and
               GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
               (Tasmlabel(symbol) = Tai_label(hp2).labsym) then
               { jb @@1                            cmc
                 inc/dec operand           -->     adc/sbb operand,0
                 @@1:
                 ... and ...
                 jnb @@1
                 inc/dec operand           -->     adc/sbb operand,0
                 @@1: }
              begin
                carryadd_opcode:=A_NONE;
                { jb/jnae skips the inc/dec when carry is SET, i.e. the
                  inc/dec runs on carry clear -> invert carry (CMC), then
                  let ADC/SBB add/subtract the (inverted) carry }
                if Taicpu(p).condition in [C_NAE,C_B] then
                  begin
                    if Taicpu(hp1).opcode=A_INC then
                      carryadd_opcode:=A_ADC;
                    if Taicpu(hp1).opcode=A_DEC then
                      carryadd_opcode:=A_SBB;
                    if carryadd_opcode<>A_NONE then
                      begin
                        { turn the jump into CMC in place }
                        Taicpu(p).clearop(0);
                        Taicpu(p).ops:=0;
                        Taicpu(p).is_jmp:=false;
                        Taicpu(p).opcode:=A_CMC;
                        Taicpu(p).condition:=C_NONE;
                        { inc/dec op -> adc/sbb $0,op }
                        Taicpu(hp1).ops:=2;
                        Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                        Taicpu(hp1).loadconst(0,0);
                        Taicpu(hp1).opcode:=carryadd_opcode;
                        result:=true;
                        exit;
                      end;
                  end;
                { jae/jnb skips the inc/dec when carry is CLEAR, i.e. the
                  inc/dec runs on carry set -> the jump can be dropped
                  entirely and ADC/SBB consumes the carry directly }
                if Taicpu(p).condition in [C_AE,C_NB] then
                  begin
                    if Taicpu(hp1).opcode=A_INC then
                      carryadd_opcode:=A_ADC;
                    if Taicpu(hp1).opcode=A_DEC then
                      carryadd_opcode:=A_SBB;
                    if carryadd_opcode<>A_NONE then
                      begin
                        asml.remove(p);
                        p.free;
                        Taicpu(hp1).ops:=2;
                        Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                        Taicpu(hp1).loadconst(0,0);
                        Taicpu(hp1).opcode:=carryadd_opcode;
                        p:=hp1;
                        result:=true;
                        exit;
                      end;
                  end;
              end;
            { Detect the following:
                jmp<cond>     @Lbl1
                jmp           @Lbl2
                ...
              @Lbl1:
                ret
              Change to:
                jmp<inv_cond> @Lbl2
                ret
            }
            if MatchInstruction(hp1,A_JMP,[]) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full) then
              begin
                hp2:=getlabelwithsym(TAsmLabel(symbol));
                if Assigned(hp2) and SkipLabels(hp2,hp2) and
                   MatchInstruction(hp2,A_RET,[S_NO]) then
                  begin
                    taicpu(p).condition := inverse_cond(taicpu(p).condition);
                    { Change label address to that of the unconditional jump }
                    taicpu(p).loadoper(0, taicpu(hp1).oper[0]^);
                    TAsmLabel(symbol).DecRefs;
                    { turn the unconditional jump into a copy of the RET,
                      including its optional stack-adjustment operand }
                    taicpu(hp1).opcode := A_RET;
                    taicpu(hp1).is_jmp := false;
                    taicpu(hp1).ops := taicpu(hp2).ops;
                    DebugMsg(SPeepholeOptimization+'JccJmpRet2J!ccRet',p);
                    case taicpu(hp2).ops of
                      0:
                        taicpu(hp1).clearop(0);
                      1:
                        taicpu(hp1).loadconst(0,taicpu(hp2).oper[0]^.val);
                      else
                        internalerror(2016041302);
                    end;
                  end;
              end;
          end;
    {$ifndef i8086}
        if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
          begin
            { check for
                   jCC   xxx
                   <several movs>
                xxx:
            }
            { count the CMOV-convertible MOVs following the jump }
            l:=0;
            GetNextInstruction(p, hp1);
            while assigned(hp1) and
              CanBeCMOV(hp1) and
              { stop on labels }
              not(hp1.typ=ait_label) do
              begin
                 inc(l);
                 GetNextInstruction(hp1,hp1);
              end;
            if assigned(hp1) then
              begin
                { one-arm shape: the jump target directly follows the MOVs }
                if FindLabel(tasmlabel(symbol),hp1) then
                  begin
                    { at most 4 MOVs are converted; beyond that the
                      unconditional CMOV chain is not considered worthwhile }
                    if (l<=4) and (l>0) then
                      begin
                        { the MOVs execute when the jump is NOT taken }
                        condition:=inverse_cond(taicpu(p).condition);
                        GetNextInstruction(p,hp1);
                        repeat
                          if not Assigned(hp1) then
                            InternalError(2018062900);
                          taicpu(hp1).opcode:=A_CMOVcc;
                          taicpu(hp1).condition:=condition;
                          UpdateUsedRegs(hp1);
                          GetNextInstruction(hp1,hp1);
                        until not(CanBeCMOV(hp1));
                        { Remember what hp1 is in case there's multiple aligns to get rid of }
                        hp2 := hp1;
                        repeat
                          if not Assigned(hp2) then
                            InternalError(2018062910);
                          case hp2.typ of
                            ait_label:
                              { What we expected - break out of the loop (it won't be a dead label at the top of
                                a cluster because that was optimised at an earlier stage) }
                              Break;
                            ait_align:
                              { Go to the next entry until a label is found (may be multiple aligns before it) }
                              begin
                                hp2 := tai(hp2.Next);
                                Continue;
                              end;
                            else
                              begin
                                { Might be a comment or temporary allocation entry }
                                if not (hp2.typ in SkipInstr) then
                                  InternalError(2018062911);
                                hp2 := tai(hp2.Next);
                                Continue;
                              end;
                          end;
                        until False;
                        { Now we can safely decrement the reference count }
                        tasmlabel(symbol).decrefs;
                        DebugMsg(SPeepholeOptimization+'JccMov2CMov',p);
                        { Remove the original jump }
                        asml.Remove(p);
                        p.Free;
                        GetNextInstruction(hp2, p); { Instruction after the label }
                        { Remove the label if this is its final reference
                          (hp1 still points at the first align/label entry
                          located above) }
                        if (tasmlabel(symbol).getrefs=0) then
                          StripLabelFast(hp1);
                        if Assigned(p) then
                          begin
                            UpdateUsedRegs(p);
                            result:=true;
                          end;
                        exit;
                      end;
                  end
                else
                  begin
                    { check further for
                           jCC   xxx
                           <several movs 1>
                           jmp   yyy
                    xxx:
                           <several movs 2>
                    yyy:
                    }
                    { hp2 points to jmp yyy }
                    hp2:=hp1;
                    { skip hp1 to xxx (or an align right before it) }
                    GetNextInstruction(hp1, hp1);
                    if assigned(hp2) and
                      assigned(hp1) and
                      (l<=3) and
                      (hp2.typ=ait_instruction) and
                      (taicpu(hp2).is_jmp) and
                      (taicpu(hp2).condition=C_None) and
                      { real label and jump, no further references to the
                        label are allowed }
                      (tasmlabel(symbol).getrefs=1) and
                      FindLabel(tasmlabel(symbol),hp1) then
                      begin
                        l:=0;
                        { skip hp1 to <several moves 2> }
                        if (hp1.typ = ait_align) then
                          GetNextInstruction(hp1, hp1);
                        GetNextInstruction(hp1, hpmov2);
                        hp1 := hpmov2;
                        { count the second arm's CMOV-convertible MOVs }
                        while assigned(hp1) and
                          CanBeCMOV(hp1) do
                          begin
                            inc(l);
                            GetNextInstruction(hp1, hp1);
                          end;
                        { hp1 points to yyy (or an align right before it) }
                        hp3 := hp1;
                        if assigned(hp1) and
                          FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
                          begin
                            { first arm: executes when the jump is NOT taken }
                            condition:=inverse_cond(taicpu(p).condition);
                            GetNextInstruction(p,hp1);
                            repeat
                              taicpu(hp1).opcode:=A_CMOVcc;
                              taicpu(hp1).condition:=condition;
                              UpdateUsedRegs(hp1);
                              GetNextInstruction(hp1,hp1);
                            until not(assigned(hp1)) or
                              not(CanBeCMOV(hp1));
                            { second arm: the opposite condition }
                            condition:=inverse_cond(condition);
                            hp1 := hpmov2;
                            { hp1 is now at <several movs 2> }
                            while Assigned(hp1) and CanBeCMOV(hp1) do
                              begin
                                taicpu(hp1).opcode:=A_CMOVcc;
                                taicpu(hp1).condition:=condition;
                                UpdateUsedRegs(hp1);
                                GetNextInstruction(hp1,hp1);
                              end;
                            hp1 := p;
                            { Get first instruction after label }
                            GetNextInstruction(hp3, p);
                            if assigned(p) and (hp3.typ = ait_align) then
                              GetNextInstruction(p, p);
                            { Don't dereference yet, as doing so will cause
                              GetNextInstruction to skip the label and
                              optional align marker. [Kit] }
                            GetNextInstruction(hp2, hp4);
                            DebugMsg(SPeepholeOptimization+'JccMovJmpMov2CMovCMov',hp1);
                            { remove jCC }
                            asml.remove(hp1);
                            hp1.free;
                            { Now we can safely decrement it }
                            tasmlabel(symbol).decrefs;
                            { Remove label xxx (it will have a ref of zero due to the initial check }
                            StripLabelFast(hp4);
                            { remove jmp }
                            symbol := taicpu(hp2).oper[0]^.ref^.symbol;
                            asml.remove(hp2);
                            hp2.free;
                            { As before, now we can safely decrement it }
                            tasmlabel(symbol).decrefs;
                            { Remove label yyy (and the optional alignment) if its reference falls to zero }
                            if tasmlabel(symbol).getrefs = 0 then
                              StripLabelFast(hp3);
                            if Assigned(p) then
                              begin
                                UpdateUsedRegs(p);
                                result:=true;
                              end;
                            exit;
                          end;
                      end;
                  end;
              end;
          end;
    {$endif i8086}
      end;
    { Pass-1 optimisations rooted at a MOVSX/MOVZX (and MOVSXD on x86-64):
        1. folds a movx/arith/mov-back triple so the arithmetic operates on
           the original (smaller) location directly
        2. for MOVZX: drops AND masks made redundant by the zero-extension,
           and rewrites movzx-of-self into AND, or movzx+and into mov+and }
    function TX86AsmOptimizer.OptPass1Movx(var p : tai) : boolean;
      var
        hp1,hp2: tai;
      begin
        result:=false;
        if (taicpu(p).oper[1]^.typ = top_reg) and
           GetNextInstruction(p,hp1) and
           (hp1.typ = ait_instruction) and
           IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
           GetNextInstruction(hp1,hp2) and
           MatchInstruction(hp2,A_MOV,[]) and
           (taicpu(hp2).oper[0]^.typ = top_reg) and
           { the value must be written back to exactly where it came from }
           OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
    {$ifdef i386}
           { not all registers have byte size sub registers on i386 }
           ((taicpu(hp2).opsize<>S_B) or (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX])) and
    {$endif i386}
           { the arith op's destination must be the register written back }
           (((taicpu(hp1).ops=2) and
             (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
            ((taicpu(hp1).ops=1) and
             (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
           not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then
          begin
            { change movsX/movzX reg/ref, reg2
                     add/sub/or/... reg3/$const, reg2
                     mov reg2 reg/ref
              to     add/sub/or/... reg3/$const, reg/ref }
            { by example:
                movswl  %si,%eax        movswl  %si,%eax      p
                decl    %eax            addl    %edx,%eax     hp1
                movw    %ax,%si         movw    %ax,%si       hp2
              ->
                movswl  %si,%eax        movswl  %si,%eax      p
                decw    %eax            addw    %edx,%eax     hp1
                movw    %ax,%si         movw    %ax,%si       hp2
            }
            { first shrink the arith op to the write-back size }
            taicpu(hp1).changeopsize(taicpu(hp2).opsize);
            {
              ->
                movswl  %si,%eax        movswl  %si,%eax      p
                decw    %si             addw    %dx,%si       hp1
                movw    %ax,%si         movw    %ax,%si       hp2
            }
            { then redirect the arith op's destination to the original
              location; for the 2-operand form a register source is also
              shrunk to the matching sub-register }
            case taicpu(hp1).ops of
              1:
                taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
              2:
                begin
                  taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
                  if (taicpu(hp1).oper[0]^.typ = top_reg) then
                    setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
                end;
              else
                internalerror(2008042701);
            end;
            {
              ->
                decw    %si             addw    %dx,%si       p
            }
            DebugMsg(SPeepholeOptimization + 'var3',p);
            asml.remove(p);
            asml.remove(hp2);
            p.free;
            hp2.free;
            p:=hp1;
          end
        else if taicpu(p).opcode=A_MOVZX then
          begin
            { removes superfluous And's after movzx's }
            if (taicpu(p).oper[1]^.typ = top_reg) and
              GetNextInstruction(p, hp1) and
              (tai(hp1).typ = ait_instruction) and
              (taicpu(hp1).opcode = A_AND) and
              (taicpu(hp1).oper[0]^.typ = top_const) and
              (taicpu(hp1).oper[1]^.typ = top_reg) and
              (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
              begin
                { when the AND mask keeps exactly the bits the zero-extension
                  already guarantees, the AND is a no-op and can go }
                case taicpu(p).opsize Of
                  S_BL, S_BW{$ifdef x86_64}, S_BQ{$endif x86_64}:
                    if (taicpu(hp1).oper[0]^.val = $ff) then
                      begin
                        DebugMsg(SPeepholeOptimization + 'var4',p);
                        asml.remove(hp1);
                        hp1.free;
                      end;
                  S_WL{$ifdef x86_64}, S_WQ{$endif x86_64}:
                    if (taicpu(hp1).oper[0]^.val = $ffff) then
                      begin
                        DebugMsg(SPeepholeOptimization + 'var5',p);
                        asml.remove(hp1);
                        hp1.free;
                      end;
    {$ifdef x86_64}
                  S_LQ:
                    if (taicpu(hp1).oper[0]^.val = $ffffffff) then
                      begin
                        if (cs_asm_source in current_settings.globalswitches) then
                          asml.insertbefore(tai_comment.create(strpnew(SPeepholeOptimization + 'var6')),p);
                        asml.remove(hp1);
                        hp1.Free;
                      end;
    {$endif x86_64}
                  else
                    ;
                end;
              end;
            { changes some movzx constructs to faster synonims (all examples
              are given with eax/ax, but are also valid for other registers)}
            if (taicpu(p).oper[1]^.typ = top_reg) then
              if (taicpu(p).oper[0]^.typ = top_reg) then
                case taicpu(p).opsize of
                  S_BW:
                    begin
                      if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
                        not(cs_opt_size in current_settings.optimizerswitches) then
                        {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
                        begin
                          taicpu(p).opcode := A_AND;
                          taicpu(p).changeopsize(S_W);
                          taicpu(p).loadConst(0,$ff);
                          DebugMsg(SPeepholeOptimization + 'var7',p);
                        end
                      else if GetNextInstruction(p, hp1) and
                        (tai(hp1).typ = ait_instruction) and
                        (taicpu(hp1).opcode = A_AND) and
                        (taicpu(hp1).oper[0]^.typ = top_const) and
                        (taicpu(hp1).oper[1]^.typ = top_reg) and
                        (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                        { Change "movzbw %reg1, %reg2; andw $const, %reg2"
                          to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
                        begin
                          DebugMsg(SPeepholeOptimization + 'var8',p);
                          taicpu(p).opcode := A_MOV;
                          taicpu(p).changeopsize(S_W);
                          setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
                          taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                        end;
                    end;
                  S_BL:
                    begin
                      if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
                        not(cs_opt_size in current_settings.optimizerswitches) then
                        { Change "movzbl %al, %eax" to "andl $0x0ffh, %eax" }
                        begin
                          taicpu(p).opcode := A_AND;
                          taicpu(p).changeopsize(S_L);
                          taicpu(p).loadConst(0,$ff)
                        end
                      else if GetNextInstruction(p, hp1) and
                        (tai(hp1).typ = ait_instruction) and
                        (taicpu(hp1).opcode = A_AND) and
                        (taicpu(hp1).oper[0]^.typ = top_const) and
                        (taicpu(hp1).oper[1]^.typ = top_reg) and
                        (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                        { Change "movzbl %reg1, %reg2; andl $const, %reg2"
                          to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
                        begin
                          DebugMsg(SPeepholeOptimization + 'var10',p);
                          taicpu(p).opcode := A_MOV;
                          taicpu(p).changeopsize(S_L);
                          { do not use R_SUBWHOLE
                            as movl %rdx,%eax
                            is invalid in assembler PM }
                          setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
                          taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                        end
                    end;
    {$ifndef i8086}
                  S_WL:
                    begin
                      if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
                        not(cs_opt_size in current_settings.optimizerswitches) then
                        { Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax" }
                        begin
                          DebugMsg(SPeepholeOptimization + 'var11',p);
                          taicpu(p).opcode := A_AND;
                          taicpu(p).changeopsize(S_L);
                          taicpu(p).loadConst(0,$ffff);
                        end
                      else if GetNextInstruction(p, hp1) and
                        (tai(hp1).typ = ait_instruction) and
                        (taicpu(hp1).opcode = A_AND) and
                        (taicpu(hp1).oper[0]^.typ = top_const) and
                        (taicpu(hp1).oper[1]^.typ = top_reg) and
                        (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                        { Change "movzwl %reg1, %reg2; andl $const, %reg2"
                          to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
                        begin
                          DebugMsg(SPeepholeOptimization + 'var12',p);
                          taicpu(p).opcode := A_MOV;
                          taicpu(p).changeopsize(S_L);
                          { do not use R_SUBWHOLE
                            as movl %rdx,%eax
                            is invalid in assembler PM }
                          setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
                          taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
                        end;
                    end;
    {$endif i8086}
                  else
                    ;
                end
              else if (taicpu(p).oper[0]^.typ = top_ref) then
                begin
                  { movzx from memory followed by an AND on the destination:
                    the AND mask can be pre-narrowed to the loaded width and
                    widened to the destination size (the movzx itself stays) }
                  if GetNextInstruction(p, hp1) and
                    (tai(hp1).typ = ait_instruction) and
                    (taicpu(hp1).opcode = A_AND) and
                    MatchOpType(taicpu(hp1),top_const,top_reg) and
                    (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                    begin
                      //taicpu(p).opcode := A_MOV;
                      case taicpu(p).opsize Of
                        S_BL:
                          begin
                            DebugMsg(SPeepholeOptimization + 'var13',p);
                            taicpu(hp1).changeopsize(S_L);
                            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                          end;
                        S_WL:
                          begin
                            DebugMsg(SPeepholeOptimization + 'var14',p);
                            taicpu(hp1).changeopsize(S_L);
                            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
                          end;
                        S_BW:
                          begin
                            DebugMsg(SPeepholeOptimization + 'var15',p);
                            taicpu(hp1).changeopsize(S_W);
                            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                          end;
    {$ifdef x86_64}
                        S_BQ:
                          begin
                            DebugMsg(SPeepholeOptimization + 'var16',p);
                            taicpu(hp1).changeopsize(S_Q);
                            taicpu(hp1).loadConst(
                              0, taicpu(hp1).oper[0]^.val and $ff);
                          end;
                        S_WQ:
                          begin
                            DebugMsg(SPeepholeOptimization + 'var17',p);
                            taicpu(hp1).changeopsize(S_Q);
                            taicpu(hp1).loadConst(0, taicpu(hp1).oper[0]^.val and $ffff);
                          end;
                        S_LQ:
                          begin
                            DebugMsg(SPeepholeOptimization + 'var18',p);
                            taicpu(hp1).changeopsize(S_Q);
                            taicpu(hp1).loadConst(
                              0, taicpu(hp1).oper[0]^.val and $ffffffff);
                          end;
    {$endif x86_64}
                        else
                          Internalerror(2017050704)
                      end;
                    end;
                end;
          end;
      end;
    { Pass-1 optimisations rooted at an AND:
        1. and/and with constants        -> single and with merged mask
        2. and + redundant movzx/movsx   -> drop the extension
        3. and (contiguous mask) + shl   -> drop the and when shifted out
        4. and + conditional jump        -> test + jump when reg is dead
        5. lone full-width constant mask -> and reg,reg }
    function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
      var
        hp1 : tai;
        MaskLength : Cardinal;
      begin
        Result:=false;
        if GetNextInstruction(p, hp1) then
          begin
            if MatchOpType(taicpu(p),top_const,top_reg) and
              MatchInstruction(hp1,A_AND,[]) and
              MatchOpType(taicpu(hp1),top_const,top_reg) and
              (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
              { the second register must contain the first one, so compare their subreg types }
              (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
              { NOTE(review): presumably guards that the merged mask stays
                encodable as a sign-extended 32-bit immediate — confirm }
              (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
              { change
                  and const1, reg
                  and const2, reg
                to
                  and (const1 and const2), reg
              }
              begin
                taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
                DebugMsg(SPeepholeOptimization + 'AndAnd2And done',hp1);
                asml.remove(p);
                p.Free;
                p:=hp1;
                Result:=true;
                exit;
              end
            else if MatchOpType(taicpu(p),top_const,top_reg) and
              MatchInstruction(hp1,A_MOVZX,[]) and
              (taicpu(hp1).oper[0]^.typ = top_reg) and
              MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
              { the movzx must extend the register within itself }
              (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
              (((taicpu(p).opsize=S_W) and
                (taicpu(hp1).opsize=S_BW)) or
               ((taicpu(p).opsize=S_L) and
                (taicpu(hp1).opsize in [S_WL,S_BL]))
    {$ifdef x86_64}
                or
                ((taicpu(p).opsize=S_Q) and
                 (taicpu(hp1).opsize in [S_BQ,S_WQ]))
    {$endif x86_64}
              ) then
              begin
                { the movzx is redundant when the AND already cleared every
                  bit above the width being zero-extended from }
                if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
                  ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
                  ) or
                  (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
                  ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
                then
                  begin
                    { Unlike MOVSX, MOVZX doesn't actually have a version that zero-extends a
                      32-bit register to a 64-bit register, or even a version called MOVZXD, so
                      code that tests for the presence of AND 0xffffffff followed by MOVZX is
                      wasted, and is indictive of a compiler bug if it were triggered. [Kit]
                      NOTE: To zero-extend from 32 bits to 64 bits, simply use the standard MOV.
                    }
                    DebugMsg(SPeepholeOptimization + 'AndMovzToAnd done',p);
                    asml.remove(hp1);
                    hp1.free;
                    Exit;
                  end;
              end
            else if MatchOpType(taicpu(p),top_const,top_reg) and
              MatchInstruction(hp1,A_SHL,[]) and
              MatchOpType(taicpu(hp1),top_const,top_reg) and
              (getsupreg(taicpu(p).oper[1]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) then
              begin
    {$ifopt R+}
    {$define RANGE_WAS_ON}
    {$R-}
    {$endif}
                { get length of potential and mask }
                { range checks are disabled around BsrQWord because the
                  constant may occupy the full (signed) operand range }
                MaskLength:=SizeOf(taicpu(p).oper[0]^.val)*8-BsrQWord(taicpu(p).oper[0]^.val)-1;
                { really a mask? }
    {$ifdef RANGE_WAS_ON}
    {$R+}
    {$endif}
                { the AND can be dropped when its mask is contiguous from
                  bit 0 and the SHL pushes every unmasked bit out of the
                  operand's width }
                if (((QWord(1) shl MaskLength)-1)=taicpu(p).oper[0]^.val) and
                  { unmasked part shifted out? }
                  ((MaskLength+taicpu(hp1).oper[0]^.val)>=topsize2memsize[taicpu(hp1).opsize]) then
                  begin
                    DebugMsg(SPeepholeOptimization + 'AndShlToShl done',p);
                    { take care of the register (de)allocs following p }
                    UpdateUsedRegs(tai(p.next));
                    asml.remove(p);
                    p.free;
                    p:=hp1;
                    Result:=true;
                    exit;
                  end;
              end
            else if MatchOpType(taicpu(p),top_const,top_reg) and
              MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
              (taicpu(hp1).oper[0]^.typ = top_reg) and
              MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
              (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
              (((taicpu(p).opsize=S_W) and
                (taicpu(hp1).opsize=S_BW)) or
               ((taicpu(p).opsize=S_L) and
                (taicpu(hp1).opsize in [S_WL,S_BL]))
    {$ifdef x86_64}
               or
               ((taicpu(p).opsize=S_Q) and
                (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
    {$endif x86_64}
              ) then
              begin
                { the movsx is redundant when the AND leaves the sign bit of
                  the narrow width clear, so sign-extension equals
                  zero-extension and the value is already extended }
                if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
                  ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
                  ) or
                  (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
                  ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
    {$ifdef x86_64}
                  or
                  (((taicpu(hp1).opsize)=S_LQ) and
                  ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
                  )
    {$endif x86_64}
                then
                  begin
                    DebugMsg(SPeepholeOptimization + 'AndMovsxToAnd',p);
                    asml.remove(hp1);
                    hp1.free;
                    Exit;
                  end;
              end
            else if (taicpu(p).oper[1]^.typ = top_reg) and
              (hp1.typ = ait_instruction) and
              (taicpu(hp1).is_jmp) and
              (taicpu(hp1).opcode<>A_JMP) and
              not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
              begin
                { change
                    and x, reg
                    jxx
                  to
                    test x, reg
                    jxx
                  if reg is deallocated before the
                  jump, but only if it's a conditional jump (PFV)
                }
                taicpu(p).opcode := A_TEST;
                Exit;
              end;
          end;
        { Lone AND tests }
        if MatchOpType(taicpu(p),top_const,top_reg) then
          begin
            {
              - Convert and $0xFF,reg to and reg,reg if reg is 8-bit
              - Convert and $0xFFFF,reg to and reg,reg if reg is 16-bit
              - Convert and $0xFFFFFFFF,reg to and reg,reg if reg is 32-bit
            }
            if ((taicpu(p).oper[0]^.val = $FF) and (taicpu(p).opsize = S_B)) or
               ((taicpu(p).oper[0]^.val = $FFFF) and (taicpu(p).opsize = S_W)) or
               ((taicpu(p).oper[0]^.val = $FFFFFFFF) and (taicpu(p).opsize = S_L)) then
              begin
                taicpu(p).loadreg(0, taicpu(p).oper[1]^.reg)
              end;
          end;
      end;
function TX86AsmOptimizer.OptPass2Lea(var p : tai) : Boolean;
  begin
    Result:=false;
    { Rewrite a LEA whose reference MatchReference recognises as a plain
      base+index sum with the destination register being one of the two
      source registers into an ADD of the other register.  ADD writes the
      CPU flags, so the rewrite is only legal while the flags register is
      known to be dead. }
    if not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) and
      { destination register acts as the base of the reference }
      MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
      (taicpu(p).oper[0]^.ref^.index<>NR_NO) then
      begin
        { lea (dest,index),dest -> add index,dest
          Operand 1 is reloaded before operand 0 because loading operand 0
          replaces the reference that both register values are read from. }
        taicpu(p).loadreg(1,taicpu(p).oper[0]^.ref^.base);
        taicpu(p).loadreg(0,taicpu(p).oper[0]^.ref^.index);
        taicpu(p).opcode:=A_ADD;
        DebugMsg(SPeepholeOptimization + 'Lea2AddBase done',p);
        result:=true;
      end
    else if not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) and
      { destination register acts as the index of the reference }
      MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) and
      (taicpu(p).oper[0]^.ref^.base<>NR_NO) then
      begin
        { lea (base,dest),dest -> add base,dest }
        taicpu(p).loadreg(1,taicpu(p).oper[0]^.ref^.index);
        taicpu(p).loadreg(0,taicpu(p).oper[0]^.ref^.base);
        taicpu(p).opcode:=A_ADD;
        DebugMsg(SPeepholeOptimization + 'Lea2AddIndex done',p);
        result:=true;
      end;
  end;
function TX86AsmOptimizer.PostPeepholeOptLea(var p : tai) : Boolean;

  { Advances hp1 past instructions that provably do not touch the stack
    pointer; returns False if the end of the instruction list is reached
    before a remaining instruction is found. }
  function SkipSimpleInstructions(var hp1 : tai) : Boolean;
    begin
      { we can skip all instructions not messing with the stack pointer }
      while assigned(hp1) and {MatchInstruction(taicpu(hp1),[A_LEA,A_MOV,A_MOVQ,A_MOVSQ,A_MOVSX,A_MOVSXD,A_MOVZX,
        A_AND,A_OR,A_XOR,A_ADD,A_SHR,A_SHL,A_IMUL,A_SETcc,A_SAR,A_SUB,A_TEST,A_CMOVcc,
        A_MOVSS,A_MOVSD,A_MOVAPS,A_MOVUPD,A_MOVAPD,A_MOVUPS,
        A_VMOVSS,A_VMOVSD,A_VMOVAPS,A_VMOVUPD,A_VMOVAPD,A_VMOVUPS],[]) and}
        ({(taicpu(hp1).ops=0) or }
         ({(MatchOpType(taicpu(hp1),top_reg,top_reg) or MatchOpType(taicpu(hp1),top_const,top_reg) or
           (MatchOpType(taicpu(hp1),top_ref,top_reg))
          ) and }
          not(RegInInstruction(NR_STACK_POINTER_REG,hp1)) { and not(RegInInstruction(NR_FRAME_POINTER_REG,hp1))}
         )
        ) do
        GetNextInstruction(hp1,hp1);
      Result:=assigned(hp1);
    end;

  var
    hp1, hp2, hp3: tai;
  begin
    Result:=false;
    { replace
        leal(q) x(<stackpointer>),<stackpointer>
        call procname
        leal(q) -x(<stackpointer>),<stackpointer>
        ret
      by
        jmp procname
      but do it only on level 4 because it destroys stack back traces
    }
    if (cs_opt_level4 in current_settings.optimizerswitches) and
      { p must be "lea offset(%sp),%sp" with a bare offset: no index,
        symbol or segment override }
      MatchOpType(taicpu(p),top_ref,top_reg) and
      (taicpu(p).oper[0]^.ref^.base=NR_STACK_POINTER_REG) and
      (taicpu(p).oper[0]^.ref^.index=NR_NO) and
      { the -8 or -24 are not required, but bail out early if possible,
        higher values are unlikely }
      ((taicpu(p).oper[0]^.ref^.offset=-8) or
       (taicpu(p).oper[0]^.ref^.offset=-24)) and
      (taicpu(p).oper[0]^.ref^.symbol=nil) and
      (taicpu(p).oper[0]^.ref^.relsymbol=nil) and
      (taicpu(p).oper[0]^.ref^.segment=NR_NO) and
      (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG) and
      GetNextInstruction(p, hp1) and
      { trick to skip label }
      ((hp1.typ=ait_instruction) or GetNextInstruction(hp1, hp1)) and
      SkipSimpleInstructions(hp1) and
      MatchInstruction(hp1,A_CALL,[S_NO]) and
      GetNextInstruction(hp1, hp2) and
      { hp2 must be the mirror LEA that undoes p's stack adjustment }
      MatchInstruction(hp2,A_LEA,[taicpu(p).opsize]) and
      MatchOpType(taicpu(hp2),top_ref,top_reg) and
      (taicpu(hp2).oper[0]^.ref^.offset=-taicpu(p).oper[0]^.ref^.offset) and
      (taicpu(hp2).oper[0]^.ref^.base=NR_STACK_POINTER_REG) and
      (taicpu(hp2).oper[0]^.ref^.index=NR_NO) and
      (taicpu(hp2).oper[0]^.ref^.symbol=nil) and
      (taicpu(hp2).oper[0]^.ref^.relsymbol=nil) and
      (taicpu(hp2).oper[0]^.ref^.segment=NR_NO) and
      (taicpu(hp2).oper[1]^.reg=NR_STACK_POINTER_REG) and
      GetNextInstruction(hp2, hp3) and
      { trick to skip label }
      ((hp3.typ=ait_instruction) or GetNextInstruction(hp3, hp3)) and
      MatchInstruction(hp3,A_RET,[S_NO]) and
      (taicpu(hp3).ops=0) then
      begin
        { turn the call into a tail jump and delete the now-redundant
          stack adjustments and the ret }
        taicpu(hp1).opcode := A_JMP;
        taicpu(hp1).is_jmp := true;
        DebugMsg(SPeepholeOptimization + 'LeaCallLeaRet2Jmp done',p);
        RemoveCurrentP(p);
        AsmL.Remove(hp2);
        hp2.free;
        AsmL.Remove(hp3);
        hp3.free;
        Result:=true;
      end;
  end;
function TX86AsmOptimizer.PostPeepholeOptMov(var p : tai) : Boolean;
  var
    Value, RegName: string;
  begin
    Result:=false;
    { Rewrites "mov $const,%reg" into a smaller encoding where possible:
        mov $0,%reg           -> xor %reg,%reg   (flags must be dead)
        movq $imm32,%reg64    -> movl into the 32-bit subregister
        mov $-1,%reg          -> or $-1,%reg     (-Os only; flags must be dead) }
    if (taicpu(p).oper[1]^.typ = top_reg) and (taicpu(p).oper[0]^.typ = top_const) then
      begin
        case taicpu(p).oper[0]^.val of
          0:
            { Don't make this optimisation if the CPU flags are required, since XOR scrambles them }
            if not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
              begin
                { change "mov $0,%reg" into "xor %reg,%reg" }
                taicpu(p).opcode := A_XOR;
                taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
                Result := True;
              end;
          $1..$FFFFFFFF:
            begin
              { Code size reduction by J. Gareth "Kit" Moreton }
              { change 64-bit register to 32-bit register to reduce code size (upper 32 bits will be set to zero) }
              case taicpu(p).opsize of
                S_Q:
                  begin
                    { Capture the operands' textual form before rewriting,
                      for the debug message below }
                    RegName := debug_regname(taicpu(p).oper[1]^.reg); { 64-bit register name }
                    Value := debug_tostr(taicpu(p).oper[0]^.val);
                    { The actual optimization }
                    setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
                    taicpu(p).changeopsize(S_L);
                    DebugMsg(SPeepholeOptimization + 'movq $' + Value + ',' + RegName + ' -> movl $' + Value + ',' + debug_regname(taicpu(p).oper[1]^.reg) + ' (immediate can be represented with just 32 bits)', p);
                    Result := True;
                  end;
                else
                  { Do nothing };
              end;
            end;
          -1:
            { Don't make this optimisation if the CPU flags are required, since OR scrambles them }
            if (cs_opt_size in current_settings.optimizerswitches) and
              (taicpu(p).opsize <> S_B) and
              not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
              begin
                { change "mov $-1,%reg" into "or $-1,%reg" }
                { NOTES:
                  - No size saving is made when changing a Word-sized assignment unless the register is AX (smaller encoding)
                  - This operation creates a false dependency on the register, so only do it when optimising for size
                  - It is possible to set memory operands using this method, but this creates an even greater false dependency, so don't do this at all
                }
                taicpu(p).opcode := A_OR;
                Result := True;
              end;
        end;
      end;
  end;
  4070. function TX86AsmOptimizer.PostPeepholeOptCmp(var p : tai) : Boolean;
  4071. begin
  4072. Result:=false;
  4073. { change "cmp $0, %reg" to "test %reg, %reg" }
  4074. if MatchOpType(taicpu(p),top_const,top_reg) and
  4075. (taicpu(p).oper[0]^.val = 0) then
  4076. begin
  4077. taicpu(p).opcode := A_TEST;
  4078. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  4079. Result:=true;
  4080. end;
  4081. end;
function TX86AsmOptimizer.PostPeepholeOptTestOr(var p : tai) : Boolean;
  var
    IsTestConstX : Boolean;
    hp1,hp2 : tai;
  begin
    Result:=false;
    { removes the line marked with (x) from the sequence
      and/or/xor/add/sub/... $x, %y
      test/or %y, %y | test $-1, %y (x)
      j(n)z _Label
      as the first instruction already adjusts the ZF
      %y operand may also be a reference }
    { "test $-1,%y" examines all bits of %y, just like "test %y,%y" }
    IsTestConstX:=(taicpu(p).opcode=A_TEST) and
      MatchOperand(taicpu(p).oper[0]^,-1);
    { hp1 = the flag-setting instruction before p,
      hp2 = the flag-consuming instruction after p }
    if (OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) or IsTestConstX) and
      GetLastInstruction(p, hp1) and
      (tai(hp1).typ = ait_instruction) and
      GetNextInstruction(p,hp2) and
      MatchInstruction(hp2,A_SETcc,A_Jcc,A_CMOVcc,[]) then
      case taicpu(hp1).opcode Of
        A_ADD, A_SUB, A_OR, A_XOR, A_AND:
          begin
            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
              { and in case of carry for A(E)/B(E)/C/NC }
              ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
               ((taicpu(hp1).opcode <> A_ADD) and
                (taicpu(hp1).opcode <> A_SUB))) then
              begin
                { remove the redundant test/or at p }
                hp1 := tai(p.next);
                asml.remove(p);
                p.free;
                p := tai(hp1);
                Result:=true;
              end;
          end;
        A_SHL, A_SAL, A_SHR, A_SAR:
          begin
            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
              { SHL/SAL/SHR/SAR with a value of 0 do not change the flags }
              { therefore, it's only safe to do this optimization for }
              { shifts by a (nonzero) constant }
              (taicpu(hp1).oper[0]^.typ = top_const) and
              (taicpu(hp1).oper[0]^.val <> 0) and
              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
              { and in case of carry for A(E)/B(E)/C/NC }
              (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
              begin
                { remove the redundant test/or at p }
                hp1 := tai(p.next);
                asml.remove(p);
                p.free;
                p := tai(hp1);
                Result:=true;
              end;
          end;
        A_DEC, A_INC, A_NEG:
          begin
            if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) and
              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
              { and in case of carry for A(E)/B(E)/C/NC }
              (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
              begin
                case taicpu(hp1).opcode of
                  A_DEC, A_INC:
                    { replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag }
                    begin
                      case taicpu(hp1).opcode Of
                        A_DEC: taicpu(hp1).opcode := A_SUB;
                        A_INC: taicpu(hp1).opcode := A_ADD;
                        else
                          ;
                      end;
                      { turn the one-operand inc/dec into a two-operand
                        add/sub $1: operand 0 becomes the constant 1 and
                        the old operand moves to slot 1 }
                      taicpu(hp1).loadoper(1,taicpu(hp1).oper[0]^);
                      taicpu(hp1).loadConst(0,1);
                      taicpu(hp1).ops:=2;
                    end;
                  else
                    ;
                end;
                { remove the redundant test/or at p }
                hp1 := tai(p.next);
                asml.remove(p);
                p.free;
                p := tai(hp1);
                Result:=true;
              end;
          end
        else
          { change "test $-1,%reg" into "test %reg,%reg" }
          if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
            taicpu(p).loadoper(0,taicpu(p).oper[1]^);
      end { case }
    { change "test $-1,%reg" into "test %reg,%reg" }
    else if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
      taicpu(p).loadoper(0,taicpu(p).oper[1]^);
  end;
function TX86AsmOptimizer.PostPeepholeOptCall(var p : tai) : Boolean;
  var
    hp1 : tai;
{$ifndef x86_64}
    hp2 : taicpu;
{$endif x86_64}
  begin
    Result:=false;
{$ifndef x86_64}
    { don't do this on modern CPUs, this really hurts them due to
      broken call/ret pairing }
    if (current_settings.optimizecputype < cpu_Pentium2) and
      not(cs_create_pic in current_settings.moduleswitches) and
      GetNextInstruction(p, hp1) and
      MatchInstruction(hp1,A_JMP,[S_NO]) and
      MatchOpType(taicpu(hp1),top_ref) and
      (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full) then
      begin
        { change "call f; jmp g" into "push g; jmp f": the pushed address
          of g becomes f's return address, so f returns straight into g }
        hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
        InsertLLItem(p.previous, p, hp2);
        taicpu(p).opcode := A_JMP;
        taicpu(p).is_jmp := true;
        asml.remove(hp1);
        hp1.free;
        Result:=true;
      end
    else
{$endif x86_64}
    { replace
        call procname
        ret
      by
        jmp procname
      but do it only on level 4 because it destroys stack back traces
    }
    if (cs_opt_level4 in current_settings.optimizerswitches) and
      GetNextInstruction(p, hp1) and
      MatchInstruction(hp1,A_RET,[S_NO]) and
      (taicpu(hp1).ops=0) then
      begin
        { tail-call: the callee reuses our caller's return address }
        taicpu(p).opcode := A_JMP;
        taicpu(p).is_jmp := true;
        DebugMsg(SPeepholeOptimization + 'CallRet2Jmp done',p);
        asml.remove(hp1);
        hp1.free;
        Result:=true;
      end;
  end;
  4225. {$ifdef x86_64}
  4226. function TX86AsmOptimizer.PostPeepholeOptMovzx(var p : tai) : Boolean;
  4227. var
  4228. PreMessage: string;
  4229. begin
  4230. Result := False;
  4231. { Code size reduction by J. Gareth "Kit" Moreton }
  4232. { Convert MOVZBQ and MOVZWQ to MOVZBL and MOVZWL respectively if it removes the REX prefix }
  4233. if (taicpu(p).opsize in [S_BQ, S_WQ]) and
  4234. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_RAX, RS_RCX, RS_RDX, RS_RBX, RS_RSI, RS_RDI, RS_RBP, RS_RSP])
  4235. then
  4236. begin
  4237. { Has 64-bit register name and opcode suffix }
  4238. PreMessage := 'movz' + debug_opsize2str(taicpu(p).opsize) + ' ' + debug_operstr(taicpu(p).oper[0]^) + ',' + debug_regname(taicpu(p).oper[1]^.reg) + ' -> movz';
  4239. { The actual optimization }
  4240. setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
  4241. if taicpu(p).opsize = S_BQ then
  4242. taicpu(p).changeopsize(S_BL)
  4243. else
  4244. taicpu(p).changeopsize(S_WL);
  4245. DebugMsg(SPeepholeOptimization + PreMessage +
  4246. debug_opsize2str(taicpu(p).opsize) + ' ' + debug_operstr(taicpu(p).oper[0]^) + ',' + debug_regname(taicpu(p).oper[1]^.reg) + ' (removes REX prefix)', p);
  4247. end;
  4248. end;
function TX86AsmOptimizer.PostPeepholeOptXor(var p : tai) : Boolean;
  var
    PreMessage, RegName: string;
  begin
    { Code size reduction by J. Gareth "Kit" Moreton }
    { change "xorq %reg,%reg" to "xorl %reg,%reg" for %rax, %rcx, %rdx, %rbx, %rsi, %rdi, %rbp and %rsp,
      as this removes the REX prefix }
    Result := False;
    { only a self-XOR (register zeroing) is of interest here }
    if not OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
      Exit;
    if taicpu(p).oper[0]^.typ <> top_reg then
      { Should be impossible if both operands were equal, since one of XOR's operands must be a register }
      InternalError(2018011500);
    case taicpu(p).opsize of
      S_Q:
        begin
          if (getsupreg(taicpu(p).oper[0]^.reg) in [RS_RAX, RS_RCX, RS_RDX, RS_RBX, RS_RSI, RS_RDI, RS_RBP, RS_RSP]) then
            begin
              RegName := debug_regname(taicpu(p).oper[0]^.reg); { 64-bit register name }
              PreMessage := 'xorq ' + RegName + ',' + RegName + ' -> xorl ';
              { The actual optimization }
              setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
              setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
              taicpu(p).changeopsize(S_L);
              RegName := debug_regname(taicpu(p).oper[0]^.reg); { 32-bit register name }
              DebugMsg(SPeepholeOptimization + PreMessage + RegName + ',' + RegName + ' (removes REX prefix)', p);
              { NOTE(review): Result stays False even though the instruction
                was rewritten - presumably deliberate since only the encoding
                changes, not the instruction stream; confirm. }
            end;
        end;
      else
        ;
    end;
  end;
  4281. {$endif}
  4282. procedure TX86AsmOptimizer.OptReferences;
  4283. var
  4284. p: tai;
  4285. i: Integer;
  4286. begin
  4287. p := BlockStart;
  4288. while (p <> BlockEnd) Do
  4289. begin
  4290. if p.typ=ait_instruction then
  4291. begin
  4292. for i:=0 to taicpu(p).ops-1 do
  4293. if taicpu(p).oper[i]^.typ=top_ref then
  4294. optimize_ref(taicpu(p).oper[i]^.ref^,false);
  4295. end;
  4296. p:=tai(p.next);
  4297. end;
  4298. end;
  4299. end.