{ aoptx86.pas — x86 peephole optimizer unit (extraction page header and
  line-number gutter removed; original file is ~5556 lines, 236 KB) }
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. {$define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
type
  { x86-family specialisation of the generic assembler peephole optimizer.
    The public methods override/extend the register-tracking queries of
    TAsmOptimizer; the OptPass*/PostPeepholeOpt* methods each handle one
    opcode (or opcode family) in the corresponding optimizer pass. }
  TX86AsmOptimizer = class(TAsmOptimizer)
    { register usage queries (see implementations for exact semantics) }
    function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
    function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
    function RegReadByInstruction(reg : TRegister; hp : tai) : boolean;
    function RegInInstruction(Reg: TRegister; p1: tai): Boolean;override;
    { at -O3 scans forward for the next instruction mentioning reg;
      otherwise returns the immediately following instruction }
    function GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
    function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; override;
  protected
    { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
    function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
    { checks whether reading the value in reg1 depends on the value of reg2. This
      is very similar to SuperRegisterEquals, except it takes into account that
      R_SUBH and R_SUBL are independent (e.g. reading from AL does not
      depend on the value in AH). }
    function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
    procedure DebugMsg(const s : string; p : tai);inline;
    class function IsExitCode(p : tai) : boolean; static;
    class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean; static;
    procedure RemoveLastDeallocForFuncRes(p : tai);
    function DoSubAddOpt(var p : tai) : Boolean;
    { pre-peephole pass }
    function PrePeepholeOptSxx(var p : tai) : boolean;
    function PrePeepholeOptIMUL(var p : tai) : boolean;
    { pass 1, one handler per opcode family }
    function OptPass1AND(var p : tai) : boolean;
    function OptPass1_V_MOVAP(var p : tai) : boolean;
    function OptPass1VOP(var p : tai) : boolean;
    function OptPass1MOV(var p : tai) : boolean;
    function OptPass1Movx(var p : tai) : boolean;
    function OptPass1MOVXX(var p : tai) : boolean;
    function OptPass1OP(var p : tai) : boolean;
    function OptPass1LEA(var p : tai) : boolean;
    function OptPass1Sub(var p : tai) : boolean;
    function OptPass1SHLSAL(var p : tai) : boolean;
    function OptPass1SETcc(var p : tai) : boolean;
    function OptPass1FSTP(var p : tai) : boolean;
    function OptPass1FLD(var p : tai) : boolean;
    function OptPass1Cmp(var p : tai) : boolean;
    { pass 2 }
    function OptPass2MOV(var p : tai) : boolean;
    function OptPass2Imul(var p : tai) : boolean;
    function OptPass2Jmp(var p : tai) : boolean;
    function OptPass2Jcc(var p : tai) : boolean;
    function OptPass2Lea(var p: tai): Boolean;
    function OptPass2SUB(var p: tai): Boolean;
    { post-peephole pass }
    function PostPeepholeOptMov(var p : tai) : Boolean;
{$ifdef x86_64} { These post-peephole optimisations only affect 64-bit registers. [Kit] }
    function PostPeepholeOptMovzx(var p : tai) : Boolean;
    function PostPeepholeOptXor(var p : tai) : Boolean;
{$endif}
    function PostPeepholeOptMOVSX(var p : tai) : boolean;
    function PostPeepholeOptCmp(var p : tai) : Boolean;
    function PostPeepholeOptTestOr(var p : tai) : Boolean;
    function PostPeepholeOptCall(var p : tai) : Boolean;
    function PostPeepholeOptLea(var p : tai) : Boolean;
    procedure ConvertJumpToRET(const p: tai; const ret_p: tai);
    { Processor-dependent reference optimisation }
    class procedure OptimizeRefs(var p: taicpu); static;
  end;
{ matching helpers shared by the optimizer passes; each MatchInstruction
  overload tests an instruction against one or more candidate opcodes,
  with opsize=[] acting as "any operand size" }
function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;
{ operand equality tests (register, constant, or full operand-vs-operand) }
function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
{ field-by-field comparison of two (non-volatile) references }
function RefsEqual(const r1, r2: treference): boolean;
function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
{ returns true, if ref is a reference using only the registers passed as base and index
  and having an offset }
function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
implementation

uses
  cutils,verbose,
  globals,
  cpuinfo,
  procinfo,
  aasmbase,
  aoptutils,
  symconst,symsym,
  cgx86,
  itcpugas;

{$ifdef DEBUG_AOPTCPU}
const
  { prefix prepended to every peephole diagnostic message }
  SPeepholeOptimization: shortstring = 'Peephole Optimization: ';
{$else DEBUG_AOPTCPU}
{ Empty strings help the optimizer to remove string concatenations that won't
  ever appear to the user on release builds. [Kit] }
const
  SPeepholeOptimization = '';
{$endif DEBUG_AOPTCPU}
  116. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  117. begin
  118. result :=
  119. (instr.typ = ait_instruction) and
  120. (taicpu(instr).opcode = op) and
  121. ((opsize = []) or (taicpu(instr).opsize in opsize));
  122. end;
  123. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  124. begin
  125. result :=
  126. (instr.typ = ait_instruction) and
  127. ((taicpu(instr).opcode = op1) or
  128. (taicpu(instr).opcode = op2)
  129. ) and
  130. ((opsize = []) or (taicpu(instr).opsize in opsize));
  131. end;
  132. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  133. begin
  134. result :=
  135. (instr.typ = ait_instruction) and
  136. ((taicpu(instr).opcode = op1) or
  137. (taicpu(instr).opcode = op2) or
  138. (taicpu(instr).opcode = op3)
  139. ) and
  140. ((opsize = []) or (taicpu(instr).opsize in opsize));
  141. end;
  142. function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
  143. const opsize : topsizes) : boolean;
  144. var
  145. op : TAsmOp;
  146. begin
  147. result:=false;
  148. for op in ops do
  149. begin
  150. if (instr.typ = ait_instruction) and
  151. (taicpu(instr).opcode = op) and
  152. ((opsize = []) or (taicpu(instr).opsize in opsize)) then
  153. begin
  154. result:=true;
  155. exit;
  156. end;
  157. end;
  158. end;
  159. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  160. begin
  161. result := (oper.typ = top_reg) and (oper.reg = reg);
  162. end;
  163. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  164. begin
  165. result := (oper.typ = top_const) and (oper.val = a);
  166. end;
  167. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  168. begin
  169. result := oper1.typ = oper2.typ;
  170. if result then
  171. case oper1.typ of
  172. top_const:
  173. Result:=oper1.val = oper2.val;
  174. top_reg:
  175. Result:=oper1.reg = oper2.reg;
  176. top_ref:
  177. Result:=RefsEqual(oper1.ref^, oper2.ref^);
  178. else
  179. internalerror(2013102801);
  180. end
  181. end;
  182. function RefsEqual(const r1, r2: treference): boolean;
  183. begin
  184. RefsEqual :=
  185. (r1.offset = r2.offset) and
  186. (r1.segment = r2.segment) and (r1.base = r2.base) and
  187. (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
  188. (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
  189. (r1.relsymbol = r2.relsymbol) and
  190. (r1.volatility=[]) and
  191. (r2.volatility=[]);
  192. end;
  193. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  194. begin
  195. Result:=(ref.offset=0) and
  196. (ref.scalefactor in [0,1]) and
  197. (ref.segment=NR_NO) and
  198. (ref.symbol=nil) and
  199. (ref.relsymbol=nil) and
  200. ((base=NR_INVALID) or
  201. (ref.base=base)) and
  202. ((index=NR_INVALID) or
  203. (ref.index=index)) and
  204. (ref.volatility=[]);
  205. end;
  206. function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  207. begin
  208. Result:=(ref.scalefactor in [0,1]) and
  209. (ref.segment=NR_NO) and
  210. (ref.symbol=nil) and
  211. (ref.relsymbol=nil) and
  212. ((base=NR_INVALID) or
  213. (ref.base=base)) and
  214. ((index=NR_INVALID) or
  215. (ref.index=index)) and
  216. (ref.volatility=[]);
  217. end;
  218. function InstrReadsFlags(p: tai): boolean;
  219. begin
  220. InstrReadsFlags := true;
  221. case p.typ of
  222. ait_instruction:
  223. if InsProp[taicpu(p).opcode].Ch*
  224. [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
  225. Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
  226. Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc,Ch_All]<>[] then
  227. exit;
  228. ait_label:
  229. exit;
  230. else
  231. ;
  232. end;
  233. InstrReadsFlags := false;
  234. end;
  235. function TX86AsmOptimizer.GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
  236. begin
  237. Next:=Current;
  238. repeat
  239. Result:=GetNextInstruction(Next,Next);
  240. until not (Result) or
  241. not(cs_opt_level3 in current_settings.optimizerswitches) or
  242. (Next.typ<>ait_instruction) or
  243. RegInInstruction(reg,Next) or
  244. is_calljmp(taicpu(Next).opcode);
  245. end;
{ On x86 an instruction "loads from" a register exactly when it reads it,
  so this override simply delegates to RegReadByInstruction. }
function TX86AsmOptimizer.InstructionLoadsFromReg(const reg: TRegister;const hp: tai): boolean;
begin
  Result:=RegReadByInstruction(reg,hp);
end;
{ Returns True if instruction hp reads register reg — either explicitly as
  an operand, through a memory reference, or implicitly via the side
  effects recorded for the opcode in the insprop change-information table.
  Special-cases opcodes with implicit register operands (CALL, MUL/IMUL,
  DIV/IDIV) and the ambiguous SSE MOVSD. }
function TX86AsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
  var
    p: taicpu;
    opcount: longint;
  begin
    RegReadByInstruction := false;
    if hp.typ <> ait_instruction then
      exit;
    p := taicpu(hp);
    case p.opcode of
      A_CALL:
        { a call may read any register (parameters etc.); assume the worst }
        regreadbyinstruction := true;
      A_IMUL:
        case p.ops of
          1:
            { one-operand IMUL implicitly reads EAX/AX/AL — but the byte form
              only reads AL, never AH }
            regReadByInstruction := RegInOp(reg,p.oper[0]^) or
              (
                ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
                ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
              );
          2,3:
            regReadByInstruction :=
              reginop(reg,p.oper[0]^) or
              reginop(reg,p.oper[1]^);
          else
            InternalError(2019112801);
        end;
      A_MUL:
        begin
          { MUL implicitly reads EAX/AX/AL — but the byte form only reads AL }
          regReadByInstruction := RegInOp(reg,p.oper[0]^) or
            (
              ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
              ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
            );
        end;
      A_IDIV,A_DIV:
        begin
          { division reads the EAX dividend, and EDX too except for the byte
            form (which divides AX) }
          regReadByInstruction := RegInOp(reg,p.oper[0]^) or
            (
              (getregtype(reg)=R_INTREGISTER) and
              (
                (getsupreg(reg)=RS_EAX) or ((getsupreg(reg)=RS_EDX) and (p.opsize<>S_B))
              )
            );
        end;
      else
        begin
          { LEA only computes an address; segment registers are not read }
          if (p.opcode=A_LEA) and is_segment_reg(reg) then
            begin
              RegReadByInstruction := false;
              exit;
            end;
          { any register appearing in a memory operand is read for the
            address calculation, even if the operand is written }
          for opcount := 0 to p.ops-1 do
            if (p.oper[opCount]^.typ = top_ref) and
               RegInRef(reg,p.oper[opcount]^.ref^) then
              begin
                RegReadByInstruction := true;
                exit
              end;
          { special handling for SSE MOVSD }
          if (p.opcode=A_MOVSD) and (p.ops>0) then
            begin
              if p.ops<>2 then
                internalerror(2017042702);
              { reg-to-reg MOVSD only merges the low qword, so the
                destination register is read as well }
              regReadByInstruction := reginop(reg,p.oper[0]^) or
                (
                  (p.oper[1]^.typ=top_reg) and (p.oper[0]^.typ=top_reg) and reginop(reg, p.oper[1]^)
                );
              exit;
            end;
          with insprop[p.opcode] do
            begin
              { implicit integer register reads recorded in the change info }
              if getregtype(reg)=R_INTREGISTER then
                begin
                  case getsupreg(reg) of
                    RS_EAX:
                      if [Ch_REAX,Ch_RWEAX,Ch_MEAX]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_ECX:
                      if [Ch_RECX,Ch_RWECX,Ch_MECX]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_EDX:
                      if [Ch_REDX,Ch_RWEDX,Ch_MEDX]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_EBX:
                      if [Ch_REBX,Ch_RWEBX,Ch_MEBX]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_ESP:
                      if [Ch_RESP,Ch_RWESP,Ch_MESP]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_EBP:
                      if [Ch_REBP,Ch_RWEBP,Ch_MEBP]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_ESI:
                      if [Ch_RESI,Ch_RWESI,Ch_MESI]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_EDI:
                      if [Ch_REDI,Ch_RWEDI,Ch_MEDI]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                  end;
                end;
              { flags register: resolve per-flag, and for conditional
                instructions per the condition code actually tested }
              if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
                begin
                  if (Ch_RFLAGScc in Ch) and not(getsubreg(reg) in [R_SUBW,R_SUBD,R_SUBQ]) then
                    begin
                      case p.condition of
                        C_A,C_NBE,       { CF=0 and ZF=0 }
                        C_BE,C_NA:       { CF=1 or ZF=1 }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY,R_SUBFLAGZERO];
                        C_AE,C_NB,C_NC,  { CF=0 }
                        C_B,C_NAE,C_C:   { CF=1 }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY];
                        C_NE,C_NZ,       { ZF=0 }
                        C_E,C_Z:         { ZF=1 }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO];
                        C_G,C_NLE,       { ZF=0 and SF=OF }
                        C_LE,C_NG:       { ZF=1 or SF<>OF }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO,R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
                        C_GE,C_NL,       { SF=OF }
                        C_L,C_NGE:       { SF<>OF }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
                        C_NO,            { OF=0 }
                        C_O:             { OF=1 }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGOVERFLOW];
                        C_NP,C_PO,       { PF=0 }
                        C_P,C_PE:        { PF=1 }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGPARITY];
                        C_NS,            { SF=0 }
                        C_S:             { SF=1 }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN];
                        else
                          internalerror(2017042701);
                      end;
                      if RegReadByInstruction then
                        exit;
                    end;
                  case getsubreg(reg) of
                    { whole flags register: any flag read counts }
                    R_SUBW,R_SUBD,R_SUBQ:
                      RegReadByInstruction :=
                        [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
                         Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
                         Ch_RDirFlag,Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc]*Ch<>[];
                    R_SUBFLAGCARRY:
                      RegReadByInstruction:=[Ch_RCarryFlag,Ch_RWCarryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGPARITY:
                      RegReadByInstruction:=[Ch_RParityFlag,Ch_RWParityFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGAUXILIARY:
                      RegReadByInstruction:=[Ch_RAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGZERO:
                      RegReadByInstruction:=[Ch_RZeroFlag,Ch_RWZeroFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGSIGN:
                      RegReadByInstruction:=[Ch_RSignFlag,Ch_RWSignFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGOVERFLOW:
                      RegReadByInstruction:=[Ch_ROverflowFlag,Ch_RWOverflowFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGINTERRUPT:
                      RegReadByInstruction:=[Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGDIRECTION:
                      RegReadByInstruction:=[Ch_RDirFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    else
                      internalerror(2017042601);
                  end;
                  exit;
                end;
              { e.g. MOV reg,reg does not really read the source when both
                operands are the same register }
              if (Ch_NoReadIfEqualRegs in Ch) and (p.ops=2) and
                 (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
                 (p.oper[0]^.reg=p.oper[1]^.reg) then
                exit;
              { operand reads recorded in the change information }
              if ([CH_RWOP1,CH_ROP1,CH_MOP1]*Ch<>[]) and reginop(reg,p.oper[0]^) then
                begin
                  RegReadByInstruction := true;
                  exit
                end;
              if ([Ch_RWOP2,Ch_ROP2,Ch_MOP2]*Ch<>[]) and reginop(reg,p.oper[1]^) then
                begin
                  RegReadByInstruction := true;
                  exit
                end;
              if ([Ch_RWOP3,Ch_ROP3,Ch_MOP3]*Ch<>[]) and reginop(reg,p.oper[2]^) then
                begin
                  RegReadByInstruction := true;
                  exit
                end;
              if ([Ch_RWOP4,Ch_ROP4,Ch_MOP4]*Ch<>[]) and reginop(reg,p.oper[3]^) then
                begin
                  RegReadByInstruction := true;
                  exit
                end;
            end;
        end;
    end;
  end;
{ Returns True if register reg is mentioned (read OR written) by
  instruction p1, using the implicit-register and per-flag change
  information for the opcode, then falling back to the generic
  operand-based check of the inherited implementation. }
function TX86AsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
  begin
    result:=false;
    if p1.typ<>ait_instruction then
      exit;
    { Ch_All means the instruction may touch anything }
    if (Ch_All in insprop[taicpu(p1).opcode].Ch) then
      exit(true);
    if (getregtype(reg)=R_INTREGISTER) and
       { change information for xmm movsd are not correct }
       ((taicpu(p1).opcode<>A_MOVSD) or (taicpu(p1).ops=0)) then
      begin
        case getsupreg(reg) of
          { RS_EAX = RS_RAX on x86-64 }
          RS_EAX:
            result:=([Ch_REAX,Ch_RRAX,Ch_WEAX,Ch_WRAX,Ch_RWEAX,Ch_RWRAX,Ch_MEAX,Ch_MRAX]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_ECX:
            result:=([Ch_RECX,Ch_RRCX,Ch_WECX,Ch_WRCX,Ch_RWECX,Ch_RWRCX,Ch_MECX,Ch_MRCX]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_EDX:
            result:=([Ch_REDX,Ch_RRDX,Ch_WEDX,Ch_WRDX,Ch_RWEDX,Ch_RWRDX,Ch_MEDX,Ch_MRDX]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_EBX:
            result:=([Ch_REBX,Ch_RRBX,Ch_WEBX,Ch_WRBX,Ch_RWEBX,Ch_RWRBX,Ch_MEBX,Ch_MRBX]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_ESP:
            result:=([Ch_RESP,Ch_RRSP,Ch_WESP,Ch_WRSP,Ch_RWESP,Ch_RWRSP,Ch_MESP,Ch_MRSP]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_EBP:
            result:=([Ch_REBP,Ch_RRBP,Ch_WEBP,Ch_WRBP,Ch_RWEBP,Ch_RWRBP,Ch_MEBP,Ch_MRBP]*insprop[taicpu(p1).opcode].Ch)<>[];
          { string instructions address memory through ESI/EDI }
          RS_ESI:
            result:=([Ch_RESI,Ch_RRSI,Ch_WESI,Ch_WRSI,Ch_RWESI,Ch_RWRSI,Ch_MESI,Ch_MRSI,Ch_RMemEDI]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_EDI:
            result:=([Ch_REDI,Ch_RRDI,Ch_WEDI,Ch_WRDI,Ch_RWEDI,Ch_RWRDI,Ch_MEDI,Ch_MRDI,Ch_WMemEDI]*insprop[taicpu(p1).opcode].Ch)<>[];
          else
            ;
        end;
        if result then
          exit;
      end
    else if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
      begin
        if ([Ch_RFlags,Ch_WFlags,Ch_RWFlags,Ch_RFLAGScc]*insprop[taicpu(p1).opcode].Ch)<>[] then
          exit(true);
        { check the individual flag bit reg refers to }
        case getsubreg(reg) of
          R_SUBFLAGCARRY:
            Result:=([Ch_RCarryFlag,Ch_RWCarryFlag,Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGPARITY:
            Result:=([Ch_RParityFlag,Ch_RWParityFlag,Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGAUXILIARY:
            Result:=([Ch_RAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGZERO:
            Result:=([Ch_RZeroFlag,Ch_RWZeroFlag,Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGSIGN:
            Result:=([Ch_RSignFlag,Ch_RWSignFlag,Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGOVERFLOW:
            Result:=([Ch_ROverflowFlag,Ch_RWOverflowFlag,Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGINTERRUPT:
            Result:=([Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGDIRECTION:
            Result:=([Ch_RDirFlag,Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*insprop[taicpu(p1).opcode].Ch)<>[];
          else
            ;
        end;
        if result then
          exit;
      end
    else if (getregtype(reg)=R_FPUREGISTER) and (Ch_FPU in insprop[taicpu(p1).opcode].Ch) then
      exit(true);
    { fall back to the generic operand scan }
    Result:=inherited RegInInstruction(Reg, p1);
  end;
{ Returns True if instruction p1 may modify register reg.  Handles the
  flags register per individual flag bit, then opcodes with implicit or
  irregular destinations (CALL, SSE MOVSD, VMOVSS/VMOVSD, IMUL), then the
  implicit integer-register writes and per-operand write information from
  the insprop table. }
function TX86AsmOptimizer.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;
  begin
    Result := False;
    if p1.typ <> ait_instruction then
      exit;
    with insprop[taicpu(p1).opcode] do
      if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
        begin
          case getsubreg(reg) of
            { whole flags register: any flag write counts }
            R_SUBW,R_SUBD,R_SUBQ:
              Result :=
                [Ch_WCarryFlag,Ch_WParityFlag,Ch_WAuxiliaryFlag,Ch_WZeroFlag,Ch_WSignFlag,Ch_WOverflowFlag,
                 Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
                 Ch_W0DirFlag,Ch_W1DirFlag,Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGCARRY:
              Result:=[Ch_WCarryFlag,Ch_RWCarryFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGPARITY:
              Result:=[Ch_WParityFlag,Ch_RWParityFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGAUXILIARY:
              Result:=[Ch_WAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGZERO:
              Result:=[Ch_WZeroFlag,Ch_RWZeroFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGSIGN:
              Result:=[Ch_WSignFlag,Ch_RWSignFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGOVERFLOW:
              Result:=[Ch_WOverflowFlag,Ch_RWOverflowFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGINTERRUPT:
              Result:=[Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGDIRECTION:
              Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            else
              internalerror(2017042602);
          end;
          exit;
        end;
    case taicpu(p1).opcode of
      A_CALL:
        { We could potentially set Result to False if the register in
          question is non-volatile for the subroutine's calling convention,
          but this would require detecting the calling convention in use and
          also assuming that the routine doesn't contain malformed assembly
          language, for example... so it could only be done under -O4 as it
          would be considered a side-effect. [Kit] }
        Result := True;
      A_MOVSD:
        { special handling for SSE MOVSD }
        if (taicpu(p1).ops>0) then
          begin
            if taicpu(p1).ops<>2 then
              internalerror(2017042703);
            Result := (taicpu(p1).oper[1]^.typ=top_reg) and RegInOp(reg,taicpu(p1).oper[1]^);
          end;
      { VMOVSS and VMOVSD has two and three operand flavours, this cannot modelled by x86ins.dat
        so fix it here (FK)
      }
      A_VMOVSS,
      A_VMOVSD:
        begin
          Result := (taicpu(p1).ops=3) and (taicpu(p1).oper[2]^.typ=top_reg) and RegInOp(reg,taicpu(p1).oper[2]^);
          exit;
        end;
      A_IMUL:
        { destination is always the last operand }
        Result := (taicpu(p1).oper[taicpu(p1).ops-1]^.typ=top_reg) and RegInOp(reg,taicpu(p1).oper[taicpu(p1).ops-1]^);
      else
        ;
    end;
    if Result then
      exit;
    with insprop[taicpu(p1).opcode] do
      begin
        { implicit integer-register writes from the change information }
        if getregtype(reg)=R_INTREGISTER then
          begin
            case getsupreg(reg) of
              RS_EAX:
                if [Ch_WEAX,Ch_RWEAX,Ch_MEAX]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_ECX:
                if [Ch_WECX,Ch_RWECX,Ch_MECX]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_EDX:
                if [Ch_WEDX,Ch_RWEDX,Ch_MEDX]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_EBX:
                if [Ch_WEBX,Ch_RWEBX,Ch_MEBX]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_ESP:
                if [Ch_WESP,Ch_RWESP,Ch_MESP]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_EBP:
                if [Ch_WEBP,Ch_RWEBP,Ch_MEBP]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_ESI:
                if [Ch_WESI,Ch_RWESI,Ch_MESI]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_EDI:
                if [Ch_WEDI,Ch_RWEDI,Ch_MEDI]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
            end;
          end;
        { per-operand write information }
        if ([CH_RWOP1,CH_WOP1,CH_MOP1]*Ch<>[]) and reginop(reg,taicpu(p1).oper[0]^) then
          begin
            Result := true;
            exit
          end;
        if ([Ch_RWOP2,Ch_WOP2,Ch_MOP2]*Ch<>[]) and reginop(reg,taicpu(p1).oper[1]^) then
          begin
            Result := true;
            exit
          end;
        if ([Ch_RWOP3,Ch_WOP3,Ch_MOP3]*Ch<>[]) and reginop(reg,taicpu(p1).oper[2]^) then
          begin
            Result := true;
            exit
          end;
        if ([Ch_RWOP4,Ch_WOP4,Ch_MOP4]*Ch<>[]) and reginop(reg,taicpu(p1).oper[3]^) then
          begin
            Result := true;
            exit
          end;
      end;
  end;
  676. {$ifdef DEBUG_AOPTCPU}
{ Debug build: inserts s as an assembler comment in front of p so the
  applied peephole optimization is visible in the generated .s file. }
procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
  begin
    asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  end;
{ Debug helper: integer to decimal string. }
function debug_tostr(i: tcgint): string; inline;
  begin
    Result := tostr(i);
  end;
{ Debug helper: register name with the AT&T-style '%' prefix. }
function debug_regname(r: TRegister): string; inline;
  begin
    Result := '%' + std_regname(r);
  end;
  689. { Debug output function - creates a string representation of an operator }
{ Debug output function - creates a string representation of an operator }
{ Constants render as "$val", registers as "%name", references in AT&T
  form "offset(base,index,scale)"; anything else yields "[UNKNOWN]". }
function debug_operstr(oper: TOper): string;
  begin
    case oper.typ of
      top_const:
        Result := '$' + debug_tostr(oper.val);
      top_reg:
        Result := debug_regname(oper.reg);
      top_ref:
        begin
          { a zero offset is omitted entirely }
          if oper.ref^.offset <> 0 then
            Result := debug_tostr(oper.ref^.offset) + '('
          else
            Result := '(';
          if (oper.ref^.base <> NR_INVALID) and (oper.ref^.base <> NR_NO) then
            begin
              Result := Result + debug_regname(oper.ref^.base);
              if (oper.ref^.index <> NR_INVALID) and (oper.ref^.index <> NR_NO) then
                Result := Result + ',' + debug_regname(oper.ref^.index);
            end
          else
            { index without base still gets printed (no leading comma) }
            if (oper.ref^.index <> NR_INVALID) and (oper.ref^.index <> NR_NO) then
              Result := Result + debug_regname(oper.ref^.index);
          if (oper.ref^.scalefactor > 1) then
            Result := Result + ',' + debug_tostr(oper.ref^.scalefactor) + ')'
          else
            Result := Result + ')';
        end;
      else
        Result := '[UNKNOWN]';
    end;
  end;
{ Debug helper: opcode mnemonic. }
function debug_op2str(opcode: tasmop): string; inline;
  begin
    Result := std_op2str[opcode];
  end;
{ Debug helper: operand-size suffix in GNU as notation. }
function debug_opsize2str(opsize: topsize): string; inline;
  begin
    Result := gas_opsize2str[opsize];
  end;
  729. {$else DEBUG_AOPTCPU}
{ Release build: debug message output is a no-op. }
procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  begin
  end;
{ Release build: empty stub. }
function debug_tostr(i: tcgint): string; inline;
  begin
    Result := '';
  end;
{ Release build: empty stub. }
function debug_regname(r: TRegister): string; inline;
  begin
    Result := '';
  end;
{ Release build: empty stub. }
function debug_operstr(oper: TOper): string; inline;
  begin
    Result := '';
  end;
{ Release build: empty stub. }
function debug_op2str(opcode: tasmop): string; inline;
  begin
    Result := '';
  end;
{ Release build: empty stub. }
function debug_opsize2str(opsize: topsize): string; inline;
  begin
    Result := '';
  end;
  753. {$endif DEBUG_AOPTCPU}
{ True if writing reg1 replaces the COMPLETE value of reg2, i.e. afterwards
  no bit of reg2 depends on its previous contents.  Subregister overlap
  only matters for integer registers; for all other register types,
  super-register equality implies full overwrite. }
function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
  begin
    if not SuperRegistersEqual(reg1,reg2) then
      exit(false);
    if getregtype(reg1)<>R_INTREGISTER then
      exit(true); {because SuperRegisterEqual is true}
    case getsubreg(reg1) of
      { A write to R_SUBL doesn't change R_SUBH and if reg2 is R_SUBW or
        higher, it preserves the high bits, so the new value depends on
        reg2's previous value. In other words, it is equivalent to doing:
        reg2 := (reg2 and $ffffff00) or byte(reg1); }
      R_SUBL:
        exit(getsubreg(reg2)=R_SUBL);
      { A write to R_SUBH doesn't change R_SUBL and if reg2 is R_SUBW or
        higher, it actually does a:
        reg2 := (reg2 and $ffff00ff) or (reg1 and $ff00); }
      R_SUBH:
        exit(getsubreg(reg2)=R_SUBH);
      { If reg2 is R_SUBD or larger, a write to R_SUBW preserves the high 16
        bits of reg2:
        reg2 := (reg2 and $ffff0000) or word(reg1); }
      R_SUBW:
        exit(getsubreg(reg2) in [R_SUBL,R_SUBH,R_SUBW]);
      { a write to R_SUBD always overwrites every other subregister,
        because it clears the high 32 bits of R_SUBQ on x86_64 }
      R_SUBD,
      R_SUBQ:
        exit(true);
      else
        internalerror(2017042801);
    end;
  end;
{ True if reading reg1 observes at least one bit that belongs to reg2,
  e.g. reading AX depends on AL, but reading AL does not depend on AH.
  Subregister overlap only matters for integer registers; for all other
  register types, super-register equality implies dependence. }
function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
  begin
    if not SuperRegistersEqual(reg1,reg2) then
      exit(false);
    if getregtype(reg1)<>R_INTREGISTER then
      exit(true); {because SuperRegisterEqual is true}
    case getsubreg(reg1) of
      { the low byte overlaps everything except the high byte }
      R_SUBL:
        exit(getsubreg(reg2)<>R_SUBH);
      { the high byte overlaps everything except the low byte }
      R_SUBH:
        exit(getsubreg(reg2)<>R_SUBL);
      { word-sized and larger reads cover every subregister }
      R_SUBW,
      R_SUBD,
      R_SUBQ:
        exit(true);
      else
        internalerror(2017042802);
    end;
  end;
{ Pre-peephole optimization for SHR/SAR followed by SHL on the same
  operand: folds the pair into a shorter shift plus an AND mask (or just
  an AND when both shift counts are equal, in which case the SHL is
  removed).  NOTE(review): Result is left False even when a transformation
  was applied — presumably intentional since p itself is never removed;
  confirm against the other PrePeephole* routines. }
function TX86AsmOptimizer.PrePeepholeOptSxx(var p : tai) : boolean;
  var
    hp1 : tai;
    l : TCGInt;
  begin
    result:=false;
    { changes the code sequence
      shr/sar const1, x
      shl     const2, x
      to
      either "sar/and", "shl/and" or just "and" depending on const1 and const2 }
    if GetNextInstruction(p, hp1) and
       MatchInstruction(hp1,A_SHL,[]) and
       (taicpu(p).oper[0]^.typ = top_const) and
       (taicpu(hp1).oper[0]^.typ = top_const) and
       (taicpu(hp1).opsize = taicpu(p).opsize) and
       (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
       OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
      begin
        { skipped when optimizing for size: the AND constant enlarges the code }
        if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
           not(cs_opt_size in current_settings.optimizerswitches) then
          begin
            { shr/sar const1, %reg
              shl     const2, %reg
              with const1 > const2 }
            taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
            taicpu(hp1).opcode := A_AND;
            l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
            case taicpu(p).opsize Of
              S_B: taicpu(hp1).loadConst(0,l Xor $ff);
              S_W: taicpu(hp1).loadConst(0,l Xor $ffff);
              S_L: taicpu(hp1).loadConst(0,l Xor tcgint($ffffffff));
              S_Q: taicpu(hp1).loadConst(0,l Xor tcgint($ffffffffffffffff));
              else
                Internalerror(2017050703)
            end;
          end
        else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
                not(cs_opt_size in current_settings.optimizerswitches) then
          begin
            { shr/sar const1, %reg
              shl     const2, %reg
              with const1 < const2 }
            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
            taicpu(p).opcode := A_AND;
            l := (1 shl (taicpu(p).oper[0]^.val))-1;
            case taicpu(p).opsize Of
              S_B: taicpu(p).loadConst(0,l Xor $ff);
              S_W: taicpu(p).loadConst(0,l Xor $ffff);
              S_L: taicpu(p).loadConst(0,l Xor tcgint($ffffffff));
              S_Q: taicpu(p).loadConst(0,l Xor tcgint($ffffffffffffffff));
              else
                Internalerror(2017050702)
            end;
          end
        else if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
          begin
            { shr/sar const1, %reg
              shl     const2, %reg
              with const1 = const2 }
            taicpu(p).opcode := A_AND;
            l := (1 shl (taicpu(p).oper[0]^.val))-1;
            case taicpu(p).opsize Of
              S_B: taicpu(p).loadConst(0,l Xor $ff);
              S_W: taicpu(p).loadConst(0,l Xor $ffff);
              S_L: taicpu(p).loadConst(0,l Xor tcgint($ffffffff));
              S_Q: taicpu(p).loadConst(0,l Xor tcgint($ffffffffffffffff));
              else
                Internalerror(2017050701)
            end;
            { the SHL is now redundant and can be removed }
            asml.remove(hp1);
            hp1.free;
          end;
      end;
  end;
{ Pre-peephole optimization for "imul const, reg[, reg]":
  - imul $1 is removed (2-op form) or turned into a MOV (3-op form);
  - other constants whose binary representation has exactly two set bits at
    distance <= 3 are rewritten as LEA (scale 2/4/8 via base+index) plus an
    optional SHL, which is faster on most x86 cores.  The rewrite is not
    done when optimizing for size or when a following Jcc tests O/NO (the
    flag results would differ). }
function TX86AsmOptimizer.PrePeepholeOptIMUL(var p : tai) : boolean;
  var
    opsize : topsize;
    hp1 : tai;
    tmpref : treference;
    ShiftValue : Cardinal;
    BaseValue : TCGInt;
  begin
    result:=false;
    opsize:=taicpu(p).opsize;
    { changes certain "imul const, %reg"'s to lea sequences }
    if (MatchOpType(taicpu(p),top_const,top_reg) or
        MatchOpType(taicpu(p),top_const,top_reg,top_reg)) and
       (opsize in [S_L{$ifdef x86_64},S_Q{$endif x86_64}]) then
      if (taicpu(p).oper[0]^.val = 1) then
        if (taicpu(p).ops = 2) then
          { remove "imul $1, reg" }
          begin
            { NOTE(review): hp1 is assigned here but not used afterwards in
              this branch — looks like a leftover; verify before cleaning up }
            hp1 := tai(p.Next);
            DebugMsg(SPeepholeOptimization + 'Imul2Nop done',p);
            RemoveCurrentP(p);
            result:=true;
          end
        else
          { change "imul $1, reg1, reg2" to "mov reg1, reg2" }
          begin
            hp1 := taicpu.Op_Reg_Reg(A_MOV, opsize, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
            InsertLLItem(p.previous, p.next, hp1);
            DebugMsg(SPeepholeOptimization + 'Imul2Mov done',p);
            p.free;
            p := hp1;
          end
      else if ((taicpu(p).ops <= 2) or
               (taicpu(p).oper[2]^.typ = Top_Reg)) and
              not(cs_opt_size in current_settings.optimizerswitches) and
              (not(GetNextInstruction(p, hp1)) or
               not((tai(hp1).typ = ait_instruction) and
                   ((taicpu(hp1).opcode=A_Jcc) and
                    (taicpu(hp1).condition in [C_O,C_NO])))) then
        begin
          {
            imul X, reg1, reg2 to
              lea (reg1,reg1,Y), reg2
              shl ZZ,reg2
            imul XX, reg1 to
              lea (reg1,reg1,YY), reg1
              shl ZZ,reg2
            This optimziation makes sense for pretty much every x86, except the VIA Nano3000: it has IMUL latency 2, lea/shl pair as well,
            it does not exist as a separate optimization target in FPC though.
            This optimziation can be applied as long as only two bits are set in the constant and those two bits are separated by
            at most two zeros
          }
          reference_reset(tmpref,1,[]);
          if (PopCnt(QWord(taicpu(p).oper[0]^.val))=2) and (BsrQWord(taicpu(p).oper[0]^.val)-BsfQWord(taicpu(p).oper[0]^.val)<=3) then
            begin
              { factor the constant as BaseValue shl ShiftValue, where
                BaseValue must be one of the LEA-encodable 3, 5 or 9 }
              ShiftValue:=BsfQWord(taicpu(p).oper[0]^.val);
              BaseValue:=taicpu(p).oper[0]^.val shr ShiftValue;
              TmpRef.base := taicpu(p).oper[1]^.reg;
              TmpRef.index := taicpu(p).oper[1]^.reg;
              if not(BaseValue in [3,5,9]) then
                Internalerror(2018110101);
              TmpRef.ScaleFactor := BaseValue-1;
              if (taicpu(p).ops = 2) then
                hp1 := taicpu.op_ref_reg(A_LEA, opsize, TmpRef, taicpu(p).oper[1]^.reg)
              else
                hp1 := taicpu.op_ref_reg(A_LEA, opsize, TmpRef, taicpu(p).oper[2]^.reg);
              AsmL.InsertAfter(hp1,p);
              DebugMsg(SPeepholeOptimization + 'Imul2LeaShl done',p);
              taicpu(hp1).fileinfo:=taicpu(p).fileinfo;
              RemoveCurrentP(p);
              if ShiftValue>0 then
                AsmL.InsertAfter(taicpu.op_const_reg(A_SHL, opsize, ShiftValue, taicpu(hp1).oper[1]^.reg),hp1);
            end;
        end;
  end;
  955. function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  956. var
  957. p: taicpu;
  958. begin
  959. if not assigned(hp) or
  960. (hp.typ <> ait_instruction) then
  961. begin
  962. Result := false;
  963. exit;
  964. end;
  965. p := taicpu(hp);
  966. if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
  967. with insprop[p.opcode] do
  968. begin
  969. case getsubreg(reg) of
  970. R_SUBW,R_SUBD,R_SUBQ:
  971. Result:=
  972. RegLoadedWithNewValue(NR_CARRYFLAG,hp) and
  973. RegLoadedWithNewValue(NR_PARITYFLAG,hp) and
  974. RegLoadedWithNewValue(NR_AUXILIARYFLAG,hp) and
  975. RegLoadedWithNewValue(NR_ZEROFLAG,hp) and
  976. RegLoadedWithNewValue(NR_SIGNFLAG,hp) and
  977. RegLoadedWithNewValue(NR_OVERFLOWFLAG,hp);
  978. R_SUBFLAGCARRY:
  979. Result:=[Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag,Ch_WFlags]*Ch<>[];
  980. R_SUBFLAGPARITY:
  981. Result:=[Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag,Ch_WFlags]*Ch<>[];
  982. R_SUBFLAGAUXILIARY:
  983. Result:=[Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag,Ch_WFlags]*Ch<>[];
  984. R_SUBFLAGZERO:
  985. Result:=[Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag,Ch_WFlags]*Ch<>[];
  986. R_SUBFLAGSIGN:
  987. Result:=[Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag,Ch_WFlags]*Ch<>[];
  988. R_SUBFLAGOVERFLOW:
  989. Result:=[Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag,Ch_WFlags]*Ch<>[];
  990. R_SUBFLAGINTERRUPT:
  991. Result:=[Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*Ch<>[];
  992. R_SUBFLAGDIRECTION:
  993. Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*Ch<>[];
  994. else
  995. begin
  996. writeln(getsubreg(reg));
  997. internalerror(2017050501);
  998. end;
  999. end;
  1000. exit;
  1001. end;
  1002. Result :=
  1003. (((p.opcode = A_MOV) or
  1004. (p.opcode = A_MOVZX) or
  1005. (p.opcode = A_MOVSX) or
  1006. (p.opcode = A_LEA) or
  1007. (p.opcode = A_VMOVSS) or
  1008. (p.opcode = A_VMOVSD) or
  1009. (p.opcode = A_VMOVAPD) or
  1010. (p.opcode = A_VMOVAPS) or
  1011. (p.opcode = A_VMOVQ) or
  1012. (p.opcode = A_MOVSS) or
  1013. (p.opcode = A_MOVSD) or
  1014. (p.opcode = A_MOVQ) or
  1015. (p.opcode = A_MOVAPD) or
  1016. (p.opcode = A_MOVAPS) or
  1017. {$ifndef x86_64}
  1018. (p.opcode = A_LDS) or
  1019. (p.opcode = A_LES) or
  1020. {$endif not x86_64}
  1021. (p.opcode = A_LFS) or
  1022. (p.opcode = A_LGS) or
  1023. (p.opcode = A_LSS)) and
  1024. (p.ops=2) and { A_MOVSD can have zero operands, so this check is needed }
  1025. (p.oper[1]^.typ = top_reg) and
  1026. (Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)) and
  1027. ((p.oper[0]^.typ = top_const) or
  1028. ((p.oper[0]^.typ = top_reg) and
  1029. not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
  1030. ((p.oper[0]^.typ = top_ref) and
  1031. not RegInRef(reg,p.oper[0]^.ref^)))) or
  1032. ((p.opcode = A_POP) and
  1033. (Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg))) or
  1034. ((p.opcode = A_IMUL) and
  1035. (p.ops=3) and
  1036. (Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg,reg)) and
  1037. (((p.oper[1]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[1]^.reg,reg))) or
  1038. ((p.oper[1]^.typ=top_ref) and not(RegInRef(reg,p.oper[1]^.ref^))))) or
  1039. ((((p.opcode = A_IMUL) or
  1040. (p.opcode = A_MUL)) and
  1041. (p.ops=1)) and
  1042. (((p.oper[0]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
  1043. ((p.oper[0]^.typ=top_ref) and not(RegInRef(reg,p.oper[0]^.ref^)))) and
  1044. (((p.opsize=S_B) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
  1045. ((p.opsize=S_W) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
  1046. ((p.opsize=S_L) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg))
  1047. {$ifdef x86_64}
  1048. or ((p.opsize=S_Q) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg))
  1049. {$endif x86_64}
  1050. )) or
  1051. ((p.opcode = A_CWD) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
  1052. ((p.opcode = A_CDQ) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg)) or
  1053. {$ifdef x86_64}
  1054. ((p.opcode = A_CQO) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg)) or
  1055. {$endif x86_64}
  1056. ((p.opcode = A_CBW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
  1057. {$ifndef x86_64}
  1058. ((p.opcode = A_LDS) and (reg=NR_DS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  1059. ((p.opcode = A_LES) and (reg=NR_ES) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  1060. {$endif not x86_64}
  1061. ((p.opcode = A_LFS) and (reg=NR_FS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  1062. ((p.opcode = A_LGS) and (reg=NR_GS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  1063. ((p.opcode = A_LSS) and (reg=NR_SS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  1064. {$ifndef x86_64}
  1065. ((p.opcode = A_AAM) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
  1066. {$endif not x86_64}
  1067. ((p.opcode = A_LAHF) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
  1068. ((p.opcode = A_LODSB) and Reg1WriteOverwritesReg2Entirely(NR_AL,reg)) or
  1069. ((p.opcode = A_LODSW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg)) or
  1070. ((p.opcode = A_LODSD) and Reg1WriteOverwritesReg2Entirely(NR_EAX,reg)) or
  1071. {$ifdef x86_64}
  1072. ((p.opcode = A_LODSQ) and Reg1WriteOverwritesReg2Entirely(NR_RAX,reg)) or
  1073. {$endif x86_64}
  1074. ((p.opcode = A_SETcc) and (p.oper[0]^.typ=top_reg) and Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
  1075. (((p.opcode = A_FSTSW) or
  1076. (p.opcode = A_FNSTSW)) and
  1077. (p.oper[0]^.typ=top_reg) and
  1078. Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
  1079. (((p.opcode = A_XOR) or (p.opcode = A_SUB) or (p.opcode = A_SBB)) and
  1080. (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
  1081. (p.oper[0]^.reg=p.oper[1]^.reg) and
  1082. Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg));
  1083. end;
{ Returns True if p starts a recognized function epilogue: a plain RET,
  LEAVE+RET, a stack-pointer restore via LEA(sp)+RET, or a frame-pointer
  restore (MOV fp,sp or LEA (fp),sp) followed by POP fp and RET. }
class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
  var
    hp2,hp3 : tai;
  begin
    { some x86-64 issue a NOP before the real exit code }
    if MatchInstruction(p,A_NOP,[]) then
      GetNextInstruction(p,p);
    result:=assigned(p) and (p.typ=ait_instruction) and
      ((taicpu(p).opcode = A_RET) or
       { leave; ret }
       ((taicpu(p).opcode=A_LEAVE) and
        GetNextInstruction(p,hp2) and
        MatchInstruction(hp2,A_RET,[S_NO])
       ) or
       { lea x(%esp),%esp; ret }
       (((taicpu(p).opcode=A_LEA) and
         MatchOpType(taicpu(p),top_ref,top_reg) and
         (taicpu(p).oper[0]^.ref^.base=NR_STACK_POINTER_REG) and
         (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)
        ) and
        GetNextInstruction(p,hp2) and
        MatchInstruction(hp2,A_RET,[S_NO])
       ) or
       { mov %fp,%sp / lea (%fp),%sp; pop %fp; ret }
       ((((taicpu(p).opcode=A_MOV) and
          MatchOpType(taicpu(p),top_reg,top_reg) and
          (taicpu(p).oper[0]^.reg=current_procinfo.framepointer) and
          (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)) or
         ((taicpu(p).opcode=A_LEA) and
          MatchOpType(taicpu(p),top_ref,top_reg) and
          (taicpu(p).oper[0]^.ref^.base=current_procinfo.framepointer) and
          (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)
         )
        ) and
        GetNextInstruction(p,hp2) and
        MatchInstruction(hp2,A_POP,[reg2opsize(current_procinfo.framepointer)]) and
        MatchOpType(taicpu(hp2),top_reg) and
        (taicpu(hp2).oper[0]^.reg=current_procinfo.framepointer) and
        GetNextInstruction(hp2,hp3) and
        MatchInstruction(hp3,A_RET,[S_NO])
       )
      );
  end;
  1124. class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
  1125. begin
  1126. isFoldableArithOp := False;
  1127. case hp1.opcode of
  1128. A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
  1129. isFoldableArithOp :=
  1130. ((taicpu(hp1).oper[0]^.typ = top_const) or
  1131. ((taicpu(hp1).oper[0]^.typ = top_reg) and
  1132. (taicpu(hp1).oper[0]^.reg <> reg))) and
  1133. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1134. (taicpu(hp1).oper[1]^.reg = reg);
  1135. A_INC,A_DEC,A_NEG,A_NOT:
  1136. isFoldableArithOp :=
  1137. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1138. (taicpu(hp1).oper[0]^.reg = reg);
  1139. else
  1140. ;
  1141. end;
  1142. end;
  1143. procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);
  1144. procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
  1145. var
  1146. hp2: tai;
  1147. begin
  1148. hp2 := p;
  1149. repeat
  1150. hp2 := tai(hp2.previous);
  1151. if assigned(hp2) and
  1152. (hp2.typ = ait_regalloc) and
  1153. (tai_regalloc(hp2).ratype=ra_dealloc) and
  1154. (getregtype(tai_regalloc(hp2).reg) = R_INTREGISTER) and
  1155. (getsupreg(tai_regalloc(hp2).reg) = supreg) then
  1156. begin
  1157. asml.remove(hp2);
  1158. hp2.free;
  1159. break;
  1160. end;
  1161. until not(assigned(hp2)) or regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),hp2);
  1162. end;
  1163. begin
  1164. case current_procinfo.procdef.returndef.typ of
  1165. arraydef,recorddef,pointerdef,
  1166. stringdef,enumdef,procdef,objectdef,errordef,
  1167. filedef,setdef,procvardef,
  1168. classrefdef,forwarddef:
  1169. DoRemoveLastDeallocForFuncRes(RS_EAX);
  1170. orddef:
  1171. if current_procinfo.procdef.returndef.size <> 0 then
  1172. begin
  1173. DoRemoveLastDeallocForFuncRes(RS_EAX);
  1174. { for int64/qword }
  1175. if current_procinfo.procdef.returndef.size = 8 then
  1176. DoRemoveLastDeallocForFuncRes(RS_EDX);
  1177. end;
  1178. else
  1179. ;
  1180. end;
  1181. end;
    { Pass-1 optimisation of (v)movap[sd] instructions.  Handles:
        1. removal of self moves (reg1 -> reg1),
        2. merging/shortening chained register-to-register moves,
        3. folding move pairs around VFMA*-family operations,
        4. folding move pairs around scalar addsX/subsX/mulsX/divsX.
      Returns True (and may advance p) when the instruction list changed. }
    function TX86AsmOptimizer.OptPass1_V_MOVAP(var p : tai) : boolean;
      var
        hp1,hp2 : tai;
      begin
        result:=false;
        if MatchOpType(taicpu(p),top_reg,top_reg) then
          begin
            { vmova* reg1,reg1
              =>
              <nop> }
            if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
              begin
                GetNextInstruction(p,hp1);
                asml.Remove(p);
                p.Free;
                p:=hp1;
                result:=true;
                exit;
              end
            else if GetNextInstruction(p,hp1) then
              begin
                { second instruction is the same kind of move, reg-to-reg,
                  and reads the register we just wrote }
                if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
                  MatchOpType(taicpu(hp1),top_reg,top_reg) and
                  MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
                  begin
                    { vmova* reg1,reg2
                      vmova* reg2,reg3
                      dealloc reg2
                      =>
                      vmova* reg1,reg3 }
                    TransferUsedRegs(TmpUsedRegs);
                    UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                    { only legal if the intermediate reg2 dies here }
                    if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                      begin
                        DebugMsg(SPeepholeOptimization + '(V)MOVA*(V)MOVA*2(V)MOVA* 1',p);
                        taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                        asml.Remove(hp1);
                        hp1.Free;
                        result:=true;
                        exit;
                      end
                    { special case:
                      vmova* reg1,reg2
                      vmova* reg2,reg1
                      =>
                      vmova* reg1,reg2 }
                    else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
                      begin
                        DebugMsg(SPeepholeOptimization + '(V)MOVA*(V)MOVA*2(V)MOVA* 2',p);
                        asml.Remove(hp1);
                        hp1.Free;
                        result:=true;
                        exit;
                      end
                  end
              end;
            { look further ahead for the next instruction that uses the
              destination register (search depth depends on -O level) }
            if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[1]^.reg) then
              begin
                { case 3: the user is any fused-multiply-add/sub variant }
                if MatchInstruction(hp1,[A_VFMADDPD,
                                         A_VFMADD132PD,
                                         A_VFMADD132PS,
                                         A_VFMADD132SD,
                                         A_VFMADD132SS,
                                         A_VFMADD213PD,
                                         A_VFMADD213PS,
                                         A_VFMADD213SD,
                                         A_VFMADD213SS,
                                         A_VFMADD231PD,
                                         A_VFMADD231PS,
                                         A_VFMADD231SD,
                                         A_VFMADD231SS,
                                         A_VFMADDSUB132PD,
                                         A_VFMADDSUB132PS,
                                         A_VFMADDSUB213PD,
                                         A_VFMADDSUB213PS,
                                         A_VFMADDSUB231PD,
                                         A_VFMADDSUB231PS,
                                         A_VFMSUB132PD,
                                         A_VFMSUB132PS,
                                         A_VFMSUB132SD,
                                         A_VFMSUB132SS,
                                         A_VFMSUB213PD,
                                         A_VFMSUB213PS,
                                         A_VFMSUB213SD,
                                         A_VFMSUB213SS,
                                         A_VFMSUB231PD,
                                         A_VFMSUB231PS,
                                         A_VFMSUB231SD,
                                         A_VFMSUB231SS,
                                         A_VFMSUBADD132PD,
                                         A_VFMSUBADD132PS,
                                         A_VFMSUBADD213PD,
                                         A_VFMSUBADD213PS,
                                         A_VFMSUBADD231PD,
                                         A_VFMSUBADD231PS,
                                         A_VFNMADD132PD,
                                         A_VFNMADD132PS,
                                         A_VFNMADD132SD,
                                         A_VFNMADD132SS,
                                         A_VFNMADD213PD,
                                         A_VFNMADD213PS,
                                         A_VFNMADD213SD,
                                         A_VFNMADD213SS,
                                         A_VFNMADD231PD,
                                         A_VFNMADD231PS,
                                         A_VFNMADD231SD,
                                         A_VFNMADD231SS,
                                         A_VFNMSUB132PD,
                                         A_VFNMSUB132PS,
                                         A_VFNMSUB132SD,
                                         A_VFNMSUB132SS,
                                         A_VFNMSUB213PD,
                                         A_VFNMSUB213PS,
                                         A_VFNMSUB213SD,
                                         A_VFNMSUB213SS,
                                         A_VFNMSUB231PD,
                                         A_VFNMSUB231PS,
                                         A_VFNMSUB231SD,
                                         A_VFNMSUB231SS],[S_NO]) and
                  { we mix single and double operations here because we assume that the compiler
                    generates vmovapd only after double operations and vmovaps only after single operations }
                  MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
                  GetNextInstruction(hp1,hp2) and
                  MatchInstruction(hp2,[A_VMOVAPD,A_VMOVAPS,A_MOVAPD,A_MOVAPS],[S_NO]) and
                  MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
                  begin
                    { (v)mova* reg1,reg2
                      vfma*   ...,reg2
                      (v)mova* reg2,reg1
                      =>
                      vfma*   ...,reg1   (when reg2 dies at the last move) }
                    TransferUsedRegs(TmpUsedRegs);
                    UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                    UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                    if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
                      begin
                        taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
                        asml.Remove(p);
                        p.Free;
                        asml.Remove(hp2);
                        hp2.Free;
                        p:=hp1;
                      end;
                  end
                { case 4: scalar arithmetic sandwiched between two moves of
                  matching precision }
                else if (hp1.typ = ait_instruction) and
                  GetNextInstruction(hp1, hp2) and
                  MatchInstruction(hp2,taicpu(p).opcode,[]) and
                  OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
                  MatchOpType(taicpu(hp2),top_reg,top_reg) and
                  MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
                  (((taicpu(p).opcode=A_MOVAPS) and
                    ((taicpu(hp1).opcode=A_ADDSS) or (taicpu(hp1).opcode=A_SUBSS) or
                     (taicpu(hp1).opcode=A_MULSS) or (taicpu(hp1).opcode=A_DIVSS))) or
                   ((taicpu(p).opcode=A_MOVAPD) and
                    ((taicpu(hp1).opcode=A_ADDSD) or (taicpu(hp1).opcode=A_SUBSD) or
                     (taicpu(hp1).opcode=A_MULSD) or (taicpu(hp1).opcode=A_DIVSD)))
                  ) then
                  { change
                    movapX reg,reg2
                    addsX/subsX/... reg3, reg2
                    movapX reg2,reg
                    to
                    addsX/subsX/... reg3,reg
                  }
                  begin
                    TransferUsedRegs(TmpUsedRegs);
                    UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                    UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                    If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
                      begin
                        DebugMsg(SPeepholeOptimization + 'MovapXOpMovapX2Op ('+
                          debug_op2str(taicpu(p).opcode)+' '+
                          debug_op2str(taicpu(hp1).opcode)+' '+
                          debug_op2str(taicpu(hp2).opcode)+') done',p);
                        { we cannot eliminate the first move if
                          the operation uses the same register for source and dest }
                        if not(OpsEqual(taicpu(hp1).oper[1]^,taicpu(hp1).oper[0]^)) then
                          begin
                            asml.remove(p);
                            p.Free;
                          end;
                        taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
                        asml.remove(hp2);
                        hp2.Free;
                        p:=hp1;
                        result:=true;
                      end;
                  end;
              end;
          end;
      end;
  1368. function TX86AsmOptimizer.OptPass1VOP(var p : tai) : boolean;
  1369. var
  1370. hp1 : tai;
  1371. begin
  1372. result:=false;
  1373. { replace
  1374. V<Op>X %mreg1,%mreg2,%mreg3
  1375. VMovX %mreg3,%mreg4
  1376. dealloc %mreg3
  1377. by
  1378. V<Op>X %mreg1,%mreg2,%mreg4
  1379. ?
  1380. }
  1381. if GetNextInstruction(p,hp1) and
  1382. { we mix single and double operations here because we assume that the compiler
  1383. generates vmovapd only after double operations and vmovaps only after single operations }
  1384. MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
  1385. MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
  1386. (taicpu(hp1).oper[1]^.typ=top_reg) then
  1387. begin
  1388. TransferUsedRegs(TmpUsedRegs);
  1389. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1390. if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)) then
  1391. begin
  1392. taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
  1393. DebugMsg(SPeepholeOptimization + 'VOpVmov2VOp done',p);
  1394. asml.Remove(hp1);
  1395. hp1.Free;
  1396. result:=true;
  1397. end;
  1398. end;
  1399. end;
  1400. function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  1401. var
  1402. hp1, hp2: tai;
  1403. GetNextInstruction_p, TempRegUsed: Boolean;
  1404. PreMessage, RegName1, RegName2, InputVal, MaskNum: string;
  1405. NewSize: topsize;
  1406. CurrentReg: TRegister;
  1407. begin
  1408. Result:=false;
  1409. GetNextInstruction_p:=GetNextInstruction(p, hp1);
  1410. { remove mov reg1,reg1? }
  1411. if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^)
  1412. then
  1413. begin
  1414. DebugMsg(SPeepholeOptimization + 'Mov2Nop 1 done',p);
  1415. { take care of the register (de)allocs following p }
  1416. UpdateUsedRegs(tai(p.next));
  1417. asml.remove(p);
  1418. p.free;
  1419. p:=hp1;
  1420. Result:=true;
  1421. exit;
  1422. end;
  1423. { All the next optimisations require a next instruction }
  1424. if not GetNextInstruction_p or (hp1.typ <> ait_instruction) then
  1425. Exit;
  1426. if (taicpu(hp1).opcode = A_AND) and
  1427. (taicpu(p).oper[1]^.typ = top_reg) and
  1428. MatchOpType(taicpu(hp1),top_const,top_reg) then
  1429. begin
  1430. if MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
  1431. begin
  1432. case taicpu(p).opsize of
  1433. S_L:
  1434. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  1435. begin
  1436. { Optimize out:
  1437. mov x, %reg
  1438. and ffffffffh, %reg
  1439. }
  1440. DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 1 done',p);
  1441. asml.remove(hp1);
  1442. hp1.free;
  1443. Result:=true;
  1444. exit;
  1445. end;
  1446. S_Q: { TODO: Confirm if this is even possible }
  1447. if (taicpu(hp1).oper[0]^.val = $ffffffffffffffff) then
  1448. begin
  1449. { Optimize out:
  1450. mov x, %reg
  1451. and ffffffffffffffffh, %reg
  1452. }
  1453. DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 2 done',p);
  1454. asml.remove(hp1);
  1455. hp1.free;
  1456. Result:=true;
  1457. exit;
  1458. end;
  1459. else
  1460. ;
  1461. end;
  1462. end
  1463. else if (taicpu(p).oper[1]^.typ = top_reg) and (taicpu(hp1).oper[1]^.typ = top_reg) and
  1464. (taicpu(p).oper[0]^.typ <> top_const) and { MOVZX only supports registers and memory, not immediates (use MOV for that!) }
  1465. (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))
  1466. then
  1467. begin
  1468. InputVal := debug_operstr(taicpu(p).oper[0]^);
  1469. MaskNum := debug_tostr(taicpu(hp1).oper[0]^.val);
  1470. case taicpu(p).opsize of
  1471. S_B:
  1472. if (taicpu(hp1).oper[0]^.val = $ff) then
  1473. begin
  1474. { Convert:
  1475. movb x, %regl movb x, %regl
  1476. andw ffh, %regw andl ffh, %regd
  1477. To:
  1478. movzbw x, %regd movzbl x, %regd
  1479. (Identical registers, just different sizes)
  1480. }
  1481. RegName1 := debug_regname(taicpu(p).oper[1]^.reg); { 8-bit register name }
  1482. RegName2 := debug_regname(taicpu(hp1).oper[1]^.reg); { 16/32-bit register name }
  1483. case taicpu(hp1).opsize of
  1484. S_W: NewSize := S_BW;
  1485. S_L: NewSize := S_BL;
  1486. {$ifdef x86_64}
  1487. S_Q: NewSize := S_BQ;
  1488. {$endif x86_64}
  1489. else
  1490. InternalError(2018011510);
  1491. end;
  1492. end
  1493. else
  1494. NewSize := S_NO;
  1495. S_W:
  1496. if (taicpu(hp1).oper[0]^.val = $ffff) then
  1497. begin
  1498. { Convert:
  1499. movw x, %regw
  1500. andl ffffh, %regd
  1501. To:
  1502. movzwl x, %regd
  1503. (Identical registers, just different sizes)
  1504. }
  1505. RegName1 := debug_regname(taicpu(p).oper[1]^.reg); { 16-bit register name }
  1506. RegName2 := debug_regname(taicpu(hp1).oper[1]^.reg); { 32-bit register name }
  1507. case taicpu(hp1).opsize of
  1508. S_L: NewSize := S_WL;
  1509. {$ifdef x86_64}
  1510. S_Q: NewSize := S_WQ;
  1511. {$endif x86_64}
  1512. else
  1513. InternalError(2018011511);
  1514. end;
  1515. end
  1516. else
  1517. NewSize := S_NO;
  1518. else
  1519. NewSize := S_NO;
  1520. end;
  1521. if NewSize <> S_NO then
  1522. begin
  1523. PreMessage := 'mov' + debug_opsize2str(taicpu(p).opsize) + ' ' + InputVal + ',' + RegName1;
  1524. { The actual optimization }
  1525. taicpu(p).opcode := A_MOVZX;
  1526. taicpu(p).changeopsize(NewSize);
  1527. taicpu(p).oper[1]^ := taicpu(hp1).oper[1]^;
  1528. { Safeguard if "and" is followed by a conditional command }
  1529. TransferUsedRegs(TmpUsedRegs);
  1530. UpdateUsedRegs(TmpUsedRegs,tai(p.next));
  1531. if (RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp1, TmpUsedRegs)) then
  1532. begin
  1533. { At this point, the "and" command is effectively equivalent to
  1534. "test %reg,%reg". This will be handled separately by the
  1535. Peephole Optimizer. [Kit] }
  1536. DebugMsg(SPeepholeOptimization + PreMessage +
  1537. ' -> movz' + debug_opsize2str(NewSize) + ' ' + InputVal + ',' + RegName2, p);
  1538. end
  1539. else
  1540. begin
  1541. DebugMsg(SPeepholeOptimization + PreMessage + '; and' + debug_opsize2str(taicpu(hp1).opsize) + ' $' + MaskNum + ',' + RegName2 +
  1542. ' -> movz' + debug_opsize2str(NewSize) + ' ' + InputVal + ',' + RegName2, p);
  1543. asml.Remove(hp1);
  1544. hp1.Free;
  1545. end;
  1546. Result := True;
  1547. Exit;
  1548. end;
  1549. end;
  1550. end;
  1551. { Next instruction is also a MOV ? }
  1552. if MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  1553. begin
  1554. if (taicpu(p).oper[1]^.typ = top_reg) and
  1555. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  1556. begin
  1557. CurrentReg := taicpu(p).oper[1]^.reg;
  1558. TransferUsedRegs(TmpUsedRegs);
  1559. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  1560. { we have
  1561. mov x, %treg
  1562. mov %treg, y
  1563. }
  1564. if not(RegInOp(CurrentReg, taicpu(hp1).oper[1]^)) then
  1565. if not(RegUsedAfterInstruction(CurrentReg, hp1, TmpUsedRegs)) then
  1566. { we've got
  1567. mov x, %treg
  1568. mov %treg, y
  1569. with %treg is not used after }
  1570. case taicpu(p).oper[0]^.typ Of
  1571. top_reg:
  1572. begin
  1573. { change
  1574. mov %reg, %treg
  1575. mov %treg, y
  1576. to
  1577. mov %reg, y
  1578. }
  1579. if taicpu(hp1).oper[1]^.typ=top_reg then
  1580. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1581. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  1582. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 2 done',p);
  1583. asml.remove(hp1);
  1584. hp1.free;
  1585. Result:=true;
  1586. Exit;
  1587. end;
  1588. top_const:
  1589. begin
  1590. { change
  1591. mov const, %treg
  1592. mov %treg, y
  1593. to
  1594. mov const, y
  1595. }
  1596. if (taicpu(hp1).oper[1]^.typ=top_reg) or
  1597. ((taicpu(p).oper[0]^.val>=low(longint)) and (taicpu(p).oper[0]^.val<=high(longint))) then
  1598. begin
  1599. if taicpu(hp1).oper[1]^.typ=top_reg then
  1600. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1601. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  1602. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 5 done',p);
  1603. asml.remove(hp1);
  1604. hp1.free;
  1605. Result:=true;
  1606. Exit;
  1607. end;
  1608. end;
  1609. top_ref:
  1610. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  1611. begin
  1612. { change
  1613. mov mem, %treg
  1614. mov %treg, %reg
  1615. to
  1616. mov mem, %reg"
  1617. }
  1618. taicpu(p).loadreg(1, taicpu(hp1).oper[1]^.reg);
  1619. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 3 done',p);
  1620. asml.remove(hp1);
  1621. hp1.free;
  1622. Result:=true;
  1623. Exit;
  1624. end;
  1625. else
  1626. { Do nothing };
  1627. end
  1628. else
  1629. { %treg is used afterwards }
  1630. case taicpu(p).oper[0]^.typ of
  1631. top_const:
  1632. if
  1633. (
  1634. not (cs_opt_size in current_settings.optimizerswitches) or
  1635. (taicpu(hp1).opsize = S_B)
  1636. ) and
  1637. (
  1638. (taicpu(hp1).oper[1]^.typ = top_reg) or
  1639. ((taicpu(p).oper[0]^.val >= low(longint)) and (taicpu(p).oper[0]^.val <= high(longint)))
  1640. ) then
  1641. begin
  1642. DebugMsg(SPeepholeOptimization + debug_operstr(taicpu(hp1).oper[0]^) + ' = $' + debug_tostr(taicpu(p).oper[0]^.val) + '; changed to minimise pipeline stall (MovMov2Mov 6b)',hp1);
  1643. taicpu(hp1).loadconst(0, taicpu(p).oper[0]^.val);
  1644. end;
  1645. top_reg:
  1646. begin
  1647. DebugMsg(SPeepholeOptimization + debug_operstr(taicpu(hp1).oper[0]^) + ' = ' + debug_regname(taicpu(p).oper[0]^.reg) + '; changed to minimise pipeline stall (MovMov2Mov 6c)',hp1);
  1648. AllocRegBetween(taicpu(p).oper[0]^.reg, p, hp1, UsedRegs);
  1649. if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
  1650. begin
  1651. DebugMsg(SPeepholeOptimization + 'Mov2Nop 2 done',hp1);
  1652. asml.remove(hp1);
  1653. hp1.free;
  1654. Result := True;
  1655. Exit;
  1656. end;
  1657. taicpu(hp1).loadreg(0, taicpu(p).oper[0]^.reg);
  1658. end;
  1659. else
  1660. { Do nothing };
  1661. end;
  1662. end;
  1663. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  1664. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  1665. { mov reg1, mem1 or mov mem1, reg1
  1666. mov mem2, reg2 mov reg2, mem2}
  1667. begin
  1668. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  1669. { mov reg1, mem1 or mov mem1, reg1
  1670. mov mem2, reg1 mov reg2, mem1}
  1671. begin
  1672. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1673. { Removes the second statement from
  1674. mov reg1, mem1/reg2
  1675. mov mem1/reg2, reg1 }
  1676. begin
  1677. if taicpu(p).oper[0]^.typ=top_reg then
  1678. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1679. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 1',p);
  1680. asml.remove(hp1);
  1681. hp1.free;
  1682. Result:=true;
  1683. exit;
  1684. end
  1685. else
  1686. begin
  1687. TransferUsedRegs(TmpUsedRegs);
  1688. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1689. if (taicpu(p).oper[1]^.typ = top_ref) and
  1690. { mov reg1, mem1
  1691. mov mem2, reg1 }
  1692. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  1693. GetNextInstruction(hp1, hp2) and
  1694. MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
  1695. OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  1696. OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
  1697. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  1698. { change to
  1699. mov reg1, mem1 mov reg1, mem1
  1700. mov mem2, reg1 cmp reg1, mem2
  1701. cmp mem1, reg1
  1702. }
  1703. begin
  1704. asml.remove(hp2);
  1705. hp2.free;
  1706. taicpu(hp1).opcode := A_CMP;
  1707. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  1708. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1709. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  1710. DebugMsg(SPeepholeOptimization + 'MovMovCmp2MovCmp done',hp1);
  1711. end;
  1712. end;
  1713. end
  1714. else if (taicpu(p).oper[1]^.typ=top_ref) and
  1715. OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1716. begin
  1717. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  1718. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1719. DebugMsg(SPeepholeOptimization + 'MovMov2MovMov1 done',p);
  1720. end
  1721. else
  1722. begin
  1723. TransferUsedRegs(TmpUsedRegs);
  1724. if GetNextInstruction(hp1, hp2) and
  1725. MatchOpType(taicpu(p),top_ref,top_reg) and
  1726. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  1727. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1728. MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
  1729. MatchOpType(taicpu(hp2),top_ref,top_reg) and
  1730. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1731. if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
  1732. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  1733. { mov mem1, %reg1
  1734. mov %reg1, mem2
  1735. mov mem2, reg2
  1736. to:
  1737. mov mem1, reg2
  1738. mov reg2, mem2}
  1739. begin
  1740. AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  1741. DebugMsg(SPeepholeOptimization + 'MovMovMov2MovMov 1 done',p);
  1742. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  1743. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1744. asml.remove(hp2);
  1745. hp2.free;
  1746. end
  1747. {$ifdef i386}
  1748. { this is enabled for i386 only, as the rules to create the reg sets below
  1749. are too complicated for x86-64, so this makes this code too error prone
  1750. on x86-64
  1751. }
  1752. else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  1753. not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
  1754. not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
  1755. { mov mem1, reg1 mov mem1, reg1
  1756. mov reg1, mem2 mov reg1, mem2
  1757. mov mem2, reg2 mov mem2, reg1
  1758. to: to:
  1759. mov mem1, reg1 mov mem1, reg1
  1760. mov mem1, reg2 mov reg1, mem2
  1761. mov reg1, mem2
  1762. or (if mem1 depends on reg1
  1763. and/or if mem2 depends on reg2)
  1764. to:
  1765. mov mem1, reg1
  1766. mov reg1, mem2
  1767. mov reg1, reg2
  1768. }
  1769. begin
  1770. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  1771. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  1772. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  1773. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  1774. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1775. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1776. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1777. AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  1778. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  1779. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1780. AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  1781. end
  1782. else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  1783. begin
  1784. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  1785. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1786. end
  1787. else
  1788. begin
  1789. asml.remove(hp2);
  1790. hp2.free;
  1791. end
  1792. {$endif i386}
  1793. ;
  1794. end;
  1795. end;
  1796. (* { movl [mem1],reg1
  1797. movl [mem1],reg2
  1798. to
  1799. movl [mem1],reg1
  1800. movl reg1,reg2
  1801. }
  1802. else if (taicpu(p).oper[0]^.typ = top_ref) and
  1803. (taicpu(p).oper[1]^.typ = top_reg) and
  1804. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1805. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1806. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1807. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  1808. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  1809. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  1810. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  1811. else*)
  1812. { movl const1,[mem1]
  1813. movl [mem1],reg1
  1814. to
  1815. movl const1,reg1
  1816. movl reg1,[mem1]
  1817. }
  1818. if MatchOpType(Taicpu(p),top_const,top_ref) and
  1819. MatchOpType(Taicpu(hp1),top_ref,top_reg) and
  1820. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1821. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  1822. not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
  1823. begin
  1824. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1825. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1826. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  1827. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  1828. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  1829. DebugMsg(SPeepholeOptimization + 'MovMov2MovMov 1',p);
  1830. Result:=true;
  1831. exit;
  1832. end;
  1833. {
  1834. mov* x,reg1
  1835. mov* y,reg1
  1836. to
  1837. mov* y,reg1
  1838. }
  1839. if (taicpu(p).oper[1]^.typ=top_reg) and
  1840. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  1841. not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[0]^)) then
  1842. begin
  1843. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 4 done',p);
  1844. { take care of the register (de)allocs following p }
  1845. UpdateUsedRegs(tai(p.next));
  1846. asml.remove(p);
  1847. p.free;
  1848. p:=hp1;
  1849. Result:=true;
  1850. exit;
  1851. end;
  1852. end;
  1853. { search further than the next instruction for a mov }
  1854. if
  1855. { check as much as possible before the expensive GetNextInstructionUsingReg call }
  1856. (taicpu(p).oper[1]^.typ = top_reg) and
  1857. (taicpu(p).oper[0]^.typ in [top_reg,top_const]) and
  1858. not RegModifiedByInstruction(taicpu(p).oper[1]^.reg, hp1) and
  1859. { we work with hp2 here, so hp1 can be still used later on when
  1860. checking for GetNextInstruction_p }
  1861. { GetNextInstructionUsingReg only searches one instruction ahead unless -O3 is specified }
  1862. GetNextInstructionUsingReg(hp1,hp2,taicpu(p).oper[1]^.reg) and
  1863. MatchInstruction(hp2,A_MOV,[]) and
  1864. MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  1865. ((taicpu(p).oper[0]^.typ=top_const) or
  1866. ((taicpu(p).oper[0]^.typ=top_reg) and
  1867. not(RegUsedBetween(taicpu(p).oper[0]^.reg, p, hp2))
  1868. )
  1869. ) then
  1870. begin
  1871. { we have
  1872. mov x, %treg
  1873. mov %treg, y
  1874. }
  1875. TransferUsedRegs(TmpUsedRegs);
  1876. TmpUsedRegs[R_INTREGISTER].Update(tai(p.Next));
  1877. { We don't need to call UpdateUsedRegs for every instruction between
  1878. p and hp2 because the register we're concerned about will not
  1879. become deallocated (otherwise GetNextInstructionUsingReg would
  1880. have stopped at an earlier instruction). [Kit] }
  1881. TempRegUsed :=
  1882. RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp2, TmpUsedRegs) or
  1883. RegReadByInstruction(taicpu(p).oper[1]^.reg, hp1);
  1884. case taicpu(p).oper[0]^.typ Of
  1885. top_reg:
  1886. begin
  1887. { change
  1888. mov %reg, %treg
  1889. mov %treg, y
  1890. to
  1891. mov %reg, y
  1892. }
  1893. CurrentReg := taicpu(p).oper[0]^.reg; { Saves on a handful of pointer dereferences }
  1894. RegName1 := debug_regname(taicpu(hp2).oper[0]^.reg);
  1895. if taicpu(hp2).oper[1]^.reg = CurrentReg then
  1896. begin
  1897. { %reg = y - remove hp2 completely (doing it here instead of relying on
  1898. the "mov %reg,%reg" optimisation might cut down on a pass iteration) }
  1899. if TempRegUsed then
  1900. begin
  1901. DebugMsg(SPeepholeOptimization + debug_regname(CurrentReg) + ' = ' + RegName1 + '; removed unnecessary instruction (MovMov2MovNop 6b}',hp2);
  1902. AllocRegBetween(CurrentReg, p, hp2, UsedRegs);
  1903. asml.remove(hp2);
  1904. hp2.Free;
  1905. end
  1906. else
  1907. begin
  1908. asml.remove(hp2);
  1909. hp2.Free;
  1910. { We can remove the original MOV too }
  1911. DebugMsg(SPeepholeOptimization + 'MovMov2NopNop 6b done',p);
  1912. { take care of the register (de)allocs following p }
  1913. UpdateUsedRegs(tai(p.next));
  1914. asml.remove(p);
  1915. p.free;
  1916. p:=hp1;
  1917. Result:=true;
  1918. Exit;
  1919. end;
  1920. end
  1921. else
  1922. begin
  1923. AllocRegBetween(CurrentReg, p, hp2, UsedRegs);
  1924. taicpu(hp2).loadReg(0, CurrentReg);
  1925. if TempRegUsed then
  1926. begin
  1927. { Don't remove the first instruction if the temporary register is in use }
  1928. DebugMsg(SPeepholeOptimization + RegName1 + ' = ' + debug_regname(CurrentReg) + '; changed to minimise pipeline stall (MovMov2Mov 6a}',hp2);
  1929. { No need to set Result to True. If there's another instruction later on
  1930. that can be optimised, it will be detected when the main Pass 1 loop
  1931. reaches what is now hp2 and passes it through OptPass1MOV. [Kit] };
  1932. end
  1933. else
  1934. begin
  1935. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 6 done',p);
  1936. { take care of the register (de)allocs following p }
  1937. UpdateUsedRegs(tai(p.next));
  1938. asml.remove(p);
  1939. p.free;
  1940. p:=hp1;
  1941. Result:=true;
  1942. Exit;
  1943. end;
  1944. end;
  1945. end;
  1946. top_const:
  1947. if not (cs_opt_size in current_settings.optimizerswitches) or (taicpu(hp2).opsize = S_B) then
  1948. begin
  1949. { change
  1950. mov const, %treg
  1951. mov %treg, y
  1952. to
  1953. mov const, y
  1954. }
  1955. if (taicpu(hp2).oper[1]^.typ=top_reg) or
  1956. ((taicpu(p).oper[0]^.val>=low(longint)) and (taicpu(p).oper[0]^.val<=high(longint))) then
  1957. begin
  1958. RegName1 := debug_regname(taicpu(hp2).oper[0]^.reg);
  1959. taicpu(hp2).loadOper(0,taicpu(p).oper[0]^);
  1960. if TempRegUsed then
  1961. begin
  1962. { Don't remove the first instruction if the temporary register is in use }
  1963. DebugMsg(SPeepholeOptimization + RegName1 + ' = ' + debug_tostr(taicpu(p).oper[0]^.val) + '; changed to minimise pipeline stall (MovMov2Mov 7a)',hp2);
  1964. { No need to set Result to True. If there's another instruction later on
  1965. that can be optimised, it will be detected when the main Pass 1 loop
  1966. reaches what is now hp2 and passes it through OptPass1MOV. [Kit] };
  1967. end
  1968. else
  1969. begin
  1970. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 7 done',p);
  1971. { take care of the register (de)allocs following p }
  1972. UpdateUsedRegs(tai(p.next));
  1973. asml.remove(p);
  1974. p.free;
  1975. p:=hp1;
  1976. Result:=true;
  1977. Exit;
  1978. end;
  1979. end;
  1980. end;
  1981. else
  1982. Internalerror(2019103001);
  1983. end;
  1984. end;
  1985. { Change
  1986. mov %reg1, %reg2
  1987. xxx %reg2, ???
  1988. to
  1989. mov %reg1, %reg2
  1990. xxx %reg1, ???
  1991. to avoid a write/read penalty
  1992. }
  1993. if MatchOpType(taicpu(p),top_reg,top_reg) and
  1994. ((MatchInstruction(hp1,A_OR,A_AND,A_TEST,[]) and
  1995. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  1996. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^)) or
  1997. (MatchInstruction(hp1,A_CMP,[]) and
  1998. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  1999. MatchOpType(taicpu(hp1),top_const,top_reg)
  2000. )
  2001. ) then
  2002. { we have
  2003. mov %reg1, %reg2
  2004. test/or/and %reg2, %reg2
  2005. }
  2006. begin
  2007. TransferUsedRegs(TmpUsedRegs);
  2008. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  2009. { reg1 will be used after the first instruction,
  2010. so update the allocation info }
  2011. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  2012. if GetNextInstruction(hp1, hp2) and
  2013. (hp2.typ = ait_instruction) and
  2014. taicpu(hp2).is_jmp and
  2015. not(RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg, hp1, TmpUsedRegs)) then
  2016. { change
  2017. mov %reg1, %reg2
  2018. test/or/and %reg2, %reg2
  2019. jxx
  2020. to
  2021. test %reg1, %reg1
  2022. jxx
  2023. }
  2024. begin
  2025. if taicpu(hp1).opcode<>A_CMP then
  2026. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  2027. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  2028. DebugMsg(SPeepholeOptimization + 'MovTest/Cmp/Or/AndJxx2Test/Cmp/Or/AndJxx done',p);
  2029. RemoveCurrentP(p);
  2030. Exit;
  2031. end
  2032. else
  2033. { change
  2034. mov %reg1, %reg2
  2035. test/or/and %reg2, %reg2
  2036. to
  2037. mov %reg1, %reg2
  2038. test/or/and %reg1, %reg1
  2039. }
  2040. begin
  2041. if taicpu(hp1).opcode<>A_CMP then
  2042. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  2043. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  2044. DebugMsg(SPeepholeOptimization + 'MovTest/Cmp/Or/AndJxx2MovTest/Cmp/Or/AndJxx done',p);
  2045. end;
  2046. end;
  2047. { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  2048. x >= RetOffset) as it doesn't do anything (it writes either to a
  2049. parameter or to the temporary storage room for the function
  2050. result)
  2051. }
  2052. if IsExitCode(hp1) and
  2053. MatchOpType(taicpu(p),top_reg,top_ref) and
  2054. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  2055. not(assigned(current_procinfo.procdef.funcretsym) and
  2056. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  2057. (taicpu(p).oper[1]^.ref^.index = NR_NO) then
  2058. begin
  2059. asml.remove(p);
  2060. p.free;
  2061. p:=hp1;
  2062. DebugMsg(SPeepholeOptimization + 'removed deadstore before leave/ret',p);
  2063. RemoveLastDeallocForFuncRes(p);
  2064. Result:=true;
  2065. exit;
  2066. end;
  2067. if MatchOpType(taicpu(p),top_reg,top_ref) and
  2068. MatchInstruction(hp1,A_CMP,A_TEST,[taicpu(p).opsize]) and
  2069. (taicpu(hp1).oper[1]^.typ = top_ref) and
  2070. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  2071. begin
  2072. { change
  2073. mov reg1, mem1
  2074. test/cmp x, mem1
  2075. to
  2076. mov reg1, mem1
  2077. test/cmp x, reg1
  2078. }
  2079. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  2080. DebugMsg(SPeepholeOptimization + 'MovTestCmp2MovTestCmp 1',hp1);
  2081. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  2082. exit;
  2083. end;
  2084. if (taicpu(p).oper[1]^.typ = top_reg) and
  2085. (hp1.typ = ait_instruction) and
  2086. GetNextInstruction(hp1, hp2) and
  2087. MatchInstruction(hp2,A_MOV,[]) and
  2088. (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
  2089. (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
  2090. ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and (taicpu(hp2).opsize=S_L) and
  2091. IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
  2092. ) then
  2093. begin
  2094. if OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  2095. (taicpu(hp2).oper[0]^.typ=top_reg) then
  2096. { change movsX/movzX reg/ref, reg2
  2097. add/sub/or/... reg3/$const, reg2
  2098. mov reg2 reg/ref
  2099. dealloc reg2
  2100. to
  2101. add/sub/or/... reg3/$const, reg/ref }
  2102. begin
  2103. TransferUsedRegs(TmpUsedRegs);
  2104. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  2105. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  2106. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  2107. begin
  2108. { by example:
  2109. movswl %si,%eax movswl %si,%eax p
  2110. decl %eax addl %edx,%eax hp1
  2111. movw %ax,%si movw %ax,%si hp2
  2112. ->
  2113. movswl %si,%eax movswl %si,%eax p
  2114. decw %eax addw %edx,%eax hp1
  2115. movw %ax,%si movw %ax,%si hp2
  2116. }
  2117. DebugMsg(SPeepholeOptimization + 'MovOpMov2Op ('+
  2118. debug_op2str(taicpu(p).opcode)+debug_opsize2str(taicpu(p).opsize)+' '+
  2119. debug_op2str(taicpu(hp1).opcode)+debug_opsize2str(taicpu(hp1).opsize)+' '+
  2120. debug_op2str(taicpu(hp2).opcode)+debug_opsize2str(taicpu(hp2).opsize)+')',p);
  2121. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  2122. {
  2123. ->
  2124. movswl %si,%eax movswl %si,%eax p
  2125. decw %si addw %dx,%si hp1
  2126. movw %ax,%si movw %ax,%si hp2
  2127. }
  2128. case taicpu(hp1).ops of
  2129. 1:
  2130. begin
  2131. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  2132. if taicpu(hp1).oper[0]^.typ=top_reg then
  2133. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  2134. end;
  2135. 2:
  2136. begin
  2137. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  2138. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  2139. (taicpu(hp1).opcode<>A_SHL) and
  2140. (taicpu(hp1).opcode<>A_SHR) and
  2141. (taicpu(hp1).opcode<>A_SAR) then
  2142. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  2143. end;
  2144. else
  2145. internalerror(2008042701);
  2146. end;
  2147. {
  2148. ->
  2149. decw %si addw %dx,%si p
  2150. }
  2151. asml.remove(hp2);
  2152. hp2.Free;
  2153. RemoveCurrentP(p);
  2154. Result:=True;
  2155. Exit;
  2156. end;
  2157. end;
  2158. if MatchOpType(taicpu(hp2),top_reg,top_reg) and
  2159. not(SuperRegistersEqual(taicpu(hp1).oper[0]^.reg,taicpu(hp2).oper[1]^.reg)) and
  2160. ((topsize2memsize[taicpu(hp1).opsize]<= topsize2memsize[taicpu(hp2).opsize]) or
  2161. { opsize matters for these opcodes, we could probably work around this, but it is not worth the effort }
  2162. ((taicpu(hp1).opcode<>A_SHL) and (taicpu(hp1).opcode<>A_SHR) and (taicpu(hp1).opcode<>A_SAR))
  2163. )
  2164. {$ifdef i386}
  2165. { byte registers of esi, edi, ebp, esp are not available on i386 }
  2166. and ((taicpu(hp2).opsize<>S_B) or not(getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_ESI,RS_EDI,RS_EBP,RS_ESP]))
  2167. and ((taicpu(hp2).opsize<>S_B) or not(getsupreg(taicpu(p).oper[0]^.reg) in [RS_ESI,RS_EDI,RS_EBP,RS_ESP]))
  2168. {$endif i386}
  2169. then
  2170. { change movsX/movzX reg/ref, reg2
  2171. add/sub/or/... regX/$const, reg2
  2172. mov reg2, reg3
  2173. dealloc reg2
  2174. to
  2175. movsX/movzX reg/ref, reg3
  2176. add/sub/or/... reg3/$const, reg3
  2177. }
  2178. begin
  2179. TransferUsedRegs(TmpUsedRegs);
  2180. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  2181. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  2182. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  2183. begin
  2184. { by example:
  2185. movswl %si,%eax movswl %si,%eax p
  2186. decl %eax addl %edx,%eax hp1
  2187. movw %ax,%si movw %ax,%si hp2
  2188. ->
  2189. movswl %si,%eax movswl %si,%eax p
  2190. decw %eax addw %edx,%eax hp1
  2191. movw %ax,%si movw %ax,%si hp2
  2192. }
  2193. DebugMsg(SPeepholeOptimization + 'MovOpMov2MovOp ('+
  2194. debug_op2str(taicpu(p).opcode)+debug_opsize2str(taicpu(p).opsize)+' '+
  2195. debug_op2str(taicpu(hp1).opcode)+debug_opsize2str(taicpu(hp1).opsize)+' '+
  2196. debug_op2str(taicpu(hp2).opcode)+debug_opsize2str(taicpu(hp2).opsize)+')',p);
  2197. { limit size of constants as well to avoid assembler errors, but
  2198. check opsize to avoid overflow when left shifting the 1 }
  2199. if (taicpu(p).oper[0]^.typ=top_const) and (topsize2memsize[taicpu(hp2).opsize]<=63) then
  2200. taicpu(p).oper[0]^.val:=taicpu(p).oper[0]^.val and ((qword(1) shl topsize2memsize[taicpu(hp2).opsize])-1);
  2201. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  2202. taicpu(p).changeopsize(taicpu(hp2).opsize);
  2203. if taicpu(p).oper[0]^.typ=top_reg then
  2204. setsubreg(taicpu(p).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  2205. taicpu(p).loadoper(1, taicpu(hp2).oper[1]^);
  2206. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,usedregs);
  2207. {
  2208. ->
  2209. movswl %si,%eax movswl %si,%eax p
  2210. decw %si addw %dx,%si hp1
  2211. movw %ax,%si movw %ax,%si hp2
  2212. }
  2213. case taicpu(hp1).ops of
  2214. 1:
  2215. begin
  2216. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  2217. if taicpu(hp1).oper[0]^.typ=top_reg then
  2218. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  2219. end;
  2220. 2:
  2221. begin
  2222. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  2223. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  2224. (taicpu(hp1).opcode<>A_SHL) and
  2225. (taicpu(hp1).opcode<>A_SHR) and
  2226. (taicpu(hp1).opcode<>A_SAR) then
  2227. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  2228. end;
  2229. else
  2230. internalerror(2018111801);
  2231. end;
  2232. {
  2233. ->
  2234. decw %si addw %dx,%si p
  2235. }
  2236. asml.remove(hp2);
  2237. hp2.Free;
  2238. end;
  2239. end;
  2240. end;
  2241. if MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  2242. GetNextInstruction(hp1, hp2) and
  2243. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  2244. MatchOperand(Taicpu(p).oper[0]^,0) and
  2245. (Taicpu(p).oper[1]^.typ = top_reg) and
  2246. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  2247. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  2248. { mov reg1,0
  2249. bts reg1,operand1 --> mov reg1,operand2
  2250. or reg1,operand2 bts reg1,operand1}
  2251. begin
  2252. Taicpu(hp2).opcode:=A_MOV;
  2253. asml.remove(hp1);
  2254. insertllitem(hp2,hp2.next,hp1);
  2255. asml.remove(p);
  2256. p.free;
  2257. p:=hp1;
  2258. Result:=true;
  2259. exit;
  2260. end;
  2261. if MatchInstruction(hp1,A_LEA,[S_L]) and
  2262. MatchOpType(Taicpu(p),top_ref,top_reg) and
  2263. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  2264. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  2265. ) or
  2266. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  2267. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  2268. )
  2269. ) then
  2270. { mov reg1,ref
  2271. lea reg2,[reg1,reg2]
  2272. to
  2273. add reg2,ref}
  2274. begin
  2275. TransferUsedRegs(TmpUsedRegs);
  2276. { reg1 may not be used afterwards }
  2277. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  2278. begin
  2279. Taicpu(hp1).opcode:=A_ADD;
  2280. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  2281. DebugMsg(SPeepholeOptimization + 'MovLea2Add done',hp1);
  2282. asml.remove(p);
  2283. p.free;
  2284. p:=hp1;
  2285. result:=true;
  2286. exit;
  2287. end;
  2288. end;
  2289. end;
{ Pass-1 peephole for movsX/movzX-style paired moves (MovXXMovXX2Nop /
  MovXXMovXX2MovXX):

    movXX op1, op2
    movXX op2, op1

  The second instruction is redundant and is always removed once the
  operand pattern matches; the first one is removed as well when its
  destination is a register that is not used after hp1.
  Returns True when anything was changed; p may be advanced past the
  removed instructions. }
function TX86AsmOptimizer.OptPass1MOVXX(var p : tai) : boolean;
  var
    hp1 : tai;
  begin
    Result:=false;
    { only the two-operand form is handled }
    if taicpu(p).ops <> 2 then
      exit;
    { the follower must be the same opcode with the same operand size }
    if GetNextInstruction(p,hp1) and
      MatchInstruction(hp1,taicpu(p).opcode,[taicpu(p).opsize]) and
      (taicpu(hp1).ops = 2) then
      begin
        { operand types must be swapped between the two instructions
          (reg/mem followed by mem/reg or vice versa) }
        if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
           (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
            {  movXX reg1, mem1     or     movXX mem1, reg1
               movXX mem2, reg2            movXX reg2, mem2}
          begin
            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
              { movXX reg1, mem1     or     movXX mem1, reg1
                movXX mem2, reg1            movXX reg2, mem1}
              begin
                if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
                  begin
                    { Removes the second statement from
                        movXX reg1, mem1/reg2
                        movXX mem1/reg2, reg1
                    }
                    { keep the register allocation info alive across
                      the span that the value now covers }
                    if taicpu(p).oper[0]^.typ=top_reg then
                      AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                    { Removes the second statement from
                        movXX mem1/reg1, reg2
                        movXX reg2, mem1/reg1
                    }
                    { if the intermediate register dies here, the first
                      move can go too; p is advanced past hp1 BEFORE hp1
                      is removed below }
                    if (taicpu(p).oper[1]^.typ=top_reg) and
                      not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)) then
                      begin
                        asml.remove(p);
                        p.free;
                        GetNextInstruction(hp1,p);
                        DebugMsg(SPeepholeOptimization + 'MovXXMovXX2Nop 1 done',p);
                      end
                    else
                      DebugMsg(SPeepholeOptimization + 'MovXXMovXX2MoVXX 1 done',p);
                    { the second move is removed in both cases }
                    asml.remove(hp1);
                    hp1.free;
                    Result:=true;
                    exit;
                  end
              end;
          end;
      end;
  end;
  2341. function TX86AsmOptimizer.OptPass1OP(var p : tai) : boolean;
  2342. var
  2343. hp1 : tai;
  2344. begin
  2345. result:=false;
  2346. { replace
  2347. <Op>X %mreg1,%mreg2 // Op in [ADD,MUL]
  2348. MovX %mreg2,%mreg1
  2349. dealloc %mreg2
  2350. by
  2351. <Op>X %mreg2,%mreg1
  2352. ?
  2353. }
  2354. if GetNextInstruction(p,hp1) and
  2355. { we mix single and double opperations here because we assume that the compiler
  2356. generates vmovapd only after double operations and vmovaps only after single operations }
  2357. MatchInstruction(hp1,A_MOVAPD,A_MOVAPS,[S_NO]) and
  2358. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  2359. MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
  2360. (taicpu(p).oper[0]^.typ=top_reg) then
  2361. begin
  2362. TransferUsedRegs(TmpUsedRegs);
  2363. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  2364. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  2365. begin
  2366. taicpu(p).loadoper(0,taicpu(hp1).oper[0]^);
  2367. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  2368. DebugMsg(SPeepholeOptimization + 'OpMov2Op done',p);
  2369. asml.Remove(hp1);
  2370. hp1.Free;
  2371. result:=true;
  2372. end;
  2373. end;
  2374. end;
{ Pass-1 peephole for LEA. Applies, in order:
    - strips useless segment prefixes;
    - Lea2Mov / Lea2Nop / Lea2Inc / Lea2Dec / Lea2Sub / Lea2Add for
      simple base+offset forms;
    - LeaMov2Lea: folds a following "mov reg1,reg2" into the lea;
    - LeaLea2Lea: merges two chained offset-only leas;
    - LeaOp2Op: substitutes the lea's reference into the next
      instruction's memory operand when legal;
    - LeaCallLeaRet2Jmp: turns a stack-adjust/call/stack-restore/ret
      sequence into a plain jmp (tail call).
  Returns True when p was replaced or removed. }
function TX86AsmOptimizer.OptPass1LEA(var p : tai) : boolean;
  var
    hp1, hp2, hp3: tai;
    l : ASizeInt;
    ref: Integer;
    saveref: treference;
  begin
    Result:=false;
    { removes seg register prefixes from LEA operations, as they
      don't do anything}
    taicpu(p).oper[0]^.ref^.Segment:=NR_NO;
    { changes "lea (%reg1), %reg2" into "mov %reg1, %reg2" }
    if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
       (taicpu(p).oper[0]^.ref^.index = NR_NO) and
       { do not mess with leas acessing the stack pointer }
       (taicpu(p).oper[1]^.reg <> NR_STACK_POINTER_REG) and
       (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
      begin
        if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
           (taicpu(p).oper[0]^.ref^.offset = 0) then
          begin
            { lea (%reg1),%reg2 with no offset -> plain register copy }
            hp1:=taicpu.op_reg_reg(A_MOV,taicpu(p).opsize,taicpu(p).oper[0]^.ref^.base,
              taicpu(p).oper[1]^.reg);
            InsertLLItem(p.previous,p.next, hp1);
            DebugMsg(SPeepholeOptimization + 'Lea2Mov done',hp1);
            p.free;
            p:=hp1;
            Result:=true;
            exit;
          end
        else if (taicpu(p).oper[0]^.ref^.offset = 0) then
          begin
            { lea (%reg1),%reg1 does nothing at all }
            DebugMsg(SPeepholeOptimization + 'Lea2Nop done',p);
            RemoveCurrentP(p);
            Result:=true;
            exit;
          end
        { continue to use lea to adjust the stack pointer,
          it is the recommended way, but only if not optimizing for size }
        else if (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
          (cs_opt_size in current_settings.optimizerswitches) then
          with taicpu(p).oper[0]^.ref^ do
            if (base = taicpu(p).oper[1]^.reg) then
              begin
                l:=offset;
                if (l=1) and UseIncDec then
                  begin
                    taicpu(p).opcode:=A_INC;
                    taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                    taicpu(p).ops:=1;
                    DebugMsg(SPeepholeOptimization + 'Lea2Inc done',p);
                  end
                else if (l=-1) and UseIncDec then
                  begin
                    taicpu(p).opcode:=A_DEC;
                    taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                    taicpu(p).ops:=1;
                    DebugMsg(SPeepholeOptimization + 'Lea2Dec done',p);
                  end
                else
                  begin
                    { the -2147483648 guard: negating low(longint)
                      would overflow, so that value goes to ADD instead }
                    if (l<0) and (l<>-2147483648) then
                      begin
                        taicpu(p).opcode:=A_SUB;
                        taicpu(p).loadConst(0,-l);
                        DebugMsg(SPeepholeOptimization + 'Lea2Sub done',p);
                      end
                    else
                      begin
                        taicpu(p).opcode:=A_ADD;
                        taicpu(p).loadConst(0,l);
                        DebugMsg(SPeepholeOptimization + 'Lea2Add done',p);
                      end;
                  end;
                Result:=true;
                exit;
              end;
      end;
    { LeaMov2Lea: "lea x,reg1; mov reg1,reg2" -> "lea x,reg2"
      when reg1 dies at the mov }
    if GetNextInstruction(p,hp1) and
      MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) and
      MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
      MatchOpType(Taicpu(hp1),top_reg,top_reg) and
      (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) then
      begin
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.next));
        if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
          begin
            taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
            DebugMsg(SPeepholeOptimization + 'LeaMov2Lea done',p);
            asml.Remove(hp1);
            hp1.Free;
            result:=true;
          end;
      end;
    { changes
        lea offset1(regX), reg1
        lea offset2(reg1), reg1
        to
        lea offset1+offset2(regX), reg1 }
    if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[1]^.reg) and
      MatchInstruction(hp1,A_LEA,[S_L]) and
      MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
      (taicpu(hp1).oper[0]^.ref^.base=taicpu(p).oper[1]^.reg) and
      (taicpu(p).oper[0]^.ref^.index=NR_NO) and
      (taicpu(p).oper[0]^.ref^.relsymbol=nil) and
      (taicpu(p).oper[0]^.ref^.scalefactor in [0,1]) and
      (taicpu(p).oper[0]^.ref^.segment=NR_NO) and
      (taicpu(p).oper[0]^.ref^.symbol=nil) and
      (taicpu(p).oper[0]^.ref^.index=taicpu(hp1).oper[0]^.ref^.index) and
      (taicpu(p).oper[0]^.ref^.relsymbol=taicpu(hp1).oper[0]^.ref^.relsymbol) and
      (taicpu(p).oper[0]^.ref^.scalefactor=taicpu(hp1).oper[0]^.ref^.scalefactor) and
      (taicpu(p).oper[0]^.ref^.segment=taicpu(hp1).oper[0]^.ref^.segment) and
      (taicpu(p).oper[0]^.ref^.symbol=taicpu(hp1).oper[0]^.ref^.symbol) then
      begin
        DebugMsg(SPeepholeOptimization + 'LeaLea2Lea done',p);
        inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
        taicpu(hp1).oper[0]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
        RemoveCurrentP(p);
        result:=true;
        exit;
      end;
    { changes
        lea <ref1>, reg1
        <op> ...,<ref. with reg1>,...
        to
        <op> ...,<ref1>,... }
    if (taicpu(p).oper[1]^.reg<>current_procinfo.framepointer) and
      (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) and
      GetNextInstruction(p,hp1) and
      (hp1.typ=ait_instruction) and
      not(MatchInstruction(hp1,A_LEA,[])) then
      begin
        { find a reference which uses reg1 }
        if (taicpu(hp1).ops>=1) and (taicpu(hp1).oper[0]^.typ=top_ref) and RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[0]^) then
          ref:=0
        else if (taicpu(hp1).ops>=2) and (taicpu(hp1).oper[1]^.typ=top_ref) and RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^) then
          ref:=1
        else
          ref:=-1;
        if (ref<>-1) and
          { reg1 must be either the base or the index }
          ((taicpu(hp1).oper[ref]^.ref^.base=taicpu(p).oper[1]^.reg) xor (taicpu(hp1).oper[ref]^.ref^.index=taicpu(p).oper[1]^.reg)) then
          begin
            { reg1 can be removed from the reference }
            { keep a copy so the reference can be restored if the
              substitution turns out to be impossible }
            saveref:=taicpu(hp1).oper[ref]^.ref^;
            if taicpu(hp1).oper[ref]^.ref^.base=taicpu(p).oper[1]^.reg then
              taicpu(hp1).oper[ref]^.ref^.base:=NR_NO
            else if taicpu(hp1).oper[ref]^.ref^.index=taicpu(p).oper[1]^.reg then
              taicpu(hp1).oper[ref]^.ref^.index:=NR_NO
            else
              Internalerror(2019111201);
            { check if we can insert all data of the lea into the second instruction }
            if ((taicpu(hp1).oper[ref]^.ref^.base=taicpu(p).oper[1]^.reg) or (taicpu(hp1).oper[ref]^.ref^.scalefactor in [0,1])) and
              ((taicpu(p).oper[0]^.ref^.base=NR_NO) or (taicpu(hp1).oper[ref]^.ref^.base=NR_NO)) and
              ((taicpu(p).oper[0]^.ref^.index=NR_NO) or (taicpu(hp1).oper[ref]^.ref^.index=NR_NO)) and
              ((taicpu(p).oper[0]^.ref^.symbol=nil) or (taicpu(hp1).oper[ref]^.ref^.symbol=nil)) and
              ((taicpu(p).oper[0]^.ref^.relsymbol=nil) or (taicpu(hp1).oper[ref]^.ref^.relsymbol=nil)) and
              ((taicpu(p).oper[0]^.ref^.scalefactor in [0,1]) or (taicpu(hp1).oper[ref]^.ref^.scalefactor in [0,1])) and
              (taicpu(p).oper[0]^.ref^.segment=NR_NO) and (taicpu(hp1).oper[ref]^.ref^.segment=NR_NO)
{$ifdef x86_64}
              and (abs(taicpu(hp1).oper[ref]^.ref^.offset+taicpu(p).oper[0]^.ref^.offset)<=$7fffffff)
              and (((taicpu(p).oper[0]^.ref^.base<>NR_RIP) and (taicpu(p).oper[0]^.ref^.index<>NR_RIP)) or
                   ((taicpu(hp1).oper[ref]^.ref^.base=NR_NO) and (taicpu(hp1).oper[ref]^.ref^.index=NR_NO))
                  )
{$endif x86_64}
              then
              begin
                { reg1 might not used by the second instruction after it is remove from the reference }
                if not(RegInInstruction(taicpu(p).oper[1]^.reg,taicpu(hp1))) then
                  begin
                    TransferUsedRegs(TmpUsedRegs);
                    UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                    { reg1 is not updated so it might not be used afterwards }
                    if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                      begin
                        DebugMsg(SPeepholeOptimization + 'LeaOp2Op done',p);
                        if taicpu(p).oper[0]^.ref^.base<>NR_NO then
                          taicpu(hp1).oper[ref]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
                        if taicpu(p).oper[0]^.ref^.index<>NR_NO then
                          taicpu(hp1).oper[ref]^.ref^.index:=taicpu(p).oper[0]^.ref^.index;
                        if taicpu(p).oper[0]^.ref^.symbol<>nil then
                          taicpu(hp1).oper[ref]^.ref^.symbol:=taicpu(p).oper[0]^.ref^.symbol;
                        if taicpu(p).oper[0]^.ref^.relsymbol<>nil then
                          taicpu(hp1).oper[ref]^.ref^.relsymbol:=taicpu(p).oper[0]^.ref^.relsymbol;
                        if not(taicpu(p).oper[0]^.ref^.scalefactor in [0,1]) then
                          taicpu(hp1).oper[ref]^.ref^.scalefactor:=taicpu(p).oper[0]^.ref^.scalefactor;
                        inc(taicpu(hp1).oper[ref]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
                        RemoveCurrentP(p);
                        result:=true;
                        exit;
                      end
                  end;
              end;
            { recover }
            taicpu(hp1).oper[ref]^.ref^:=saveref;
          end;
      end;
    { replace
        lea x(stackpointer),stackpointer
        call procname
        lea -x(stackpointer),stackpointer
        ret
      by
        jmp procname

      this should never hurt except when pic is used, not sure
      how to handle it then

      but do it only on level 4 because it destroys stack back traces
    }
    if (cs_opt_level4 in current_settings.optimizerswitches) and
      not(cs_create_pic in current_settings.moduleswitches) and
      (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG) and
      (taicpu(p).oper[0]^.ref^.base=NR_STACK_POINTER_REG) and
      (taicpu(p).oper[0]^.ref^.index=NR_NO) and
      (taicpu(p).oper[0]^.ref^.relsymbol=nil) and
      (taicpu(p).oper[0]^.ref^.scalefactor in [0,1]) and
      (taicpu(p).oper[0]^.ref^.segment=NR_NO) and
      (taicpu(p).oper[0]^.ref^.symbol=nil) and
      GetNextInstruction(p, hp1) and
      MatchInstruction(hp1,A_CALL,[S_NO]) and
      GetNextInstruction(hp1, hp2) and
      MatchInstruction(hp2,A_LEA,[taicpu(p).opsize]) and
      (taicpu(hp2).oper[1]^.reg=NR_STACK_POINTER_REG) and
      (taicpu(p).oper[0]^.ref^.base=taicpu(hp2).oper[0]^.ref^.base) and
      (taicpu(p).oper[0]^.ref^.index=taicpu(hp2).oper[0]^.ref^.index) and
      { the second lea must undo exactly the first one's adjustment }
      (taicpu(p).oper[0]^.ref^.offset=-taicpu(hp2).oper[0]^.ref^.offset) and
      (taicpu(p).oper[0]^.ref^.relsymbol=taicpu(hp2).oper[0]^.ref^.relsymbol) and
      (taicpu(p).oper[0]^.ref^.scalefactor=taicpu(hp2).oper[0]^.ref^.scalefactor) and
      (taicpu(p).oper[0]^.ref^.segment=taicpu(hp2).oper[0]^.ref^.segment) and
      (taicpu(p).oper[0]^.ref^.symbol=taicpu(hp2).oper[0]^.ref^.symbol) and
      GetNextInstruction(hp2, hp3) and
      MatchInstruction(hp3,A_RET,[S_NO]) and
      (taicpu(hp3).ops=0) then
      begin
        DebugMsg(SPeepholeOptimization + 'LeaCallLeaRet2Jmp done',p);
        { reuse the call instruction as the jmp }
        taicpu(hp1).opcode:=A_JMP;
        taicpu(hp1).is_jmp:=true;
        asml.remove(p);
        asml.remove(hp2);
        asml.remove(hp3);
        p.free;
        hp2.free;
        hp3.free;
        p:=hp1;
        Result:=true;
      end;
  end;
{ Folds a preceding DEC/SUB/ADD on the same register into the constant
  of the current instruction p, removing the earlier instruction.
  NOTE(review): p is treated as "sub const,reg" here (dec+sub -> const+1,
  sub+sub -> constants added, add+sub -> constants subtracted); callers
  appear to guarantee this shape (OptPass1Sub checks top_const,top_reg)
  - confirm for any other caller.
  Returns True ONLY when the folded constant became zero and p itself
  was removed; p is then repositioned to the previous instruction (or
  to the next one when there is no previous instruction). }
function TX86AsmOptimizer.DoSubAddOpt(var p: tai): Boolean;
  var
    hp1 : tai;
  begin
    DoSubAddOpt := False;
    if GetLastInstruction(p, hp1) and
       (hp1.typ = ait_instruction) and
       (taicpu(hp1).opsize = taicpu(p).opsize) then
      case taicpu(hp1).opcode Of
        A_DEC:
          { "dec reg; sub const,reg" -> "sub const+1,reg" }
          if (taicpu(hp1).oper[0]^.typ = top_reg) and
            MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
            begin
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+1);
              asml.remove(hp1);
              hp1.free;
            end;
        A_SUB:
          { "sub const1,reg; sub const2,reg" -> "sub const1+const2,reg" }
          if MatchOpType(taicpu(hp1),top_const,top_reg) and
            MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) then
            begin
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+taicpu(hp1).oper[0]^.val);
              asml.remove(hp1);
              hp1.free;
            end;
        A_ADD:
          begin
            { "add const1,reg; sub const2,reg" -> "sub const2-const1,reg" }
            if MatchOpType(taicpu(hp1),top_const,top_reg) and
              MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) then
              begin
                taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
                asml.remove(hp1);
                hp1.free;
                { the two instructions cancelled out entirely: drop p
                  as well and step back so the caller re-examines the
                  surrounding code }
                if (taicpu(p).oper[0]^.val = 0) then
                  begin
                    hp1 := tai(p.next);
                    asml.remove(p);
                    p.free;
                    if not GetLastInstruction(hp1, p) then
                      p := hp1;
                    DoSubAddOpt := True;
                  end
              end;
          end;
        else
          ;
      end;
  end;
  2670. function TX86AsmOptimizer.OptPass1Sub(var p : tai) : boolean;
  2671. {$ifdef i386}
  2672. var
  2673. hp1 : tai;
  2674. {$endif i386}
  2675. begin
  2676. Result:=false;
  2677. { * change "subl $2, %esp; pushw x" to "pushl x"}
  2678. { * change "sub/add const1, reg" or "dec reg" followed by
  2679. "sub const2, reg" to one "sub ..., reg" }
  2680. if MatchOpType(taicpu(p),top_const,top_reg) then
  2681. begin
  2682. {$ifdef i386}
  2683. if (taicpu(p).oper[0]^.val = 2) and
  2684. (taicpu(p).oper[1]^.reg = NR_ESP) and
  2685. { Don't do the sub/push optimization if the sub }
  2686. { comes from setting up the stack frame (JM) }
  2687. (not(GetLastInstruction(p,hp1)) or
  2688. not(MatchInstruction(hp1,A_MOV,[S_L]) and
  2689. MatchOperand(taicpu(hp1).oper[0]^,NR_ESP) and
  2690. MatchOperand(taicpu(hp1).oper[0]^,NR_EBP))) then
  2691. begin
  2692. hp1 := tai(p.next);
  2693. while Assigned(hp1) and
  2694. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  2695. not RegReadByInstruction(NR_ESP,hp1) and
  2696. not RegModifiedByInstruction(NR_ESP,hp1) do
  2697. hp1 := tai(hp1.next);
  2698. if Assigned(hp1) and
  2699. MatchInstruction(hp1,A_PUSH,[S_W]) then
  2700. begin
  2701. taicpu(hp1).changeopsize(S_L);
  2702. if taicpu(hp1).oper[0]^.typ=top_reg then
  2703. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  2704. hp1 := tai(p.next);
  2705. asml.remove(p);
  2706. p.free;
  2707. p := hp1;
  2708. Result:=true;
  2709. exit;
  2710. end;
  2711. end;
  2712. {$endif i386}
  2713. if DoSubAddOpt(p) then
  2714. Result:=true;
  2715. end;
  2716. end;
{ Pass-1 peephole for SHL/SAL by a small constant (<= 3, i.e. a valid
  lea scale factor):
    * ShlAddLeaSubIncDec2Lea: absorbs a following run of add/sub/inc/
      dec/lea instructions targeting the same register into one lea,
      as long as nothing after each absorbed instruction reads flags;
    * pre-Pentium2 (32 bit only): rewrites "shl $1,%reg" as
      "add %reg,%reg" and "shl $2/$3,%reg" as a scaled lea.
  Returns False unconditionally; NOTE(review): p may still be replaced
  even though Result stays False - apparently the caller re-examines p,
  confirm against the pass-1 driver loop. }
function TX86AsmOptimizer.OptPass1SHLSAL(var p : tai) : boolean;
  var
    TmpBool1,TmpBool2 : Boolean;
    tmpref : treference;
    hp1,hp2: tai;
  begin
    Result:=false;
    if MatchOpType(taicpu(p),top_const,top_reg) and
       (taicpu(p).opsize in [S_L{$ifdef x86_64},S_Q{$endif x86_64}]) and
       (taicpu(p).oper[0]^.val <= 3) then
      { Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement }
      begin
        { should we check the next instruction? }
        TmpBool1 := True;
        { have we found an add/sub which could be
          integrated in the lea? }
        TmpBool2 := False;
        reference_reset(tmpref,2,[]);
        TmpRef.index := taicpu(p).oper[1]^.reg;
        TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
        { keep absorbing add/sub/inc/dec/lea instructions on the shifted
          register; the flags-read check is needed because the absorbed
          instructions set flags while lea does not }
        while TmpBool1 and
              GetNextInstruction(p, hp1) and
              (tai(hp1).typ = ait_instruction) and
              ((((taicpu(hp1).opcode = A_ADD) or
                 (taicpu(hp1).opcode = A_SUB)) and
                (taicpu(hp1).oper[1]^.typ = Top_Reg) and
                (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
               (((taicpu(hp1).opcode = A_INC) or
                 (taicpu(hp1).opcode = A_DEC)) and
                (taicpu(hp1).oper[0]^.typ = Top_Reg) and
                (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg)) or
               ((taicpu(hp1).opcode = A_LEA) and
                (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) and
                (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg))) and
              (not GetNextInstruction(hp1,hp2) or
               not instrReadsFlags(hp2)) Do
          begin
            TmpBool1 := False;
            if taicpu(hp1).opcode=A_LEA then
              begin
                { a lea can only be merged when the combined scale
                  factor stays a legal one (<= 8) and it carries no
                  symbol/segment of its own }
                if (TmpRef.base = NR_NO) and
                   (taicpu(hp1).oper[0]^.ref^.symbol=nil) and
                   (taicpu(hp1).oper[0]^.ref^.relsymbol=nil) and
                   (taicpu(hp1).oper[0]^.ref^.segment=NR_NO) and
                   ((taicpu(hp1).oper[0]^.ref^.scalefactor=0) or
                    (taicpu(hp1).oper[0]^.ref^.scalefactor*tmpref.scalefactor<=8)) then
                  begin
                    TmpBool1 := True;
                    TmpBool2 := True;
                    inc(TmpRef.offset, taicpu(hp1).oper[0]^.ref^.offset);
                    if taicpu(hp1).oper[0]^.ref^.scalefactor<>0 then
                      tmpref.scalefactor:=tmpref.scalefactor*taicpu(hp1).oper[0]^.ref^.scalefactor;
                    TmpRef.base := taicpu(hp1).oper[0]^.ref^.base;
                    asml.remove(hp1);
                    hp1.free;
                  end
              end
            else if (taicpu(hp1).oper[0]^.typ = Top_Const) then
              begin
                { add/sub with a constant folds into the displacement }
                TmpBool1 := True;
                TmpBool2 := True;
                case taicpu(hp1).opcode of
                  A_ADD:
                    inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
                  A_SUB:
                    dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
                  else
                    internalerror(2019050536);
                end;
                asml.remove(hp1);
                hp1.free;
              end
            else
              { add with a register becomes the base (only if the base
                slot is still free); inc/dec adjust the displacement }
              if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
                 (((taicpu(hp1).opcode = A_ADD) and
                   (TmpRef.base = NR_NO)) or
                  (taicpu(hp1).opcode = A_INC) or
                  (taicpu(hp1).opcode = A_DEC)) then
                begin
                  TmpBool1 := True;
                  TmpBool2 := True;
                  case taicpu(hp1).opcode of
                    A_ADD:
                      TmpRef.base := taicpu(hp1).oper[0]^.reg;
                    A_INC:
                      inc(TmpRef.offset);
                    A_DEC:
                      dec(TmpRef.offset);
                    else
                      internalerror(2019050535);
                  end;
                  asml.remove(hp1);
                  hp1.free;
                end;
          end;
        if TmpBool2
{$ifndef x86_64}
           or
           ((current_settings.optimizecputype < cpu_Pentium2) and
           (taicpu(p).oper[0]^.val <= 3) and
           not(cs_opt_size in current_settings.optimizerswitches))
{$endif x86_64}
          then
          begin
            { a lone "shl $1" (nothing was absorbed) is cheaper as
              "add reg,reg" }
            if not(TmpBool2) and
                (taicpu(p).oper[0]^.val=1) then
              begin
                hp1:=taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
                  taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
              end
            else
              hp1:=taicpu.op_ref_reg(A_LEA, taicpu(p).opsize, TmpRef,
                taicpu(p).oper[1]^.reg);
            DebugMsg(SPeepholeOptimization + 'ShlAddLeaSubIncDec2Lea',p);
            InsertLLItem(p.previous, p.next, hp1);
            p.free;
            p := hp1;
          end;
      end
{$ifndef x86_64}
    else if (current_settings.optimizecputype < cpu_Pentium2) and
      MatchOpType(taicpu(p),top_const,top_reg) then
      begin
        { changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
          but faster on a 486, and Tairable in both U and V pipes on the Pentium
          (unlike shl, which is only Tairable in the U pipe) }
        if taicpu(p).oper[0]^.val=1 then
          begin
            hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
              taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
            InsertLLItem(p.previous, p.next, hp1);
            p.free;
            p := hp1;
          end
        { changes "shl $2, %reg" to "lea (,%reg,4), %reg"
          "shl $3, %reg" to "lea (,%reg,8), %reg" }
        else if (taicpu(p).opsize = S_L) and
                (taicpu(p).oper[0]^.val<= 3) then
          begin
            reference_reset(tmpref,2,[]);
            TmpRef.index := taicpu(p).oper[1]^.reg;
            TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
            hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
            InsertLLItem(p.previous, p.next, hp1);
            p.free;
            p := hp1;
          end;
      end
{$endif x86_64}
      ;
  end;
function TX86AsmOptimizer.OptPass1SETcc(var p: tai): boolean;
  { Collapses  set(C) %reg / test %reg,%reg (or cmp $0,%reg) / j(E|NE) label
    into a single conditional jump on (C) or (~C), removing the TEST/CMP and,
    when %reg is not used afterwards, the SETcc as well. }
  var
    hp1,hp2,next: tai; SetC, JumpC: TAsmCond; Unconditional: Boolean;
  begin
    Result:=false;
    { Pattern match: p = SETcc %reg (byte), hp1 = "test %reg,%reg" or
      "cmp $0,%reg" on the same register, hp2 = a conditional jump. }
    if MatchOpType(taicpu(p),top_reg) and
      GetNextInstruction(p, hp1) and
      ((MatchInstruction(hp1, A_TEST, [S_B]) and
        MatchOpType(taicpu(hp1),top_reg,top_reg) and
        (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg)) or
       (MatchInstruction(hp1, A_CMP, [S_B]) and
        MatchOpType(taicpu(hp1),top_const,top_reg) and
        (taicpu(hp1).oper[0]^.val=0))
      ) and
      (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and
      GetNextInstruction(hp1, hp2) and
      MatchInstruction(hp2, A_Jcc, []) then
      { Change from:                  To:
          set(C) %reg                   j(~C) label
          test %reg,%reg/cmp $0,%reg
          je   label

          set(C) %reg                   j(C)  label
          test %reg,%reg/cmp $0,%reg
          jne  label
      }
      begin
        { Remember the instruction after p so register tracking can be
          advanced past it later, even after p itself is freed. }
        next := tai(p.Next);
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, next);
        UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
        JumpC := taicpu(hp2).condition;
        Unconditional := False;
        if conditions_equal(JumpC, C_E) then
          { je branches when the SETcc register is 0, i.e. when (C) is false }
          SetC := inverse_cond(taicpu(p).condition)
        else if conditions_equal(JumpC, C_NE) then
          SetC := taicpu(p).condition
        else
          { We've got something weird here (and inefficient) }
          begin
            { NOTE(review): this message lacks the SPeepholeOptimization
              prefix used by every other DebugMsg here - confirm intended. }
            DebugMsg('DEBUG: Inefficient jump - check code generation', p);
            SetC := C_NONE;
            { TEST sets CF=0, so JAE/JNB will always branch (use 'condition_in',
              since C_AE <> C_NB normally) }
            if condition_in(C_AE, JumpC) then
              Unconditional := True
            else
              { Not sure what to do with this jump - drop out }
              Exit;
          end;
        { The TEST/CMP is no longer needed in any surviving path. }
        asml.Remove(hp1);
        hp1.Free;
        if Unconditional then
          MakeUnconditional(taicpu(hp2))
        else
          begin
            if SetC = C_NONE then
              InternalError(2018061401);
            taicpu(hp2).SetCondition(SetC);
          end;
        { The SETcc itself can only go if its result register is dead after
          the jump. }
        if not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs) then
          begin
            asml.Remove(p);
            UpdateUsedRegs(next);
            p.Free;
            Result := True;
            { p now points at the (rewritten) jump instruction. }
            p := hp2;
          end;
        DebugMsg(SPeepholeOptimization + 'SETcc/TESTCmp/Jcc -> Jcc',p);
      end;
  end;
function TX86AsmOptimizer.OptPass1FSTP(var p: tai): boolean;
  { returns true if a "continue" should be done after this optimization.
    Removes a redundant fstp mem; fld mem (or fistp mem; fild mem) pair that
    stores to and immediately reloads the same location, when the store is to
    a frame-local temp right before the procedure exits. }
  var
    hp1, hp2: tai;
  begin
    Result := false;
    { p must be fstp/fistp to memory and hp1 the matching fld/fild reloading
      the exact same reference with the same operand size. }
    if MatchOpType(taicpu(p),top_ref) and
      GetNextInstruction(p, hp1) and
      (hp1.typ = ait_instruction) and
      (((taicpu(hp1).opcode = A_FLD) and
        (taicpu(p).opcode = A_FSTP)) or
       ((taicpu(p).opcode = A_FISTP) and
        (taicpu(hp1).opcode = A_FILD))) and
      MatchOpType(taicpu(hp1),top_ref) and
      (taicpu(hp1).opsize = taicpu(p).opsize) and
      RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
      begin
        { replacing fstp f;fld f by fst f is only valid for extended because of rounding }
        if (taicpu(p).opsize=S_FX) and
          GetNextInstruction(hp1, hp2) and
          (hp2.typ = ait_instruction) and
          IsExitCode(hp2) and
          { only remove the store if it targets a local in the current frame,
            and make sure it is not the function result location }
          (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
          not(assigned(current_procinfo.procdef.funcretsym) and
             (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
          (taicpu(p).oper[0]^.ref^.index = NR_NO) then
          begin
            { The value never leaves st(0): drop both instructions. }
            asml.remove(p);
            asml.remove(hp1);
            p.free;
            hp1.free;
            p := hp2;
            RemoveLastDeallocForFuncRes(p);
            Result := true;
          end
        (* can't be done because the store operation rounds
        else
          { fst can't store an extended value! }
          if (taicpu(p).opsize <> S_FX) and
             (taicpu(p).opsize <> S_IQ) then
            begin
              if (taicpu(p).opcode = A_FSTP) then
                taicpu(p).opcode := A_FST
              else taicpu(p).opcode := A_FIST;
              asml.remove(hp1);
              hp1.free;
            end
        *)
      end;
  end;
function TX86AsmOptimizer.OptPass1FLD(var p : tai) : boolean;
  { Folds an FLD followed by a popping x87 arithmetic instruction
    (fxxxp st,st1) into a single non-popping form, keeping the FPU stack
    depth one shallower. }
  var
    hp1, hp2: tai;
  begin
    result:=false;
    { Case 1: "fld %st(n)" followed by "fxxxp st,st1". }
    if MatchOpType(taicpu(p),top_reg) and
      GetNextInstruction(p, hp1) and
      (hp1.typ = Ait_Instruction) and
      MatchOpType(taicpu(hp1),top_reg,top_reg) and
      (taicpu(hp1).oper[0]^.reg = NR_ST) and
      (taicpu(hp1).oper[1]^.reg = NR_ST1) then
      { change to
          fld   reg               fxxx reg,st
          fxxxp st, st1 (hp1)
        Remark: non commutative operations must be reversed!
      }
      begin
        case taicpu(hp1).opcode Of
          A_FMULP,A_FADDP,
          A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
            begin
              { SUB<->SUBR and DIV<->DIVR are swapped because eliminating
                the fld exchanges which operand sits in st(0). }
              case taicpu(hp1).opcode Of
                A_FADDP: taicpu(hp1).opcode := A_FADD;
                A_FMULP: taicpu(hp1).opcode := A_FMUL;
                A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
                A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
                A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
                A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
                else
                  internalerror(2019050534);
              end;
              taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
              taicpu(hp1).oper[1]^.reg := NR_ST;
              asml.remove(p);
              p.free;
              p := hp1;
              Result:=true;
              exit;
            end;
          else
            ;
        end;
      end
    else
      { Case 2: "fld mem" (single/double) followed by "fxxxp st,st1". }
      if MatchOpType(taicpu(p),top_ref) and
        GetNextInstruction(p, hp2) and
        (hp2.typ = Ait_Instruction) and
        MatchOpType(taicpu(hp2),top_reg,top_reg) and
        (taicpu(p).opsize in [S_FS, S_FL]) and
        (taicpu(hp2).oper[0]^.reg = NR_ST) and
        (taicpu(hp2).oper[1]^.reg = NR_ST1) then
        { If the previous instruction already loaded/stored the same memory
          location, the value is known to be in st(0) as well. }
        if GetLastInstruction(p, hp1) and
          MatchInstruction(hp1,A_FLD,A_FST,[taicpu(p).opsize]) and
          MatchOpType(taicpu(hp1),top_ref) and
          RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
          if ((taicpu(hp2).opcode = A_FMULP) or
              (taicpu(hp2).opcode = A_FADDP)) then
            { change to
                fld/fst  mem1  (hp1)      fld/fst  mem1
                fld      mem1  (p)        fadd/
                faddp/                    fmul     st, st
                fmulp    st, st1 (hp2) }
            begin
              asml.remove(p);
              p.free;
              p := hp1;
              { Only the commutative operations are handled here, so no
                operand reversal is needed. }
              if (taicpu(hp2).opcode = A_FADDP) then
                taicpu(hp2).opcode := A_FADD
              else
                taicpu(hp2).opcode := A_FMUL;
              taicpu(hp2).oper[1]^.reg := NR_ST;
            end
          else
            { change to
                fld/fst  mem1 (hp1)   fld/fst  mem1
                fld      mem1 (p)     fld      st }
            begin
              taicpu(p).changeopsize(S_FL);
              taicpu(p).loadreg(0,NR_ST);
            end
        else
          begin
            case taicpu(hp2).opcode Of
              A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
                { change to
                    fld/fst  mem1    (hp1)   fld/fst  mem1
                    fld      mem2    (p)     fxxx     mem2
                    fxxxp    st, st1 (hp2) }
                begin
                  { As in case 1: dropping the pop swaps the operand roles,
                    so SUB/SUBR and DIV/DIVR are exchanged. }
                  case taicpu(hp2).opcode Of
                    A_FADDP: taicpu(p).opcode := A_FADD;
                    A_FMULP: taicpu(p).opcode := A_FMUL;
                    A_FSUBP: taicpu(p).opcode := A_FSUBR;
                    A_FSUBRP: taicpu(p).opcode := A_FSUB;
                    A_FDIVP: taicpu(p).opcode := A_FDIVR;
                    A_FDIVRP: taicpu(p).opcode := A_FDIV;
                    else
                      internalerror(2019050533);
                  end;
                  asml.remove(hp2);
                  hp2.free;
                end
              else
                ;
            end
          end
  end;
function TX86AsmOptimizer.OptPass1Cmp(var p: tai): boolean;
  { Optimises CMP with a constant first operand:
      cmp $0,%reg  -> test %reg,%reg (simplifying/removing following Jcc/SETcc)
      cmp $1,r/m; jl -> cmp $0,r/m; jle
      cmp $<signbit>,%reg; je/jne -> neg %reg; jo/jno (if %reg is dead) }
  var
    v: TCGInt;
    hp1, hp2: tai;
  begin
    Result:=false;
    if taicpu(p).oper[0]^.typ = top_const then
      begin
        { Though GetNextInstruction can be factored out, it is an expensive
          call, so delay calling it until we have first checked cheaper
          conditions that are independent of it. }
        if (taicpu(p).oper[0]^.val = 0) and
          (taicpu(p).oper[1]^.typ = top_reg) and
          GetNextInstruction(p, hp1) and
          MatchInstruction(hp1,A_Jcc,A_SETcc,[]) then
          begin
            hp2 := p;
            { When dealing with "cmp $0,%reg", only ZF and SF contain
              anything meaningful once it's converted to "test %reg,%reg";
              additionally, some jumps will always (or never) branch, so
              evaluate every jump immediately following the
              comparison, optimising the conditions if possible.
              Similarly with SETcc... those that are always set to 0 or 1
              are changed to MOV instructions }
            while GetNextInstruction(hp2, hp1) and
              MatchInstruction(hp1,A_Jcc,A_SETcc,[]) do
              begin
                case taicpu(hp1).condition of
                  C_B, C_C, C_NAE, C_O:
                    { For B/NAE:
                        Will never branch since an unsigned integer can never be below zero
                      For C/O:
                        Result cannot overflow because 0 is being subtracted
                    }
                    begin
                      if taicpu(hp1).opcode = A_Jcc then
                        begin
                          DebugMsg(SPeepholeOptimization + 'Cmpcc2Testcc - condition B/C/NAE/O --> Never (jump removed)', hp1);
                          { Drop the reference to the (now untaken) target label. }
                          TAsmLabel(taicpu(hp1).oper[0]^.ref^.symbol).decrefs;
                          AsmL.Remove(hp1);
                          hp1.Free;
                          { Since hp1 was deleted, hp2 must not be updated }
                          Continue;
                        end
                      else
                        begin
                          DebugMsg(SPeepholeOptimization + 'Cmpcc2Testcc - condition B/C/NAE/O --> Never (set -> mov 0)', hp1);
                          { Convert "set(c) %reg" instruction to "movb 0,%reg" }
                          taicpu(hp1).opcode := A_MOV;
                          taicpu(hp1).condition := C_None;
                          taicpu(hp1).opsize := S_B;
                          taicpu(hp1).allocate_oper(2);
                          { Read the old oper[0] register into slot 1 BEFORE
                            slot 0 is overwritten with the constant. }
                          taicpu(hp1).loadreg(1,taicpu(hp1).oper[0]^.reg);
                          taicpu(hp1).loadconst(0, 0);
                        end;
                    end;
                  C_BE, C_NA:
                    begin
                      { Will only branch if equal to zero }
                      DebugMsg(SPeepholeOptimization + 'Cmpcc2Testcc - condition BE/NA --> E', hp1);
                      taicpu(hp1).condition := C_E;
                    end;
                  C_A, C_NBE:
                    begin
                      { Will only branch if not equal to zero }
                      DebugMsg(SPeepholeOptimization + 'Cmpcc2Testcc - condition A/NBE --> NE', hp1);
                      taicpu(hp1).condition := C_NE;
                    end;
                  C_AE, C_NB, C_NC, C_NO:
                    begin
                      { Will always branch }
                      DebugMsg(SPeepholeOptimization + 'Cmpcc2Testcc - condition AE/NB/NC/NO --> Always', hp1);
                      if taicpu(hp1).opcode = A_Jcc then
                        begin
                          MakeUnconditional(taicpu(hp1));
                          { Any jumps/set that follow will now be dead code }
                          RemoveDeadCodeAfterJump(taicpu(hp1));
                          Break;
                        end
                      else
                        begin
                          { Convert "set(c) %reg" instruction to "movb 1,%reg" }
                          taicpu(hp1).opcode := A_MOV;
                          taicpu(hp1).condition := C_None;
                          taicpu(hp1).opsize := S_B;
                          taicpu(hp1).allocate_oper(2);
                          taicpu(hp1).loadreg(1,taicpu(hp1).oper[0]^.reg);
                          taicpu(hp1).loadconst(0, 1);
                        end;
                    end;
                  C_None:
                    InternalError(2020012201);
                  C_P, C_PE, C_NP, C_PO:
                    { We can't handle parity checks and they should never be generated
                      after a general-purpose CMP (it's used in some floating-point
                      comparisons that don't use CMP) }
                    InternalError(2020012202);
                  else
                    { Zero/Equality, Sign, their complements and all of the
                      signed comparisons do not need to be converted };
                end;
                hp2 := hp1;
              end;
            { Convert the instruction to a TEST }
            taicpu(p).opcode := A_TEST;
            taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
            Result := True;
            Exit;
          end
        else if (taicpu(p).oper[0]^.val = 1) and
          GetNextInstruction(p, hp1) and
          MatchInstruction(hp1,A_Jcc,A_SETcc,[]) and
          (taicpu(hp1).condition in [C_L, C_NGE]) then
          begin
            { Convert;       To:
                cmp $1,r/m     cmp $0,r/m
                jl  @lbl       jle @lbl
            }
            DebugMsg(SPeepholeOptimization + 'Cmp1Jl2Cmp0Jle', p);
            taicpu(p).oper[0]^.val := 0;
            taicpu(hp1).condition := C_LE;
            { If the instruction is now "cmp $0,%reg", convert it to a
              TEST (and effectively do the work of the "cmp $0,%reg" in
              the block above)

              If it's a reference, we can get away with not setting
              Result to True because we haven't evaluated the jump
              in this pass yet.
            }
            if (taicpu(p).oper[1]^.typ = top_reg) then
              begin
                taicpu(p).opcode := A_TEST;
                taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                Result := True;
              end;
            Exit;
          end
        else if (taicpu(p).oper[1]^.typ = top_reg) then
          begin
            { cmp register,$8000                neg register
              je target                 -->     jo target

              .... only if register is deallocated before jump.}
            { v becomes the sign-bit-only value for the operand size; it is
              the one value whose NEG sets OF, making je <-> jo equivalent. }
            case Taicpu(p).opsize of
              S_B: v:=$80;
              S_W: v:=$8000;
              S_L: v:=qword($80000000);
              { S_Q will never happen: cmp with 64 bit constants is not possible }
              S_Q:
                Exit;
              else
                internalerror(2013112905);
            end;
            if (taicpu(p).oper[0]^.val=v) and
              GetNextInstruction(p, hp1) and
              MatchInstruction(hp1,A_Jcc,A_SETcc,[]) and
              (Taicpu(hp1).condition in [C_E,C_NE]) then
              begin
                TransferUsedRegs(TmpUsedRegs);
                UpdateUsedRegs(TmpUsedRegs,tai(p.next));
                { NEG destroys the register, so it must be dead here. }
                if not(RegInUsedRegs(Taicpu(p).oper[1]^.reg, TmpUsedRegs)) then
                  begin
                    DebugMsg(SPeepholeOptimization + 'CmpJe2NegJo done',p);
                    Taicpu(p).opcode:=A_NEG;
                    Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
                    Taicpu(p).clearop(1);
                    Taicpu(p).ops:=1;
                    if Taicpu(hp1).condition=C_E then
                      Taicpu(hp1).condition:=C_O
                    else
                      Taicpu(hp1).condition:=C_NO;
                    Result:=true;
                    exit;
                  end;
              end;
          end;
      end;
  end;
  3270. function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
  3271. function IsXCHGAcceptable: Boolean; inline;
  3272. begin
  3273. { Always accept if optimising for size }
  3274. Result := (cs_opt_size in current_settings.optimizerswitches) or
  3275. (
  3276. {$ifdef x86_64}
  3277. { XCHG takes 3 cycles on AMD Athlon64 }
  3278. (current_settings.optimizecputype >= cpu_core_i)
  3279. {$else x86_64}
  3280. { From the Pentium M onwards, XCHG only has a latency of 2 rather
  3281. than 3, so it becomes a saving compared to three MOVs with two of
  3282. them able to execute simultaneously. [Kit] }
  3283. (current_settings.optimizecputype >= cpu_PentiumM)
  3284. {$endif x86_64}
  3285. );
  3286. end;
  3287. var
  3288. NewRef: TReference;
  3289. hp1,hp2,hp3: tai;
  3290. {$ifndef x86_64}
  3291. hp4: tai;
  3292. OperIdx: Integer;
  3293. {$endif x86_64}
  3294. begin
  3295. Result:=false;
  3296. if not GetNextInstruction(p, hp1) then
  3297. Exit;
  3298. if MatchInstruction(hp1, A_JMP, [S_NO]) then
  3299. begin
  3300. { Sometimes the MOVs that OptPass2JMP produces can be improved
  3301. further, but we can't just put this jump optimisation in pass 1
  3302. because it tends to perform worse when conditional jumps are
  3303. nearby (e.g. when converting CMOV instructions). [Kit] }
  3304. if OptPass2JMP(hp1) then
  3305. { call OptPass1MOV once to potentially merge any MOVs that were created }
  3306. Result := OptPass1MOV(p)
  3307. { OptPass2MOV will now exit but will be called again if OptPass1MOV
  3308. returned True and the instruction is still a MOV, thus checking
  3309. the optimisations below }
  3310. { If OptPass2JMP returned False, no optimisations were done to
  3311. the jump and there are no further optimisations that can be done
  3312. to the MOV instruction on this pass }
  3313. end
  3314. else if MatchOpType(taicpu(p),top_reg,top_reg) and
  3315. (taicpu(p).opsize in [S_L{$ifdef x86_64}, S_Q{$endif x86_64}]) and
  3316. MatchInstruction(hp1,A_ADD,A_SUB,[taicpu(p).opsize]) and
  3317. MatchOpType(taicpu(hp1),top_const,top_reg) and
  3318. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) and
  3319. { be lazy, checking separately for sub would be slightly better }
  3320. (abs(taicpu(hp1).oper[0]^.val)<=$7fffffff) then
  3321. begin
  3322. { Change:
  3323. movl/q %reg1,%reg2 movl/q %reg1,%reg2
  3324. addl/q $x,%reg2 subl/q $x,%reg2
  3325. To:
  3326. leal/q x(%reg1),%reg2 leal/q -x(%reg1),%reg2
  3327. }
  3328. if not GetNextInstruction(hp1, hp2) or
  3329. { The FLAGS register isn't always tracked properly, so do not
  3330. perform this optimisation if a conditional statement follows }
  3331. not MatchInstruction(hp2, [A_Jcc, A_SETcc, A_CMOVcc], []) then
  3332. begin
  3333. reference_reset(NewRef, 1, []);
  3334. NewRef.base := taicpu(p).oper[0]^.reg;
  3335. NewRef.scalefactor := 1;
  3336. if taicpu(hp1).opcode = A_ADD then
  3337. begin
  3338. DebugMsg(SPeepholeOptimization + 'MovAdd2Lea', p);
  3339. NewRef.offset := taicpu(hp1).oper[0]^.val;
  3340. end
  3341. else
  3342. begin
  3343. DebugMsg(SPeepholeOptimization + 'MovSub2Lea', p);
  3344. NewRef.offset := -taicpu(hp1).oper[0]^.val;
  3345. end;
  3346. taicpu(p).opcode := A_LEA;
  3347. taicpu(p).loadref(0, NewRef);
  3348. Asml.Remove(hp1);
  3349. hp1.Free;
  3350. Result := True;
  3351. Exit;
  3352. end;
  3353. end
  3354. else if MatchOpType(taicpu(p),top_reg,top_reg) and
  3355. {$ifdef x86_64}
  3356. MatchInstruction(hp1,A_MOVZX,A_MOVSX,A_MOVSXD,[]) and
  3357. {$else x86_64}
  3358. MatchInstruction(hp1,A_MOVZX,A_MOVSX,[]) and
  3359. {$endif x86_64}
  3360. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  3361. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  3362. { mov reg1, reg2 mov reg1, reg2
  3363. movzx/sx reg2, reg3 to movzx/sx reg1, reg3}
  3364. begin
  3365. taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
  3366. DebugMsg(SPeepholeOptimization + 'mov %reg1,%reg2; movzx/sx %reg2,%reg3 -> mov %reg1,%reg2;movzx/sx %reg1,%reg3',p);
  3367. { Don't remove the MOV command without first checking that reg2 isn't used afterwards,
  3368. or unless supreg(reg3) = supreg(reg2)). [Kit] }
  3369. TransferUsedRegs(TmpUsedRegs);
  3370. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  3371. if (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) or
  3372. not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)
  3373. then
  3374. begin
  3375. asml.remove(p);
  3376. p.free;
  3377. p := hp1;
  3378. Result:=true;
  3379. end;
  3380. exit;
  3381. end
  3382. else if MatchOpType(taicpu(p),top_reg,top_reg) and
  3383. IsXCHGAcceptable and
  3384. { XCHG doesn't support 8-byte registers }
  3385. (taicpu(p).opsize <> S_B) and
  3386. MatchInstruction(hp1, A_MOV, []) and
  3387. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  3388. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
  3389. GetNextInstruction(hp1, hp2) and
  3390. MatchInstruction(hp2, A_MOV, []) and
  3391. { Don't need to call MatchOpType for hp2 because the operand matches below cover for it }
  3392. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[1]^.reg) and
  3393. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) then
  3394. begin
  3395. { mov %reg1,%reg2
  3396. mov %reg3,%reg1 -> xchg %reg3,%reg1
  3397. mov %reg2,%reg3
  3398. (%reg2 not used afterwards)
  3399. Note that xchg takes 3 cycles to execute, and generally mov's take
  3400. only one cycle apiece, but the first two mov's can be executed in
  3401. parallel, only taking 2 cycles overall. Older processors should
  3402. therefore only optimise for size. [Kit]
  3403. }
  3404. TransferUsedRegs(TmpUsedRegs);
  3405. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  3406. UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
  3407. if not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp2, TmpUsedRegs) then
  3408. begin
  3409. DebugMsg(SPeepholeOptimization + 'MovMovMov2XChg', p);
  3410. AllocRegBetween(taicpu(hp2).oper[1]^.reg, p, hp1, UsedRegs);
  3411. taicpu(hp1).opcode := A_XCHG;
  3412. asml.Remove(p);
  3413. asml.Remove(hp2);
  3414. p.Free;
  3415. hp2.Free;
  3416. p := hp1;
  3417. Result := True;
  3418. Exit;
  3419. end;
  3420. end
  3421. else if MatchOpType(taicpu(p),top_reg,top_reg) and
  3422. {$ifdef x86_64}
  3423. MatchInstruction(hp1,[A_MOV,A_MOVZX,A_MOVSX,A_MOVSXD],[]) and
  3424. {$else x86_64}
  3425. MatchInstruction(hp1,A_MOV,A_MOVZX,A_MOVSX,[]) and
  3426. {$endif x86_64}
  3427. MatchOpType(taicpu(hp1),top_ref,top_reg) and
  3428. ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg)
  3429. or
  3430. (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)
  3431. ) and
  3432. (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
  3433. { mov reg1, reg2
  3434. mov/zx/sx (reg2, ..), reg2 to mov/zx/sx (reg1, ..), reg2}
  3435. begin
  3436. if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
  3437. taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
  3438. if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
  3439. taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
  3440. DebugMsg(SPeepholeOptimization + 'MovMovXX2MoVXX 1 done',p);
  3441. asml.remove(p);
  3442. p.free;
  3443. p := hp1;
  3444. Result:=true;
  3445. exit;
  3446. end
  3447. else if MatchOpType(taicpu(p),top_reg,top_reg) and
  3448. MatchInstruction(hp1, A_SAR, []) then
  3449. begin
  3450. if MatchOperand(taicpu(hp1).oper[0]^, 31) then
  3451. begin
  3452. { the use of %edx also covers the opsize being S_L }
  3453. if MatchOperand(taicpu(hp1).oper[1]^, NR_EDX) then
  3454. begin
  3455. { Note it has to be specifically "movl %eax,%edx", and those specific sub-registers }
  3456. if (taicpu(p).oper[0]^.reg = NR_EAX) and
  3457. (taicpu(p).oper[1]^.reg = NR_EDX) then
  3458. begin
  3459. { Change:
  3460. movl %eax,%edx
  3461. sarl $31,%edx
  3462. To:
  3463. cltd
  3464. }
  3465. DebugMsg(SPeepholeOptimization + 'MovSar2Cltd', p);
  3466. Asml.Remove(hp1);
  3467. hp1.Free;
  3468. taicpu(p).opcode := A_CDQ;
  3469. taicpu(p).opsize := S_NO;
  3470. taicpu(p).clearop(1);
  3471. taicpu(p).clearop(0);
  3472. taicpu(p).ops:=0;
  3473. Result := True;
  3474. end
  3475. else if (cs_opt_size in current_settings.optimizerswitches) and
  3476. (taicpu(p).oper[0]^.reg = NR_EDX) and
  3477. (taicpu(p).oper[1]^.reg = NR_EAX) then
  3478. begin
  3479. { Change:
  3480. movl %edx,%eax
  3481. sarl $31,%edx
  3482. To:
  3483. movl %edx,%eax
  3484. cltd
  3485. Note that this creates a dependency between the two instructions,
  3486. so only perform if optimising for size.
  3487. }
  3488. DebugMsg(SPeepholeOptimization + 'MovSar2MovCltd', p);
  3489. taicpu(hp1).opcode := A_CDQ;
  3490. taicpu(hp1).opsize := S_NO;
  3491. taicpu(hp1).clearop(1);
  3492. taicpu(hp1).clearop(0);
  3493. taicpu(hp1).ops:=0;
  3494. end;
  3495. {$ifndef x86_64}
  3496. end
  3497. { Don't bother if CMOV is supported, because a more optimal
  3498. sequence would have been generated for the Abs() intrinsic }
  3499. else if not(CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) and
  3500. { the use of %eax also covers the opsize being S_L }
  3501. MatchOperand(taicpu(hp1).oper[1]^, NR_EAX) and
  3502. (taicpu(p).oper[0]^.reg = NR_EAX) and
  3503. (taicpu(p).oper[1]^.reg = NR_EDX) and
  3504. GetNextInstruction(hp1, hp2) and
  3505. MatchInstruction(hp2, A_XOR, [S_L]) and
  3506. MatchOperand(taicpu(hp2).oper[0]^, NR_EAX) and
  3507. MatchOperand(taicpu(hp2).oper[1]^, NR_EDX) and
  3508. GetNextInstruction(hp2, hp3) and
  3509. MatchInstruction(hp3, A_SUB, [S_L]) and
  3510. MatchOperand(taicpu(hp3).oper[0]^, NR_EAX) and
  3511. MatchOperand(taicpu(hp3).oper[1]^, NR_EDX) then
  3512. begin
  3513. { Change:
  3514. movl %eax,%edx
  3515. sarl $31,%eax
  3516. xorl %eax,%edx
  3517. subl %eax,%edx
  3518. (Instruction that uses %edx)
  3519. (%eax deallocated)
  3520. (%edx deallocated)
  3521. To:
  3522. cltd
  3523. xorl %edx,%eax <-- Note the registers have swapped
  3524. subl %edx,%eax
  3525. (Instruction that uses %eax) <-- %eax rather than %edx
  3526. }
  3527. TransferUsedRegs(TmpUsedRegs);
  3528. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  3529. UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
  3530. UpdateUsedRegs(TmpUsedRegs, tai(hp2.Next));
  3531. if not RegUsedAfterInstruction(NR_EAX, hp3, TmpUsedRegs) then
  3532. begin
  3533. if GetNextInstruction(hp3, hp4) and
  3534. not RegModifiedByInstruction(NR_EDX, hp4) and
  3535. not RegUsedAfterInstruction(NR_EDX, hp4, TmpUsedRegs) then
  3536. begin
  3537. DebugMsg(SPeepholeOptimization + 'abs() intrinsic optimisation', p);
  3538. taicpu(p).opcode := A_CDQ;
  3539. taicpu(p).clearop(1);
  3540. taicpu(p).clearop(0);
  3541. taicpu(p).ops:=0;
  3542. AsmL.Remove(hp1);
  3543. hp1.Free;
  3544. taicpu(hp2).loadreg(0, NR_EDX);
  3545. taicpu(hp2).loadreg(1, NR_EAX);
  3546. taicpu(hp3).loadreg(0, NR_EDX);
  3547. taicpu(hp3).loadreg(1, NR_EAX);
  3548. AllocRegBetween(NR_EAX, hp3, hp4, TmpUsedRegs);
  3549. { Convert references in the following instruction (hp4) from %edx to %eax }
  3550. for OperIdx := 0 to taicpu(hp4).ops - 1 do
  3551. with taicpu(hp4).oper[OperIdx]^ do
  3552. case typ of
  3553. top_reg:
  3554. if reg = NR_EDX then
  3555. reg := NR_EAX;
  3556. top_ref:
  3557. begin
  3558. if ref^.base = NR_EDX then
  3559. ref^.base := NR_EAX;
  3560. if ref^.index = NR_EDX then
  3561. ref^.index := NR_EAX;
  3562. end;
  3563. else
  3564. ;
  3565. end;
  3566. end;
  3567. end;
  3568. {$else x86_64}
  3569. end;
  3570. end
  3571. else if MatchOperand(taicpu(hp1).oper[0]^, 63) and
  3572. { the use of %rdx also covers the opsize being S_Q }
  3573. MatchOperand(taicpu(hp1).oper[1]^, NR_RDX) then
  3574. begin
  3575. { Note it has to be specifically "movq %rax,%rdx", and those specific sub-registers }
  3576. if (taicpu(p).oper[0]^.reg = NR_RAX) and
  3577. (taicpu(p).oper[1]^.reg = NR_RDX) then
  3578. begin
  3579. { Change:
  3580. movq %rax,%rdx
  3581. sarq $63,%rdx
  3582. To:
  3583. cqto
  3584. }
  3585. DebugMsg(SPeepholeOptimization + 'MovSar2Cqto', p);
  3586. Asml.Remove(hp1);
  3587. hp1.Free;
  3588. taicpu(p).opcode := A_CQO;
  3589. taicpu(p).opsize := S_NO;
  3590. taicpu(p).clearop(1);
  3591. taicpu(p).clearop(0);
  3592. taicpu(p).ops:=0;
  3593. Result := True;
  3594. end
  3595. else if (cs_opt_size in current_settings.optimizerswitches) and
  3596. (taicpu(p).oper[0]^.reg = NR_RDX) and
  3597. (taicpu(p).oper[1]^.reg = NR_RAX) then
  3598. begin
  3599. { Change:
  3600. movq %rdx,%rax
  3601. sarq $63,%rdx
  3602. To:
  3603. movq %rdx,%rax
  3604. cqto
  3605. Note that this creates a dependency between the two instructions,
  3606. so only perform if optimising for size.
  3607. }
  3608. DebugMsg(SPeepholeOptimization + 'MovSar2MovCqto', p);
  3609. taicpu(hp1).opcode := A_CQO;
  3610. taicpu(hp1).opsize := S_NO;
  3611. taicpu(hp1).clearop(1);
  3612. taicpu(hp1).clearop(0);
  3613. taicpu(hp1).ops:=0;
  3614. {$endif x86_64}
  3615. end;
  3616. end;
  3617. end
  3618. else if MatchInstruction(hp1, A_MOV, []) and
  3619. (taicpu(hp1).oper[1]^.typ = top_reg) then
  3620. { Though "GetNextInstruction" could be factored out, along with
  3621. the instructions that depend on hp2, it is an expensive call that
  3622. should be delayed for as long as possible, hence we do cheaper
  3623. checks first that are likely to be False. [Kit] }
  3624. begin
  3625. if MatchOperand(taicpu(p).oper[1]^, NR_EDX) and
  3626. (
  3627. (
  3628. (taicpu(hp1).oper[1]^.reg = NR_EAX) and
  3629. (
  3630. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^) or
  3631. MatchOperand(taicpu(hp1).oper[0]^, NR_EDX)
  3632. )
  3633. ) or
  3634. (
  3635. (taicpu(hp1).oper[1]^.reg = NR_EDX) and
  3636. (
  3637. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^) or
  3638. MatchOperand(taicpu(hp1).oper[0]^, NR_EAX)
  3639. )
  3640. )
  3641. ) and
  3642. GetNextInstruction(hp1, hp2) and
  3643. MatchInstruction(hp2, A_SAR, []) and
  3644. MatchOperand(taicpu(hp2).oper[0]^, 31) then
  3645. begin
  3646. if MatchOperand(taicpu(hp2).oper[1]^, NR_EDX) then
  3647. begin
  3648. { Change:
  3649. movl r/m,%edx movl r/m,%eax movl r/m,%edx movl r/m,%eax
  3650. movl %edx,%eax or movl %eax,%edx or movl r/m,%eax or movl r/m,%edx
  3651. sarl $31,%edx sarl $31,%edx sarl $31,%edx sarl $31,%edx
  3652. To:
  3653. movl r/m,%eax <- Note the change in register
  3654. cltd
  3655. }
  3656. DebugMsg(SPeepholeOptimization + 'MovMovSar2MovCltd', p);
  3657. AllocRegBetween(NR_EAX, p, hp1, UsedRegs);
  3658. taicpu(p).loadreg(1, NR_EAX);
  3659. taicpu(hp1).opcode := A_CDQ;
  3660. taicpu(hp1).clearop(1);
  3661. taicpu(hp1).clearop(0);
  3662. taicpu(hp1).ops:=0;
  3663. AsmL.Remove(hp2);
  3664. hp2.Free;
  3665. (*
  3666. {$ifdef x86_64}
  3667. end
  3668. else if MatchOperand(taicpu(hp2).oper[1]^, NR_RDX) and
  3669. { This code sequence does not get generated - however it might become useful
  3670. if and when 128-bit signed integer types make an appearance, so the code
  3671. is kept here for when it is eventually needed. [Kit] }
  3672. (
  3673. (
  3674. (taicpu(hp1).oper[1]^.reg = NR_RAX) and
  3675. (
  3676. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^) or
  3677. MatchOperand(taicpu(hp1).oper[0]^, NR_RDX)
  3678. )
  3679. ) or
  3680. (
  3681. (taicpu(hp1).oper[1]^.reg = NR_RDX) and
  3682. (
  3683. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^) or
  3684. MatchOperand(taicpu(hp1).oper[0]^, NR_RAX)
  3685. )
  3686. )
  3687. ) and
  3688. GetNextInstruction(hp1, hp2) and
  3689. MatchInstruction(hp2, A_SAR, [S_Q]) and
  3690. MatchOperand(taicpu(hp2).oper[0]^, 63) and
  3691. MatchOperand(taicpu(hp2).oper[1]^, NR_RDX) then
  3692. begin
  3693. { Change:
  3694. movq r/m,%rdx movq r/m,%rax movq r/m,%rdx movq r/m,%rax
  3695. movq %rdx,%rax or movq %rax,%rdx or movq r/m,%rax or movq r/m,%rdx
  3696. sarq $63,%rdx sarq $63,%rdx sarq $63,%rdx sarq $63,%rdx
  3697. To:
  3698. movq r/m,%rax <- Note the change in register
  3699. cqto
  3700. }
  3701. DebugMsg(SPeepholeOptimization + 'MovMovSar2MovCqto', p);
  3702. AllocRegBetween(NR_RAX, p, hp1, UsedRegs);
  3703. taicpu(p).loadreg(1, NR_RAX);
  3704. taicpu(hp1).opcode := A_CQO;
  3705. taicpu(hp1).clearop(1);
  3706. taicpu(hp1).clearop(0);
  3707. taicpu(hp1).ops:=0;
  3708. AsmL.Remove(hp2);
  3709. hp2.Free;
  3710. {$endif x86_64}
  3711. *)
  3712. end;
  3713. end;
  3714. end
  3715. else if (taicpu(p).oper[0]^.typ = top_ref) and
  3716. (hp1.typ = ait_instruction) and
  3717. { while the GetNextInstruction(hp1,hp2) call could be factored out,
  3718. doing it separately in both branches allows to do the cheap checks
  3719. with low probability earlier }
  3720. ((IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  3721. GetNextInstruction(hp1,hp2) and
  3722. MatchInstruction(hp2,A_MOV,[])
  3723. ) or
  3724. ((taicpu(hp1).opcode=A_LEA) and
  3725. GetNextInstruction(hp1,hp2) and
  3726. MatchInstruction(hp2,A_MOV,[]) and
  3727. ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
  3728. (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
  3729. ) or
  3730. (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,
  3731. taicpu(p).oper[1]^.reg) and
  3732. (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg)) or
  3733. (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_NO)) or
  3734. (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,NR_NO,taicpu(p).oper[1]^.reg))
  3735. ) and
  3736. ((MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^)) or not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)))
  3737. )
  3738. ) and
  3739. MatchOperand(taicpu(hp1).oper[taicpu(hp1).ops-1]^,taicpu(hp2).oper[0]^) and
  3740. (taicpu(hp2).oper[1]^.typ = top_ref) then
  3741. begin
  3742. TransferUsedRegs(TmpUsedRegs);
  3743. UpdateUsedRegs(TmpUsedRegs,tai(p.next));
  3744. UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
  3745. if (RefsEqual(taicpu(hp2).oper[1]^.ref^,taicpu(p).oper[0]^.ref^) and
  3746. not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,TmpUsedRegs))) then
  3747. { change mov (ref), reg
  3748. add/sub/or/... reg2/$const, reg
  3749. mov reg, (ref)
  3750. # release reg
  3751. to add/sub/or/... reg2/$const, (ref) }
  3752. begin
  3753. case taicpu(hp1).opcode of
  3754. A_INC,A_DEC,A_NOT,A_NEG :
  3755. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  3756. A_LEA :
  3757. begin
  3758. taicpu(hp1).opcode:=A_ADD;
  3759. if (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.index<>NR_NO) then
  3760. taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
  3761. else if (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.base<>NR_NO) then
  3762. taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base)
  3763. else
  3764. taicpu(hp1).loadconst(0,taicpu(hp1).oper[0]^.ref^.offset);
  3765. taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
  3766. DebugMsg(SPeepholeOptimization + 'FoldLea done',hp1);
  3767. end
  3768. else
  3769. taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
  3770. end;
  3771. asml.remove(p);
  3772. asml.remove(hp2);
  3773. p.free;
  3774. hp2.free;
  3775. p := hp1
  3776. end;
  3777. Exit;
  3778. {$ifdef x86_64}
  3779. end
  3780. else if (taicpu(p).opsize = S_L) and
  3781. (taicpu(p).oper[1]^.typ = top_reg) and
  3782. (
  3783. MatchInstruction(hp1, A_MOV,[]) and
  3784. (taicpu(hp1).opsize = S_L) and
  3785. (taicpu(hp1).oper[1]^.typ = top_reg)
  3786. ) and (
  3787. GetNextInstruction(hp1, hp2) and
  3788. (tai(hp2).typ=ait_instruction) and
  3789. (taicpu(hp2).opsize = S_Q) and
  3790. (
  3791. (
  3792. MatchInstruction(hp2, A_ADD,[]) and
  3793. (taicpu(hp2).opsize = S_Q) and
  3794. (taicpu(hp2).oper[0]^.typ = top_reg) and (taicpu(hp2).oper[1]^.typ = top_reg) and
  3795. (
  3796. (
  3797. (getsupreg(taicpu(hp2).oper[0]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) and
  3798. (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))
  3799. ) or (
  3800. (getsupreg(taicpu(hp2).oper[0]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
  3801. (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg))
  3802. )
  3803. )
  3804. ) or (
  3805. MatchInstruction(hp2, A_LEA,[]) and
  3806. (taicpu(hp2).oper[0]^.ref^.offset = 0) and
  3807. (taicpu(hp2).oper[0]^.ref^.scalefactor <= 1) and
  3808. (
  3809. (
  3810. (getsupreg(taicpu(hp2).oper[0]^.ref^.base) = getsupreg(taicpu(p).oper[1]^.reg)) and
  3811. (getsupreg(taicpu(hp2).oper[0]^.ref^.index) = getsupreg(taicpu(hp1).oper[1]^.reg))
  3812. ) or (
  3813. (getsupreg(taicpu(hp2).oper[0]^.ref^.base) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
  3814. (getsupreg(taicpu(hp2).oper[0]^.ref^.index) = getsupreg(taicpu(p).oper[1]^.reg))
  3815. )
  3816. ) and (
  3817. (
  3818. (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))
  3819. ) or (
  3820. (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg))
  3821. )
  3822. )
  3823. )
  3824. )
  3825. ) and (
  3826. GetNextInstruction(hp2, hp3) and
  3827. MatchInstruction(hp3, A_SHR,[]) and
  3828. (taicpu(hp3).opsize = S_Q) and
  3829. (taicpu(hp3).oper[0]^.typ = top_const) and (taicpu(hp2).oper[1]^.typ = top_reg) and
  3830. (taicpu(hp3).oper[0]^.val = 1) and
  3831. (taicpu(hp3).oper[1]^.reg = taicpu(hp2).oper[1]^.reg)
  3832. ) then
  3833. begin
  3834. { Change movl x, reg1d movl x, reg1d
  3835. movl y, reg2d movl y, reg2d
  3836. addq reg2q,reg1q or leaq (reg1q,reg2q),reg1q
  3837. shrq $1, reg1q shrq $1, reg1q
  3838. ( reg1d and reg2d can be switched around in the first two instructions )
  3839. To movl x, reg1d
  3840. addl y, reg1d
  3841. rcrl $1, reg1d
  3842. This corresponds to the common expression (x + y) shr 1, where
  3843. x and y are Cardinals (replacing "shr 1" with "div 2" produces
  3844. smaller code, but won't account for x + y causing an overflow). [Kit]
  3845. }
  3846. if (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) then
  3847. { Change first MOV command to have the same register as the final output }
  3848. taicpu(p).oper[1]^.reg := taicpu(hp1).oper[1]^.reg
  3849. else
  3850. taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  3851. { Change second MOV command to an ADD command. This is easier than
  3852. converting the existing command because it means we don't have to
  3853. touch 'y', which might be a complicated reference, and also the
  3854. fact that the third command might either be ADD or LEA. [Kit] }
  3855. taicpu(hp1).opcode := A_ADD;
  3856. { Delete old ADD/LEA instruction }
  3857. asml.remove(hp2);
  3858. hp2.free;
  3859. { Convert "shrq $1, reg1q" to "rcr $1, reg1d" }
  3860. taicpu(hp3).opcode := A_RCR;
  3861. taicpu(hp3).changeopsize(S_L);
  3862. setsubreg(taicpu(hp3).oper[1]^.reg, R_SUBD);
  3863. {$endif x86_64}
  3864. end;
  3865. end;
function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
  { Folds a preceding register-to-register MOV into the IMUL at p:
        mov  reg1,reg2
        imul y,reg2        ->   imul y,reg1,reg2
    so that the MOV can be deleted.  Returns True when the fold was done. }
  var
    hp1 : tai;
  begin
    Result:=false;
    { p must be an IMUL with 2 or 3 operands whose first operand is a
      constant (or a full-address reference) and whose destination is a
      register; in the 3-operand form the last two operands must already
      name the same register, otherwise the rewrite below would change
      the destination }
    if (taicpu(p).ops >= 2) and
       ((taicpu(p).oper[0]^.typ = top_const) or
        ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
       (taicpu(p).oper[1]^.typ = top_reg) and
       ((taicpu(p).ops = 2) or
        ((taicpu(p).oper[2]^.typ = top_reg) and
         (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
       { the previous instruction must be "mov reg1,reg2" writing exactly
         the register IMUL multiplies into }
       GetLastInstruction(p,hp1) and
       MatchInstruction(hp1,A_MOV,[]) and
       MatchOpType(taicpu(hp1),top_reg,top_reg) and
       (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
      begin
        TransferUsedRegs(TmpUsedRegs);
        { only safe if reg2 is dead after the IMUL (or the 3-operand form
          already targets it explicitly) }
        if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,p,TmpUsedRegs)) or
          ((taicpu(p).ops = 3) and (taicpu(p).oper[1]^.reg=taicpu(p).oper[2]^.reg)) then
          { change
              mov reg1,reg2
              imul y,reg2 to imul y,reg1,reg2 }
          begin
            taicpu(p).ops := 3;
            { note the order: oper[1] (reg2) is copied into oper[2] before
              oper[1] is overwritten with the MOV's source register (reg1) }
            taicpu(p).loadreg(2,taicpu(p).oper[1]^.reg);
            taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
            DebugMsg(SPeepholeOptimization + 'MovImul2Imul done',p);
            asml.remove(hp1);
            hp1.free;
            result:=true;
          end;
      end;
  end;
  3900. procedure TX86AsmOptimizer.ConvertJumpToRET(const p: tai; const ret_p: tai);
  3901. var
  3902. ThisLabel: TAsmLabel;
  3903. begin
  3904. ThisLabel := tasmlabel(taicpu(p).oper[0]^.ref^.symbol);
  3905. ThisLabel.decrefs;
  3906. taicpu(p).opcode := A_RET;
  3907. taicpu(p).is_jmp := false;
  3908. taicpu(p).ops := taicpu(ret_p).ops;
  3909. case taicpu(ret_p).ops of
  3910. 0:
  3911. taicpu(p).clearop(0);
  3912. 1:
  3913. taicpu(p).loadconst(0,taicpu(ret_p).oper[0]^.val);
  3914. else
  3915. internalerror(2016041301);
  3916. end;
  3917. { If the original label is now dead, it might turn out that the label
  3918. immediately follows p. As a result, everything beyond it, which will
  3919. be just some final register configuration and a RET instruction, is
  3920. now dead code. [Kit] }
  3921. { NOTE: This is much faster than introducing a OptPass2RET routine and
  3922. running RemoveDeadCodeAfterJump for each RET instruction, because
  3923. this optimisation rarely happens and most RETs appear at the end of
  3924. routines where there is nothing that can be stripped. [Kit] }
  3925. if not ThisLabel.is_used then
  3926. RemoveDeadCodeAfterJump(p);
  3927. end;
function TX86AsmOptimizer.OptPass2Jmp(var p : tai) : boolean;
  { Pass-2 optimisation of an unconditional JMP to a label: if the code at
    the target is a RET (optionally preceded by a single MOV), the jump is
    replaced by that code so no branch is taken at run time. }
  var
    hp1, hp2, hp3: tai;
    OperIdx: Integer;
  begin
    result:=false;
    { only plain direct jumps to a local label (no base/index register) }
    if (taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full) and (taicpu(p).oper[0]^.ref^.base=NR_NO) and
      (taicpu(p).oper[0]^.ref^.index=NR_NO) and (taicpu(p).oper[0]^.ref^.symbol is tasmlabel) then
      begin
        hp1:=getlabelwithsym(tasmlabel(taicpu(p).oper[0]^.ref^.symbol));
        if (taicpu(p).condition=C_None) and assigned(hp1) and SkipLabels(hp1,hp1) and (hp1.typ = ait_instruction) then
          begin
            case taicpu(hp1).opcode of
              A_RET:
                {
                  change
                         jmp .L1
                         ...
                     .L1:
                         ret
                  into
                         ret
                }
                begin
                  ConvertJumpToRET(p, hp1);
                  result:=true;
                end;
              A_MOV:
                {
                  change
                         jmp .L1
                         ...
                     .L1:
                         mov ##, ##
                         ret
                  into
                         mov ##, ##
                         ret
                }
                { This optimisation tends to increase code size if the pass 1 MOV optimisations aren't
                  re-run, so only do this particular optimisation if optimising for speed or when
                  optimisations are very in-depth. [Kit] }
                if (current_settings.optimizerswitches * [cs_opt_level3, cs_opt_size]) <> [cs_opt_size] then
                  begin
                    GetNextInstruction(hp1, hp2);
                    if not Assigned(hp2) then
                      Exit;
                    if (hp2.typ in [ait_label, ait_align]) then
                      SkipLabels(hp2,hp2);
                    if Assigned(hp2) and MatchInstruction(hp2, A_RET, [S_NO]) then
                      begin
                        { Duplicate the MOV instruction }
                        hp3:=tai(hp1.getcopy);
                        asml.InsertBefore(hp3, p);
                        { Make sure the compiler knows about any final registers written here }
                        for OperIdx := 0 to 1 do
                          with taicpu(hp3).oper[OperIdx]^ do
                            begin
                              case typ of
                                top_ref:
                                  begin
                                    { RIP is not tracked as an allocatable register on x86-64 }
                                    if (ref^.base <> NR_NO) {$ifdef x86_64} and (ref^.base <> NR_RIP) {$endif x86_64} then
                                      AllocRegBetween(ref^.base, hp3, tai(p.Next), UsedRegs);
                                    if (ref^.index <> NR_NO) {$ifdef x86_64} and (ref^.index <> NR_RIP) {$endif x86_64} then
                                      AllocRegBetween(ref^.index, hp3, tai(p.Next), UsedRegs);
                                  end;
                                top_reg:
                                  AllocRegBetween(reg, hp3, tai(p.Next), UsedRegs);
                                else
                                  ;
                              end;
                            end;
                        { Now change the jump into a RET instruction }
                        ConvertJumpToRET(p, hp2);
                        result:=true;
                      end;
                  end;
              else
                ;
            end;
          end;
      end;
  end;
  4011. function CanBeCMOV(p : tai) : boolean;
  4012. begin
  4013. CanBeCMOV:=assigned(p) and
  4014. MatchInstruction(p,A_MOV,[S_W,S_L,S_Q]) and
  4015. { we can't use cmov ref,reg because
  4016. ref could be nil and cmov still throws an exception
  4017. if ref=nil but the mov isn't done (FK)
  4018. or ((taicpu(p).oper[0]^.typ = top_ref) and
  4019. (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
  4020. }
  4021. (MatchOpType(taicpu(p),top_reg,top_reg) or
  4022. { allow references, but only pure symbols or got rel. addressing with RIP as based,
  4023. it is not expected that this can cause a seg. violation }
  4024. (MatchOpType(taicpu(p),top_ref,top_reg) and
  4025. (((taicpu(p).oper[0]^.ref^.base=NR_NO) and (taicpu(p).oper[0]^.ref^.refaddr=addr_no)){$ifdef x86_64} or
  4026. ((taicpu(p).oper[0]^.ref^.base=NR_RIP) and (taicpu(p).oper[0]^.ref^.refaddr=addr_pic)){$endif x86_64}
  4027. ) and
  4028. (taicpu(p).oper[0]^.ref^.index=NR_NO) and
  4029. (taicpu(p).oper[0]^.ref^.offset=0)
  4030. )
  4031. );
  4032. end;
function TX86AsmOptimizer.OptPass2Jcc(var p : tai) : boolean;
  { Pass-2 optimisations for a conditional jump at p:
      - jb/jnb skipping a single inc/dec  ->  (cmc +) adc/sbb op,0
      - jCC @L1; jmp @L2; ... @L1: ret    ->  j!CC @L2; ret
      - jCC over one or more MOVs         ->  CMOVcc sequence, when the
        target CPU supports CMOV (skipped entirely on i8086) }
  var
    hp1,hp2,hp3,hp4,hpmov2: tai;
    carryadd_opcode : TAsmOp;
    l : Longint;            { counts the MOVs that qualify for CMOV conversion }
    condition : TAsmCond;
    symbol: TAsmSymbol;
  begin
    result:=false;
    symbol:=nil;
    if GetNextInstruction(p,hp1) then
      begin
        symbol := TAsmLabel(taicpu(p).oper[0]^.ref^.symbol);
        { exactly one instruction between the jump and its own target label? }
        if (hp1.typ=ait_instruction) and
           GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
           (Tasmlabel(symbol) = Tai_label(hp2).labsym) then
          { jb @@1                    cmc
            inc/dec operand   -->     adc/sbb operand,0
            @@1:
            ... and ...
            jnb @@1
            inc/dec operand   -->     adc/sbb operand,0
            @@1: }
          begin
            carryadd_opcode:=A_NONE;
            if Taicpu(p).condition in [C_NAE,C_B] then
              begin
                if Taicpu(hp1).opcode=A_INC then
                  carryadd_opcode:=A_ADC;
                if Taicpu(hp1).opcode=A_DEC then
                  carryadd_opcode:=A_SBB;
                if carryadd_opcode<>A_NONE then
                  begin
                    { jump taken when CF=1, i.e. inc/dec executed when CF=0:
                      invert the carry with CMC so adc/sbb adds the +1/-1
                      exactly in that case }
                    Taicpu(p).clearop(0);
                    Taicpu(p).ops:=0;
                    Taicpu(p).is_jmp:=false;
                    Taicpu(p).opcode:=A_CMC;
                    Taicpu(p).condition:=C_NONE;
                    Taicpu(hp1).ops:=2;
                    Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                    Taicpu(hp1).loadconst(0,0);
                    Taicpu(hp1).opcode:=carryadd_opcode;
                    result:=true;
                    exit;
                  end;
              end;
            if Taicpu(p).condition in [C_AE,C_NB] then
              begin
                if Taicpu(hp1).opcode=A_INC then
                  carryadd_opcode:=A_ADC;
                if Taicpu(hp1).opcode=A_DEC then
                  carryadd_opcode:=A_SBB;
                if carryadd_opcode<>A_NONE then
                  begin
                    { jump taken when CF=0: the carry flag already matches
                      adc/sbb's behaviour, so the jump can simply be dropped }
                    asml.remove(p);
                    p.free;
                    Taicpu(hp1).ops:=2;
                    Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                    Taicpu(hp1).loadconst(0,0);
                    Taicpu(hp1).opcode:=carryadd_opcode;
                    p:=hp1;
                    result:=true;
                    exit;
                  end;
              end;
          end;
        { Detect the following:
            jmp<cond>   @Lbl1
            jmp         @Lbl2
            ...
          @Lbl1:
            ret

          Change to:
            jmp<inv_cond> @Lbl2
            ret
        }
        if MatchInstruction(hp1,A_JMP,[]) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full) then
          begin
            hp2:=getlabelwithsym(TAsmLabel(symbol));
            if Assigned(hp2) and SkipLabels(hp2,hp2) and
               MatchInstruction(hp2,A_RET,[S_NO]) then
              begin
                taicpu(p).condition := inverse_cond(taicpu(p).condition);
                { Change label address to that of the unconditional jump }
                taicpu(p).loadoper(0, taicpu(hp1).oper[0]^);
                TAsmLabel(symbol).DecRefs;
                { turn the unconditional jump into a copy of the RET }
                taicpu(hp1).opcode := A_RET;
                taicpu(hp1).is_jmp := false;
                taicpu(hp1).ops := taicpu(hp2).ops;
                DebugMsg(SPeepholeOptimization+'JccJmpRet2J!ccRet',p);
                case taicpu(hp2).ops of
                  0:
                    taicpu(hp1).clearop(0);
                  1:
                    taicpu(hp1).loadconst(0,taicpu(hp2).oper[0]^.val);
                  else
                    internalerror(2016041302);
                end;
              end;
          end;
      end;
{$ifndef i8086}
    if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
      begin
        { check for
            jCC   xxx
            <several movs>
          xxx:
        }
        l:=0;
        GetNextInstruction(p, hp1);
        while assigned(hp1) and
          CanBeCMOV(hp1) and
          { stop on labels }
          not(hp1.typ=ait_label) do
          begin
            inc(l);
            GetNextInstruction(hp1,hp1);
          end;
        if assigned(hp1) then
          begin
            if FindLabel(tasmlabel(symbol),hp1) then
              begin
                { at most 4 MOVs are converted to keep the dependency chain short }
                if (l<=4) and (l>0) then
                  begin
                    { the MOVs execute when the jump is NOT taken, so they
                      become CMOVs on the inverted condition }
                    condition:=inverse_cond(taicpu(p).condition);
                    GetNextInstruction(p,hp1);
                    repeat
                      if not Assigned(hp1) then
                        InternalError(2018062900);
                      taicpu(hp1).opcode:=A_CMOVcc;
                      taicpu(hp1).condition:=condition;
                      UpdateUsedRegs(hp1);
                      GetNextInstruction(hp1,hp1);
                    until not(CanBeCMOV(hp1));
                    { Remember what hp1 is in case there's multiple aligns to get rid of }
                    hp2 := hp1;
                    repeat
                      if not Assigned(hp2) then
                        InternalError(2018062910);
                      case hp2.typ of
                        ait_label:
                          { What we expected - break out of the loop (it won't be a dead label at the top of
                            a cluster because that was optimised at an earlier stage) }
                          Break;
                        ait_align:
                          { Go to the next entry until a label is found (may be multiple aligns before it) }
                          begin
                            hp2 := tai(hp2.Next);
                            Continue;
                          end;
                        else
                          begin
                            { Might be a comment or temporary allocation entry }
                            if not (hp2.typ in SkipInstr) then
                              InternalError(2018062911);
                            hp2 := tai(hp2.Next);
                            Continue;
                          end;
                      end;
                    until False;
                    { Now we can safely decrement the reference count }
                    tasmlabel(symbol).decrefs;
                    DebugMsg(SPeepholeOptimization+'JccMov2CMov',p);
                    { Remove the original jump }
                    asml.Remove(p);
                    p.Free;
                    GetNextInstruction(hp2, p); { Instruction after the label }
                    { Remove the label if this is its final reference }
                    if (tasmlabel(symbol).getrefs=0) then
                      StripLabelFast(hp1);
                    if Assigned(p) then
                      begin
                        UpdateUsedRegs(p);
                        result:=true;
                      end;
                    exit;
                  end;
              end
            else
              begin
                { check further for
                    jCC   xxx
                    <several movs 1>
                    jmp   yyy
                  xxx:
                    <several movs 2>
                  yyy:
                }
                { hp2 points to jmp yyy }
                hp2:=hp1;
                { skip hp1 to xxx (or an align right before it) }
                GetNextInstruction(hp1, hp1);
                if assigned(hp2) and
                  assigned(hp1) and
                  (l<=3) and
                  (hp2.typ=ait_instruction) and
                  (taicpu(hp2).is_jmp) and
                  (taicpu(hp2).condition=C_None) and
                  { real label and jump, no further references to the
                    label are allowed }
                  (tasmlabel(symbol).getrefs=1) and
                  FindLabel(tasmlabel(symbol),hp1) then
                  begin
                    l:=0;
                    { skip hp1 to <several moves 2> }
                    if (hp1.typ = ait_align) then
                      GetNextInstruction(hp1, hp1);
                    GetNextInstruction(hp1, hpmov2);
                    hp1 := hpmov2;
                    while assigned(hp1) and
                      CanBeCMOV(hp1) do
                      begin
                        inc(l);
                        GetNextInstruction(hp1, hp1);
                      end;
                    { hp1 points to yyy (or an align right before it) }
                    hp3 := hp1;
                    if assigned(hp1) and
                      FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
                      begin
                        { first block runs when the jump is not taken ->
                          inverted condition; second block gets the original one }
                        condition:=inverse_cond(taicpu(p).condition);
                        GetNextInstruction(p,hp1);
                        repeat
                          taicpu(hp1).opcode:=A_CMOVcc;
                          taicpu(hp1).condition:=condition;
                          UpdateUsedRegs(hp1);
                          GetNextInstruction(hp1,hp1);
                        until not(assigned(hp1)) or
                          not(CanBeCMOV(hp1));
                        condition:=inverse_cond(condition);
                        hp1 := hpmov2;
                        { hp1 is now at <several movs 2> }
                        while Assigned(hp1) and CanBeCMOV(hp1) do
                          begin
                            taicpu(hp1).opcode:=A_CMOVcc;
                            taicpu(hp1).condition:=condition;
                            UpdateUsedRegs(hp1);
                            GetNextInstruction(hp1,hp1);
                          end;
                        hp1 := p;
                        { Get first instruction after label }
                        GetNextInstruction(hp3, p);
                        if assigned(p) and (hp3.typ = ait_align) then
                          GetNextInstruction(p, p);
                        { Don't dereference yet, as doing so will cause
                          GetNextInstruction to skip the label and
                          optional align marker. [Kit] }
                        GetNextInstruction(hp2, hp4);
                        DebugMsg(SPeepholeOptimization+'JccMovJmpMov2CMovCMov',hp1);
                        { remove jCC }
                        asml.remove(hp1);
                        hp1.free;
                        { Now we can safely decrement it }
                        tasmlabel(symbol).decrefs;
                        { Remove label xxx (it will have a ref of zero due to the initial check }
                        StripLabelFast(hp4);
                        { remove jmp }
                        symbol := taicpu(hp2).oper[0]^.ref^.symbol;
                        asml.remove(hp2);
                        hp2.free;
                        { As before, now we can safely decrement it }
                        tasmlabel(symbol).decrefs;
                        { Remove label yyy (and the optional alignment) if its reference falls to zero }
                        if tasmlabel(symbol).getrefs = 0 then
                          StripLabelFast(hp3);
                        if Assigned(p) then
                          begin
                            UpdateUsedRegs(p);
                            result:=true;
                          end;
                        exit;
                      end;
                  end;
              end;
          end;
      end;
{$endif i8086}
  end;
function TX86AsmOptimizer.OptPass1Movx(var p : tai) : boolean;
  { Pass-1 optimisations for MOVSX/MOVZX at p:
      - fold movx + arith-op + mov-back into a single arith-op on the
        original operand
      - for MOVZX only: remove redundant AND masks that follow it, and
        rewrite some movzx forms into faster equivalent AND/MOV+AND code }
  var
    hp1,hp2: tai;
  begin
    result:=false;
    if (taicpu(p).oper[1]^.typ = top_reg) and
       GetNextInstruction(p,hp1) and
       (hp1.typ = ait_instruction) and
       IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
       GetNextInstruction(hp1,hp2) and
       MatchInstruction(hp2,A_MOV,[]) and
       (taicpu(hp2).oper[0]^.typ = top_reg) and
       OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
{$ifdef i386}
       { not all registers have byte size sub registers on i386 }
       ((taicpu(hp2).opsize<>S_B) or (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX])) and
{$endif i386}
       (((taicpu(hp1).ops=2) and
         (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
        ((taicpu(hp1).ops=1) and
         (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
       not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then
      begin
        { change movsX/movzX reg/ref, reg2
                 add/sub/or/... reg3/$const, reg2
                 mov reg2 reg/ref
          to     add/sub/or/... reg3/$const, reg/ref }
        { by example:
            movswl %si,%eax        movswl %si,%eax      p
            decl %eax              addl %edx,%eax       hp1
            movw %ax,%si           movw %ax,%si         hp2
          ->
            movswl %si,%eax        movswl %si,%eax      p
            decw %eax              addw %edx,%eax       hp1
            movw %ax,%si           movw %ax,%si         hp2
        }
        taicpu(hp1).changeopsize(taicpu(hp2).opsize);
        {
          ->
            movswl %si,%eax        movswl %si,%eax      p
            decw %si               addw %dx,%si         hp1
            movw %ax,%si           movw %ax,%si         hp2
        }
        case taicpu(hp1).ops of
          1:
            taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
          2:
            begin
              taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
              if (taicpu(hp1).oper[0]^.typ = top_reg) then
                setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
            end;
          else
            internalerror(2008042701);
        end;
        {
          ->
            decw %si               addw %dx,%si         p
        }
        DebugMsg(SPeepholeOptimization + 'var3',p);
        asml.remove(p);
        asml.remove(hp2);
        p.free;
        hp2.free;
        { NOTE(review): Result stays false here even though the list was
          changed; p is advanced to hp1 instead - confirm the pass-1 driver
          relies on the updated p rather than on the Result flag }
        p:=hp1;
      end
    else if taicpu(p).opcode=A_MOVZX then
      begin
        { removes superfluous And's after movzx's }
        if (taicpu(p).oper[1]^.typ = top_reg) and
           GetNextInstruction(p, hp1) and
           (tai(hp1).typ = ait_instruction) and
           (taicpu(hp1).opcode = A_AND) and
           (taicpu(hp1).oper[0]^.typ = top_const) and
           (taicpu(hp1).oper[1]^.typ = top_reg) and
           (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
          begin
            { the AND is redundant when its mask matches the zero-extension
              width exactly }
            case taicpu(p).opsize Of
              S_BL, S_BW{$ifdef x86_64}, S_BQ{$endif x86_64}:
                if (taicpu(hp1).oper[0]^.val = $ff) then
                  begin
                    DebugMsg(SPeepholeOptimization + 'var4',p);
                    asml.remove(hp1);
                    hp1.free;
                  end;
              S_WL{$ifdef x86_64}, S_WQ{$endif x86_64}:
                if (taicpu(hp1).oper[0]^.val = $ffff) then
                  begin
                    DebugMsg(SPeepholeOptimization + 'var5',p);
                    asml.remove(hp1);
                    hp1.free;
                  end;
{$ifdef x86_64}
              S_LQ:
                if (taicpu(hp1).oper[0]^.val = $ffffffff) then
                  begin
                    if (cs_asm_source in current_settings.globalswitches) then
                      asml.insertbefore(tai_comment.create(strpnew(SPeepholeOptimization + 'var6')),p);
                    asml.remove(hp1);
                    hp1.Free;
                  end;
{$endif x86_64}
              else
                ;
            end;
          end;
        { changes some movzx constructs to faster synonyms (all examples
          are given with eax/ax, but are also valid for other registers)}
        if (taicpu(p).oper[1]^.typ = top_reg) then
          if (taicpu(p).oper[0]^.typ = top_reg) then
            case taicpu(p).opsize of
              S_BW:
                begin
                  if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
                    not(cs_opt_size in current_settings.optimizerswitches) then
                    {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
                    begin
                      taicpu(p).opcode := A_AND;
                      taicpu(p).changeopsize(S_W);
                      taicpu(p).loadConst(0,$ff);
                      DebugMsg(SPeepholeOptimization + 'var7',p);
                    end
                  else if GetNextInstruction(p, hp1) and
                    (tai(hp1).typ = ait_instruction) and
                    (taicpu(hp1).opcode = A_AND) and
                    (taicpu(hp1).oper[0]^.typ = top_const) and
                    (taicpu(hp1).oper[1]^.typ = top_reg) and
                    (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                    { Change "movzbw %reg1, %reg2; andw $const, %reg2"
                      to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
                    begin
                      DebugMsg(SPeepholeOptimization + 'var8',p);
                      taicpu(p).opcode := A_MOV;
                      taicpu(p).changeopsize(S_W);
                      setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
                      taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                    end;
                end;
              S_BL:
                begin
                  if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
                    not(cs_opt_size in current_settings.optimizerswitches) then
                    { Change "movzbl %al, %eax" to "andl $0x0ffh, %eax" }
                    begin
                      taicpu(p).opcode := A_AND;
                      taicpu(p).changeopsize(S_L);
                      taicpu(p).loadConst(0,$ff)
                    end
                  else if GetNextInstruction(p, hp1) and
                    (tai(hp1).typ = ait_instruction) and
                    (taicpu(hp1).opcode = A_AND) and
                    (taicpu(hp1).oper[0]^.typ = top_const) and
                    (taicpu(hp1).oper[1]^.typ = top_reg) and
                    (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                    { Change "movzbl %reg1, %reg2; andl $const, %reg2"
                      to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
                    begin
                      DebugMsg(SPeepholeOptimization + 'var10',p);
                      taicpu(p).opcode := A_MOV;
                      taicpu(p).changeopsize(S_L);
                      { do not use R_SUBWHOLE
                        as movl %rdx,%eax
                        is invalid in assembler PM }
                      setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
                      taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                    end
                end;
{$ifndef i8086}
              S_WL:
                begin
                  if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
                    not(cs_opt_size in current_settings.optimizerswitches) then
                    { Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax" }
                    begin
                      DebugMsg(SPeepholeOptimization + 'var11',p);
                      taicpu(p).opcode := A_AND;
                      taicpu(p).changeopsize(S_L);
                      taicpu(p).loadConst(0,$ffff);
                    end
                  else if GetNextInstruction(p, hp1) and
                    (tai(hp1).typ = ait_instruction) and
                    (taicpu(hp1).opcode = A_AND) and
                    (taicpu(hp1).oper[0]^.typ = top_const) and
                    (taicpu(hp1).oper[1]^.typ = top_reg) and
                    (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                    { Change "movzwl %reg1, %reg2; andl $const, %reg2"
                      to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
                    begin
                      DebugMsg(SPeepholeOptimization + 'var12',p);
                      taicpu(p).opcode := A_MOV;
                      taicpu(p).changeopsize(S_L);
                      { do not use R_SUBWHOLE
                        as movl %rdx,%eax
                        is invalid in assembler PM }
                      setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
                      taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
                    end;
                end;
{$endif i8086}
              else
                ;
            end
          else if (taicpu(p).oper[0]^.typ = top_ref) then
            begin
              { movzx from memory followed by AND: keep the movzx but shrink
                the AND mask to the zero-extended width }
              if GetNextInstruction(p, hp1) and
                (tai(hp1).typ = ait_instruction) and
                (taicpu(hp1).opcode = A_AND) and
                MatchOpType(taicpu(hp1),top_const,top_reg) and
                (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                begin
                  //taicpu(p).opcode := A_MOV;
                  case taicpu(p).opsize Of
                    S_BL:
                      begin
                        DebugMsg(SPeepholeOptimization + 'var13',p);
                        taicpu(hp1).changeopsize(S_L);
                        taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                      end;
                    S_WL:
                      begin
                        DebugMsg(SPeepholeOptimization + 'var14',p);
                        taicpu(hp1).changeopsize(S_L);
                        taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
                      end;
                    S_BW:
                      begin
                        DebugMsg(SPeepholeOptimization + 'var15',p);
                        taicpu(hp1).changeopsize(S_W);
                        taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                      end;
{$ifdef x86_64}
                    S_BQ:
                      begin
                        DebugMsg(SPeepholeOptimization + 'var16',p);
                        taicpu(hp1).changeopsize(S_Q);
                        taicpu(hp1).loadConst(
                          0, taicpu(hp1).oper[0]^.val and $ff);
                      end;
                    S_WQ:
                      begin
                        DebugMsg(SPeepholeOptimization + 'var17',p);
                        taicpu(hp1).changeopsize(S_Q);
                        taicpu(hp1).loadConst(0, taicpu(hp1).oper[0]^.val and $ffff);
                      end;
                    S_LQ:
                      begin
                        DebugMsg(SPeepholeOptimization + 'var18',p);
                        taicpu(hp1).changeopsize(S_Q);
                        taicpu(hp1).loadConst(
                          0, taicpu(hp1).oper[0]^.val and $ffffffff);
                      end;
{$endif x86_64}
                    else
                      Internalerror(2017050704)
                  end;
                end;
            end;
      end;
  end;
  4571. function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
  4572. var
  4573. hp1 : tai;
  4574. MaskLength : Cardinal;
  4575. begin
  4576. Result:=false;
  4577. if GetNextInstruction(p, hp1) then
  4578. begin
  4579. if MatchOpType(taicpu(p),top_const,top_reg) and
  4580. MatchInstruction(hp1,A_AND,[]) and
  4581. MatchOpType(taicpu(hp1),top_const,top_reg) and
  4582. (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
  4583. { the second register must contain the first one, so compare their subreg types }
  4584. (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
  4585. (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
  4586. { change
  4587. and const1, reg
  4588. and const2, reg
  4589. to
  4590. and (const1 and const2), reg
  4591. }
  4592. begin
  4593. taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
  4594. DebugMsg(SPeepholeOptimization + 'AndAnd2And done',hp1);
  4595. asml.remove(p);
  4596. p.Free;
  4597. p:=hp1;
  4598. Result:=true;
  4599. exit;
  4600. end
  4601. else if MatchOpType(taicpu(p),top_const,top_reg) and
  4602. MatchInstruction(hp1,A_MOVZX,[]) and
  4603. (taicpu(hp1).oper[0]^.typ = top_reg) and
  4604. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  4605. (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
  4606. (((taicpu(p).opsize=S_W) and
  4607. (taicpu(hp1).opsize=S_BW)) or
  4608. ((taicpu(p).opsize=S_L) and
  4609. (taicpu(hp1).opsize in [S_WL,S_BL]))
  4610. {$ifdef x86_64}
  4611. or
  4612. ((taicpu(p).opsize=S_Q) and
  4613. (taicpu(hp1).opsize in [S_BQ,S_WQ]))
  4614. {$endif x86_64}
  4615. ) then
  4616. begin
  4617. if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
  4618. ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
  4619. ) or
  4620. (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
  4621. ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
  4622. then
  4623. begin
  4624. { Unlike MOVSX, MOVZX doesn't actually have a version that zero-extends a
  4625. 32-bit register to a 64-bit register, or even a version called MOVZXD, so
  4626. code that tests for the presence of AND 0xffffffff followed by MOVZX is
  4627. wasted, and is indictive of a compiler bug if it were triggered. [Kit]
  4628. NOTE: To zero-extend from 32 bits to 64 bits, simply use the standard MOV.
  4629. }
  4630. DebugMsg(SPeepholeOptimization + 'AndMovzToAnd done',p);
  4631. asml.remove(hp1);
  4632. hp1.free;
  4633. Exit;
  4634. end;
  4635. end
  4636. else if MatchOpType(taicpu(p),top_const,top_reg) and
  4637. MatchInstruction(hp1,A_SHL,[]) and
  4638. MatchOpType(taicpu(hp1),top_const,top_reg) and
  4639. (getsupreg(taicpu(p).oper[1]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) then
  4640. begin
  4641. {$ifopt R+}
  4642. {$define RANGE_WAS_ON}
  4643. {$R-}
  4644. {$endif}
  4645. { get length of potential and mask }
  4646. MaskLength:=SizeOf(taicpu(p).oper[0]^.val)*8-BsrQWord(taicpu(p).oper[0]^.val)-1;
  4647. { really a mask? }
  4648. {$ifdef RANGE_WAS_ON}
  4649. {$R+}
  4650. {$endif}
  4651. if (((QWord(1) shl MaskLength)-1)=taicpu(p).oper[0]^.val) and
  4652. { unmasked part shifted out? }
  4653. ((MaskLength+taicpu(hp1).oper[0]^.val)>=topsize2memsize[taicpu(hp1).opsize]) then
  4654. begin
  4655. DebugMsg(SPeepholeOptimization + 'AndShlToShl done',p);
  4656. { take care of the register (de)allocs following p }
  4657. UpdateUsedRegs(tai(p.next));
  4658. asml.remove(p);
  4659. p.free;
  4660. p:=hp1;
  4661. Result:=true;
  4662. exit;
  4663. end;
  4664. end
  4665. else if MatchOpType(taicpu(p),top_const,top_reg) and
  4666. MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
  4667. (taicpu(hp1).oper[0]^.typ = top_reg) and
  4668. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  4669. (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
  4670. (((taicpu(p).opsize=S_W) and
  4671. (taicpu(hp1).opsize=S_BW)) or
  4672. ((taicpu(p).opsize=S_L) and
  4673. (taicpu(hp1).opsize in [S_WL,S_BL]))
  4674. {$ifdef x86_64}
  4675. or
  4676. ((taicpu(p).opsize=S_Q) and
  4677. (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
  4678. {$endif x86_64}
  4679. ) then
  4680. begin
  4681. if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
  4682. ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
  4683. ) or
  4684. (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
  4685. ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
  4686. {$ifdef x86_64}
  4687. or
  4688. (((taicpu(hp1).opsize)=S_LQ) and
  4689. ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
  4690. )
  4691. {$endif x86_64}
  4692. then
  4693. begin
  4694. DebugMsg(SPeepholeOptimization + 'AndMovsxToAnd',p);
  4695. asml.remove(hp1);
  4696. hp1.free;
  4697. Exit;
  4698. end;
  4699. end
  4700. else if (taicpu(p).oper[1]^.typ = top_reg) and
  4701. (hp1.typ = ait_instruction) and
  4702. (taicpu(hp1).is_jmp) and
  4703. (taicpu(hp1).opcode<>A_JMP) and
  4704. not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
  4705. begin
  4706. { change
  4707. and x, reg
  4708. jxx
  4709. to
  4710. test x, reg
  4711. jxx
  4712. if reg is deallocated before the
  4713. jump, but only if it's a conditional jump (PFV)
  4714. }
  4715. taicpu(p).opcode := A_TEST;
  4716. Exit;
  4717. end;
  4718. end;
  4719. { Lone AND tests }
  4720. if MatchOpType(taicpu(p),top_const,top_reg) then
  4721. begin
  4722. {
  4723. - Convert and $0xFF,reg to and reg,reg if reg is 8-bit
  4724. - Convert and $0xFFFF,reg to and reg,reg if reg is 16-bit
  4725. - Convert and $0xFFFFFFFF,reg to and reg,reg if reg is 32-bit
  4726. }
  4727. if ((taicpu(p).oper[0]^.val = $FF) and (taicpu(p).opsize = S_B)) or
  4728. ((taicpu(p).oper[0]^.val = $FFFF) and (taicpu(p).opsize = S_W)) or
  4729. ((taicpu(p).oper[0]^.val = $FFFFFFFF) and (taicpu(p).opsize = S_L)) then
  4730. begin
  4731. taicpu(p).loadreg(0, taicpu(p).oper[1]^.reg)
  4732. end;
  4733. end;
  4734. end;
  4735. function TX86AsmOptimizer.OptPass2Lea(var p : tai) : Boolean;
  4736. begin
  4737. Result:=false;
  4738. if not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) and
  4739. MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
  4740. (taicpu(p).oper[0]^.ref^.index<>NR_NO) then
  4741. begin
  4742. taicpu(p).loadreg(1,taicpu(p).oper[0]^.ref^.base);
  4743. taicpu(p).loadreg(0,taicpu(p).oper[0]^.ref^.index);
  4744. taicpu(p).opcode:=A_ADD;
  4745. DebugMsg(SPeepholeOptimization + 'Lea2AddBase done',p);
  4746. result:=true;
  4747. end
  4748. else if not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) and
  4749. MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) and
  4750. (taicpu(p).oper[0]^.ref^.base<>NR_NO) then
  4751. begin
  4752. taicpu(p).loadreg(1,taicpu(p).oper[0]^.ref^.index);
  4753. taicpu(p).loadreg(0,taicpu(p).oper[0]^.ref^.base);
  4754. taicpu(p).opcode:=A_ADD;
  4755. DebugMsg(SPeepholeOptimization + 'Lea2AddIndex done',p);
  4756. result:=true;
  4757. end;
  4758. end;
  4759. function TX86AsmOptimizer.OptPass2SUB(var p: tai): Boolean;
  4760. var
  4761. hp1, hp2: tai; NewRef: TReference;
  4762. begin
  4763. { Change:
  4764. subl/q $x,%reg1
  4765. movl/q %reg1,%reg2
  4766. To:
  4767. leal/q $-x(%reg1),%reg2
  4768. subl/q $x,%reg1
  4769. Breaks the dependency chain and potentially permits the removal of
  4770. a CMP instruction if one follows.
  4771. }
  4772. Result := False;
  4773. if not (cs_opt_size in current_settings.optimizerswitches) and
  4774. (taicpu(p).opsize in [S_L{$ifdef x86_64}, S_Q{$endif x86_64}]) and
  4775. MatchOpType(taicpu(p),top_const,top_reg) and
  4776. GetNextInstruction(p, hp1) and
  4777. MatchInstruction(hp1, A_MOV, [taicpu(p).opsize]) and
  4778. (taicpu(hp1).oper[1]^.typ = top_reg) and
  4779. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^.reg) then
  4780. begin
  4781. { Change the MOV instruction to a LEA instruction, and update the
  4782. first operand }
  4783. reference_reset(NewRef, 1, []);
  4784. NewRef.base := taicpu(p).oper[1]^.reg;
  4785. NewRef.scalefactor := 1;
  4786. NewRef.offset := -taicpu(p).oper[0]^.val;
  4787. taicpu(hp1).opcode := A_LEA;
  4788. taicpu(hp1).loadref(0, NewRef);
  4789. { Move what is now the LEA instruction to before the SUB instruction }
  4790. Asml.Remove(hp1);
  4791. Asml.InsertBefore(hp1, p);
  4792. AllocRegBetween(taicpu(hp1).oper[1]^.reg, hp1, p, UsedRegs);
  4793. DebugMsg(SPeepholeOptimization + 'SubMov2LeaSub', p);
  4794. Result := True;
  4795. end;
  4796. end;
{ Post-peephole handler for LEA: collapses the stack-adjust / call /
  stack-restore / ret epilogue pattern into a plain tail jump.  Returns True
  when the pattern was rewritten. }
function TX86AsmOptimizer.PostPeepholeOptLea(var p : tai) : Boolean;

  { Advances hp1 past instructions that provably do not involve the stack
    pointer; returns False when the end of the list is reached.  The
    commented-out matches are earlier, stricter filters kept for reference. }
  function SkipSimpleInstructions(var hp1 : tai) : Boolean;
    begin
      { we can skip all instructions not messing with the stack pointer }
      while assigned(hp1) and {MatchInstruction(taicpu(hp1),[A_LEA,A_MOV,A_MOVQ,A_MOVSQ,A_MOVSX,A_MOVSXD,A_MOVZX,
        A_AND,A_OR,A_XOR,A_ADD,A_SHR,A_SHL,A_IMUL,A_SETcc,A_SAR,A_SUB,A_TEST,A_CMOVcc,
        A_MOVSS,A_MOVSD,A_MOVAPS,A_MOVUPD,A_MOVAPD,A_MOVUPS,
        A_VMOVSS,A_VMOVSD,A_VMOVAPS,A_VMOVUPD,A_VMOVAPD,A_VMOVUPS],[]) and}
        ({(taicpu(hp1).ops=0) or }
         ({(MatchOpType(taicpu(hp1),top_reg,top_reg) or MatchOpType(taicpu(hp1),top_const,top_reg) or
           (MatchOpType(taicpu(hp1),top_ref,top_reg))
          ) and }
          not(RegInInstruction(NR_STACK_POINTER_REG,hp1)) { and not(RegInInstruction(NR_FRAME_POINTER_REG,hp1))}
         )
        ) do
        GetNextInstruction(hp1,hp1);
      Result:=assigned(hp1);
    end;

  var
    hp1, hp2, hp3: tai;   { hp1: the CALL, hp2: the restoring LEA, hp3: the RET }
  begin
    Result:=false;

    { replace
        leal(q) x(<stackpointer>),<stackpointer>
        call procname
        leal(q) -x(<stackpointer>),<stackpointer>
        ret
      by
        jmp procname

      but do it only on level 4 because it destroys stack back traces
    }
    if (cs_opt_level4 in current_settings.optimizerswitches) and
      MatchOpType(taicpu(p),top_ref,top_reg) and
      { p must be a pure "lea off(%esp),%esp" stack adjustment }
      (taicpu(p).oper[0]^.ref^.base=NR_STACK_POINTER_REG) and
      (taicpu(p).oper[0]^.ref^.index=NR_NO) and
      { the -8 or -24 are not required, but bail out early if possible,
        higher values are unlikely }
      ((taicpu(p).oper[0]^.ref^.offset=-8) or
       (taicpu(p).oper[0]^.ref^.offset=-24)) and
      (taicpu(p).oper[0]^.ref^.symbol=nil) and
      (taicpu(p).oper[0]^.ref^.relsymbol=nil) and
      (taicpu(p).oper[0]^.ref^.segment=NR_NO) and
      (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG) and
      GetNextInstruction(p, hp1) and
      { trick to skip label }
      ((hp1.typ=ait_instruction) or GetNextInstruction(hp1, hp1)) and
      SkipSimpleInstructions(hp1) and
      MatchInstruction(hp1,A_CALL,[S_NO]) and
      GetNextInstruction(hp1, hp2) and
      { the second LEA must undo exactly the adjustment made by the first }
      MatchInstruction(hp2,A_LEA,[taicpu(p).opsize]) and
      MatchOpType(taicpu(hp2),top_ref,top_reg) and
      (taicpu(hp2).oper[0]^.ref^.offset=-taicpu(p).oper[0]^.ref^.offset) and
      (taicpu(hp2).oper[0]^.ref^.base=NR_STACK_POINTER_REG) and
      (taicpu(hp2).oper[0]^.ref^.index=NR_NO) and
      (taicpu(hp2).oper[0]^.ref^.symbol=nil) and
      (taicpu(hp2).oper[0]^.ref^.relsymbol=nil) and
      (taicpu(hp2).oper[0]^.ref^.segment=NR_NO) and
      (taicpu(hp2).oper[1]^.reg=NR_STACK_POINTER_REG) and
      GetNextInstruction(hp2, hp3) and
      { trick to skip label }
      ((hp3.typ=ait_instruction) or GetNextInstruction(hp3, hp3)) and
      MatchInstruction(hp3,A_RET,[S_NO]) and
      { a RET with an immediate pops caller arguments - leave it alone }
      (taicpu(hp3).ops=0) then
      begin
        { turn the CALL into a tail jump and drop both stack adjustments }
        taicpu(hp1).opcode := A_JMP;
        taicpu(hp1).is_jmp := true;
        DebugMsg(SPeepholeOptimization + 'LeaCallLeaRet2Jmp done',p);
        RemoveCurrentP(p);
        AsmL.Remove(hp2);
        hp2.free;
        AsmL.Remove(hp3);
        hp3.free;
        Result:=true;
      end;
  end;
  4872. function TX86AsmOptimizer.PostPeepholeOptMov(var p : tai) : Boolean;
  4873. var
  4874. Value, RegName: string;
  4875. begin
  4876. Result:=false;
  4877. if (taicpu(p).oper[1]^.typ = top_reg) and (taicpu(p).oper[0]^.typ = top_const) then
  4878. begin
  4879. case taicpu(p).oper[0]^.val of
  4880. 0:
  4881. { Don't make this optimisation if the CPU flags are required, since XOR scrambles them }
  4882. if not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
  4883. begin
  4884. { change "mov $0,%reg" into "xor %reg,%reg" }
  4885. taicpu(p).opcode := A_XOR;
  4886. taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
  4887. Result := True;
  4888. end;
  4889. $1..$FFFFFFFF:
  4890. begin
  4891. { Code size reduction by J. Gareth "Kit" Moreton }
  4892. { change 64-bit register to 32-bit register to reduce code size (upper 32 bits will be set to zero) }
  4893. case taicpu(p).opsize of
  4894. S_Q:
  4895. begin
  4896. RegName := debug_regname(taicpu(p).oper[1]^.reg); { 64-bit register name }
  4897. Value := debug_tostr(taicpu(p).oper[0]^.val);
  4898. { The actual optimization }
  4899. setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
  4900. taicpu(p).changeopsize(S_L);
  4901. DebugMsg(SPeepholeOptimization + 'movq $' + Value + ',' + RegName + ' -> movl $' + Value + ',' + debug_regname(taicpu(p).oper[1]^.reg) + ' (immediate can be represented with just 32 bits)', p);
  4902. Result := True;
  4903. end;
  4904. else
  4905. { Do nothing };
  4906. end;
  4907. end;
  4908. -1:
  4909. { Don't make this optimisation if the CPU flags are required, since OR scrambles them }
  4910. if (cs_opt_size in current_settings.optimizerswitches) and
  4911. (taicpu(p).opsize <> S_B) and
  4912. not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
  4913. begin
  4914. { change "mov $-1,%reg" into "or $-1,%reg" }
  4915. { NOTES:
  4916. - No size saving is made when changing a Word-sized assignment unless the register is AX (smaller encoding)
  4917. - This operation creates a false dependency on the register, so only do it when optimising for size
  4918. - It is possible to set memory operands using this method, but this creates an even greater false dependency, so don't do this at all
  4919. }
  4920. taicpu(p).opcode := A_OR;
  4921. Result := True;
  4922. end;
  4923. end;
  4924. end;
  4925. end;
  4926. function TX86AsmOptimizer.PostPeepholeOptMOVSX(var p : tai) : boolean;
  4927. begin
  4928. Result := False;
  4929. if not MatchOpType(taicpu(p), top_reg, top_reg) then
  4930. Exit;
  4931. { Convert:
  4932. movswl %ax,%eax -> cwtl
  4933. movslq %eax,%rax -> cdqe
  4934. NOTE: Don't convert movswl %al,%ax to cbw, because cbw and cwde
  4935. refer to the same opcode and depends only on the assembler's
  4936. current operand-size attribute. [Kit]
  4937. }
  4938. with taicpu(p) do
  4939. case opsize of
  4940. S_WL:
  4941. if (oper[0]^.reg = NR_AX) and (oper[1]^.reg = NR_EAX) then
  4942. begin
  4943. DebugMsg(SPeepholeOptimization + 'Converted movswl %ax,%eax to cwtl', p);
  4944. opcode := A_CWDE;
  4945. clearop(0);
  4946. clearop(1);
  4947. ops := 0;
  4948. Result := True;
  4949. end;
  4950. {$ifdef x86_64}
  4951. S_LQ:
  4952. if (oper[0]^.reg = NR_EAX) and (oper[1]^.reg = NR_RAX) then
  4953. begin
  4954. DebugMsg(SPeepholeOptimization + 'Converted movslq %eax,%rax to cltq', p);
  4955. opcode := A_CDQE;
  4956. clearop(0);
  4957. clearop(1);
  4958. ops := 0;
  4959. Result := True;
  4960. end;
  4961. {$endif x86_64}
  4962. else
  4963. ;
  4964. end;
  4965. end;
  4966. function TX86AsmOptimizer.PostPeepholeOptCmp(var p : tai) : Boolean;
  4967. begin
  4968. Result:=false;
  4969. { change "cmp $0, %reg" to "test %reg, %reg" }
  4970. if MatchOpType(taicpu(p),top_const,top_reg) and
  4971. (taicpu(p).oper[0]^.val = 0) then
  4972. begin
  4973. taicpu(p).opcode := A_TEST;
  4974. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  4975. Result:=true;
  4976. end;
  4977. end;
{ Post-peephole handler for TEST/OR used as a pure flag probe.  If the
  previous instruction already set the relevant flags, the probe is removed;
  otherwise "test $-1,%reg" is canonicalised to "test %reg,%reg".
  Returns True when p was removed. }
function TX86AsmOptimizer.PostPeepholeOptTestOr(var p : tai) : Boolean;
  var
    IsTestConstX : Boolean;   { True for the "test $-1,%y" spelling }
    hp1,hp2 : tai;            { hp1: previous instruction, hp2: the Jcc/SETcc/CMOVcc }
  begin
    Result:=false;
    { removes the line marked with (x) from the sequence
      and/or/xor/add/sub/... $x, %y
      test/or %y, %y  |  test $-1, %y    (x)
      j(n)z _Label
      as the first instruction already adjusts the ZF
      %y operand may also be a reference }

    IsTestConstX:=(taicpu(p).opcode=A_TEST) and
      MatchOperand(taicpu(p).oper[0]^,-1);

    { p must be a self-test ("op %y,%y" or "test $-1,%y") followed by an
      instruction that consumes the flags }
    if (OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) or IsTestConstX) and
      GetLastInstruction(p, hp1) and
      (tai(hp1).typ = ait_instruction) and
      GetNextInstruction(p,hp2) and
      MatchInstruction(hp2,A_SETcc,A_Jcc,A_CMOVcc,[]) then
      case taicpu(hp1).opcode Of
        A_ADD, A_SUB, A_OR, A_XOR, A_AND:
          begin
            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
              { and in case of carry for A(E)/B(E)/C/NC }
              ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
               ((taicpu(hp1).opcode <> A_ADD) and
                (taicpu(hp1).opcode <> A_SUB))) then
              begin
                { drop the redundant probe and continue at its successor }
                hp1 := tai(p.next);
                asml.remove(p);
                p.free;
                p := tai(hp1);
                Result:=true;
              end;
          end;
        A_SHL, A_SAL, A_SHR, A_SAR:
          begin
            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
              { SHL/SAL/SHR/SAR with a value of 0 do not change the flags }
              { therefore, it's only safe to do this optimization for }
              { shifts by a (nonzero) constant }
              (taicpu(hp1).oper[0]^.typ = top_const) and
              (taicpu(hp1).oper[0]^.val <> 0) and
              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
              { and in case of carry for A(E)/B(E)/C/NC }
              (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
              begin
                { drop the redundant probe and continue at its successor }
                hp1 := tai(p.next);
                asml.remove(p);
                p.free;
                p := tai(hp1);
                Result:=true;
              end;
          end;
        A_DEC, A_INC, A_NEG:
          begin
            if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) and
              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
              { and in case of carry for A(E)/B(E)/C/NC }
              (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
              begin
                case taicpu(hp1).opcode of
                  A_DEC, A_INC:
                    { replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag }
                    begin
                      case taicpu(hp1).opcode Of
                        A_DEC: taicpu(hp1).opcode := A_SUB;
                        A_INC: taicpu(hp1).opcode := A_ADD;
                        else
                          ;
                      end;
                      taicpu(hp1).loadoper(1,taicpu(hp1).oper[0]^);
                      taicpu(hp1).loadConst(0,1);
                      taicpu(hp1).ops:=2;
                    end;
                  else
                    ;
                end;
                { drop the redundant probe and continue at its successor }
                hp1 := tai(p.next);
                asml.remove(p);
                p.free;
                p := tai(hp1);
                Result:=true;
              end;
          end
      else
        { previous instruction doesn't qualify: }
        { change "test $-1,%reg" into "test %reg,%reg" }
        if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
          taicpu(p).loadoper(0,taicpu(p).oper[1]^);
      end { case }
    { pattern didn't match at all: }
    { change "test $-1,%reg" into "test %reg,%reg" }
    else if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
      taicpu(p).loadoper(0,taicpu(p).oper[1]^);
  end;
{ Post-peephole handler for CALL: on old 32-bit CPUs rewrites "call f; jmp g"
  as "push g; jmp f", and on -O4 collapses "call f; ret" into "jmp f".
  Returns True when a rewrite was performed. }
function TX86AsmOptimizer.PostPeepholeOptCall(var p : tai) : Boolean;
  var
    hp1 : tai;        { the instruction following the CALL }
{$ifndef x86_64}
    hp2 : taicpu;     { the synthesised PUSH of the jump target }
{$endif x86_64}
  begin
    Result:=false;
{$ifndef x86_64}
    { don't do this on modern CPUs, this really hurts them due to
      broken call/ret pairing }
    if (current_settings.optimizecputype < cpu_Pentium2) and
      not(cs_create_pic in current_settings.moduleswitches) and
      GetNextInstruction(p, hp1) and
      MatchInstruction(hp1,A_JMP,[S_NO]) and
      MatchOpType(taicpu(hp1),top_ref) and
      (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full) then
      begin
        { "call f; jmp g" -> "push g; jmp f": f's RET then lands on g }
        hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
        InsertLLItem(p.previous, p, hp2);
        taicpu(p).opcode := A_JMP;
        taicpu(p).is_jmp := true;
        asml.remove(hp1);
        hp1.free;
        Result:=true;
      end
    else
{$endif x86_64}
    { replace
        call procname
        ret
      by
        jmp procname

      but do it only on level 4 because it destroys stack back traces
    }
    if (cs_opt_level4 in current_settings.optimizerswitches) and
      GetNextInstruction(p, hp1) and
      MatchInstruction(hp1,A_RET,[S_NO]) and
      { a RET with an immediate pops caller arguments - leave it alone }
      (taicpu(hp1).ops=0) then
      begin
        taicpu(p).opcode := A_JMP;
        taicpu(p).is_jmp := true;
        DebugMsg(SPeepholeOptimization + 'CallRet2Jmp done',p);
        asml.remove(hp1);
        hp1.free;
        Result:=true;
      end;
  end;
  5121. {$ifdef x86_64}
  5122. function TX86AsmOptimizer.PostPeepholeOptMovzx(var p : tai) : Boolean;
  5123. var
  5124. PreMessage: string;
  5125. begin
  5126. Result := False;
  5127. { Code size reduction by J. Gareth "Kit" Moreton }
  5128. { Convert MOVZBQ and MOVZWQ to MOVZBL and MOVZWL respectively if it removes the REX prefix }
  5129. if (taicpu(p).opsize in [S_BQ, S_WQ]) and
  5130. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_RAX, RS_RCX, RS_RDX, RS_RBX, RS_RSI, RS_RDI, RS_RBP, RS_RSP])
  5131. then
  5132. begin
  5133. { Has 64-bit register name and opcode suffix }
  5134. PreMessage := 'movz' + debug_opsize2str(taicpu(p).opsize) + ' ' + debug_operstr(taicpu(p).oper[0]^) + ',' + debug_regname(taicpu(p).oper[1]^.reg) + ' -> movz';
  5135. { The actual optimization }
  5136. setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
  5137. if taicpu(p).opsize = S_BQ then
  5138. taicpu(p).changeopsize(S_BL)
  5139. else
  5140. taicpu(p).changeopsize(S_WL);
  5141. DebugMsg(SPeepholeOptimization + PreMessage +
  5142. debug_opsize2str(taicpu(p).opsize) + ' ' + debug_operstr(taicpu(p).oper[0]^) + ',' + debug_regname(taicpu(p).oper[1]^.reg) + ' (removes REX prefix)', p);
  5143. end;
  5144. end;
  5145. function TX86AsmOptimizer.PostPeepholeOptXor(var p : tai) : Boolean;
  5146. var
  5147. PreMessage, RegName: string;
  5148. begin
  5149. { Code size reduction by J. Gareth "Kit" Moreton }
  5150. { change "xorq %reg,%reg" to "xorl %reg,%reg" for %rax, %rcx, %rdx, %rbx, %rsi, %rdi, %rbp and %rsp,
  5151. as this removes the REX prefix }
  5152. Result := False;
  5153. if not OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
  5154. Exit;
  5155. if taicpu(p).oper[0]^.typ <> top_reg then
  5156. { Should be impossible if both operands were equal, since one of XOR's operands must be a register }
  5157. InternalError(2018011500);
  5158. case taicpu(p).opsize of
  5159. S_Q:
  5160. begin
  5161. if (getsupreg(taicpu(p).oper[0]^.reg) in [RS_RAX, RS_RCX, RS_RDX, RS_RBX, RS_RSI, RS_RDI, RS_RBP, RS_RSP]) then
  5162. begin
  5163. RegName := debug_regname(taicpu(p).oper[0]^.reg); { 64-bit register name }
  5164. PreMessage := 'xorq ' + RegName + ',' + RegName + ' -> xorl ';
  5165. { The actual optimization }
  5166. setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
  5167. setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
  5168. taicpu(p).changeopsize(S_L);
  5169. RegName := debug_regname(taicpu(p).oper[0]^.reg); { 32-bit register name }
  5170. DebugMsg(SPeepholeOptimization + PreMessage + RegName + ',' + RegName + ' (removes REX prefix)', p);
  5171. end;
  5172. end;
  5173. else
  5174. ;
  5175. end;
  5176. end;
  5177. {$endif}
  5178. class procedure TX86AsmOptimizer.OptimizeRefs(var p: taicpu);
  5179. var
  5180. OperIdx: Integer;
  5181. begin
  5182. for OperIdx := 0 to p.ops - 1 do
  5183. if p.oper[OperIdx]^.typ = top_ref then
  5184. optimize_ref(p.oper[OperIdx]^.ref^, False);
  5185. end;
  5186. end.