aoptx86.pas 446 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. {$define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
  27. type
  { Optimisations that are expensive to detect up-front; the pre-opt pass sets
    a flag of this type when the corresponding check is worth performing }
  28. TOptsToCheck = (
  29. aoc_MovAnd2Mov_3
  30. );
  { x86-specific peephole optimizer.  Method bodies are in the implementation
    section of this unit; most public entry points are overrides of TAsmOptimizer. }
  31. TX86AsmOptimizer = class(TAsmOptimizer)
  32. { some optimizations are very expensive to check, so the
  33. pre opt pass can be used to set some flags, depending on the found
  34. instructions if it is worth to check a certain optimization }
  35. OptsToCheck : set of TOptsToCheck;
  36. function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
  37. function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
  38. function RegReadByInstruction(reg : TRegister; hp : tai) : boolean;
  39. function RegInInstruction(Reg: TRegister; p1: tai): Boolean;override;
  40. function GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
  41. { This version of GetNextInstructionUsingReg will look across conditional jumps,
  42. potentially allowing further optimisation (although it might need to know if
  43. it crossed a conditional jump). }
  44. function GetNextInstructionUsingRegCond(Current: tai; out Next: tai; reg: TRegister; var CrossJump: Boolean): Boolean;
  45. {
  46. In comparison with GetNextInstructionUsingReg, GetNextInstructionUsingRegTrackingUse tracks
  47. the use of a register by allocs/dealloc, so it can ignore calls.
  48. In the following example, GetNextInstructionUsingReg will return the second movq,
  49. GetNextInstructionUsingRegTrackingUse won't.
  50. movq %rdi,%rax
  51. # Register rdi released
  52. # Register rdi allocated
  53. movq %rax,%rdi
  54. While in this example:
  55. movq %rdi,%rax
  56. call proc
  57. movq %rdi,%rax
  58. GetNextInstructionUsingRegTrackingUse will return the second instruction while GetNextInstructionUsingReg
  59. won't.
  60. }
  61. function GetNextInstructionUsingRegTrackingUse(Current: tai; out Next: tai; reg: TRegister): Boolean;
  62. function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; override;
  63. private
  64. function SkipSimpleInstructions(var hp1: tai): Boolean;
  65. protected
  66. class function IsMOVZXAcceptable: Boolean; static; inline;
  67. { Attempts to allocate a volatile integer register for use between p and hp,
  68. using AUsedRegs for the current register usage information. Returns NR_NO
  69. if no free register could be found }
  70. function GetIntRegisterBetween(RegSize: TSubRegister; var AUsedRegs: TAllUsedRegs; p, hp: tai): TRegister;
  71. { Attempts to allocate a volatile MM register for use between p and hp,
  72. using AUsedRegs for the current register usage information. Returns NR_NO
  73. if no free register could be found }
  74. function GetMMRegisterBetween(RegSize: TSubRegister; var AUsedRegs: TAllUsedRegs; p, hp: tai): TRegister;
  75. { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
  76. function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
  77. { checks whether reading the value in reg1 depends on the value of reg2. This
  78. is very similar to SuperRegisterEquals, except it takes into account that
  79. R_SUBH and R_SUBL are independent (e.g. reading from AL does not
  80. depend on the value in AH). }
  81. function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
  82. { Replaces all references to AOldReg in a memory reference to ANewReg }
  83. class function ReplaceRegisterInRef(var ref: TReference; const AOldReg, ANewReg: TRegister): Boolean; static;
  84. { Replaces all references to AOldReg in an operand to ANewReg }
  85. class function ReplaceRegisterInOper(const p: taicpu; const OperIdx: Integer; const AOldReg, ANewReg: TRegister): Boolean; static;
  86. { Replaces all references to AOldReg in an instruction to ANewReg,
  87. except where the register is being written }
  88. function ReplaceRegisterInInstruction(const p: taicpu; const AOldReg, ANewReg: TRegister): Boolean;
  89. { Returns true if the reference only refers to ESP or EBP (or their 64-bit equivalents),
  90. or writes to a global symbol }
  91. class function IsRefSafe(const ref: PReference): Boolean; static; inline;
  92. { Returns true if the given MOV instruction can be safely converted to CMOV }
  93. class function CanBeCMOV(p : tai) : boolean; static;
  94. { Converts the LEA instruction to ADD/INC/SUB/DEC. Returns True if the
  95. conversion was successful }
  96. function ConvertLEA(const p : taicpu): Boolean;
  97. function DeepMOVOpt(const p_mov: taicpu; const hp: taicpu): Boolean;
  98. procedure DebugMsg(const s : string; p : tai);inline;
  99. class function IsExitCode(p : tai) : boolean; static;
  100. class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean; static;
  101. procedure RemoveLastDeallocForFuncRes(p : tai);
  102. function DoSubAddOpt(var p : tai) : Boolean;
  { Pre-peephole pass entry points (one per opcode family) }
  103. function PrePeepholeOptSxx(var p : tai) : boolean;
  104. function PrePeepholeOptIMUL(var p : tai) : boolean;
  105. function PrePeepholeOptAND(var p : tai) : boolean;
  { Pass 1 entry points }
  106. function OptPass1Test(var p: tai): boolean;
  107. function OptPass1Add(var p: tai): boolean;
  108. function OptPass1AND(var p : tai) : boolean;
  109. function OptPass1_V_MOVAP(var p : tai) : boolean;
  110. function OptPass1VOP(var p : tai) : boolean;
  111. function OptPass1MOV(var p : tai) : boolean;
  112. function OptPass1Movx(var p : tai) : boolean;
  113. function OptPass1MOVXX(var p : tai) : boolean;
  114. function OptPass1OP(var p : tai) : boolean;
  115. function OptPass1LEA(var p : tai) : boolean;
  116. function OptPass1Sub(var p : tai) : boolean;
  117. function OptPass1SHLSAL(var p : tai) : boolean;
  118. function OptPass1FSTP(var p : tai) : boolean;
  119. function OptPass1FLD(var p : tai) : boolean;
  120. function OptPass1Cmp(var p : tai) : boolean;
  121. function OptPass1PXor(var p : tai) : boolean;
  122. function OptPass1VPXor(var p: tai): boolean;
  123. function OptPass1Imul(var p : tai) : boolean;
  124. function OptPass1Jcc(var p : tai) : boolean;
  125. function OptPass1SHXX(var p: tai): boolean;
  126. function OptPass1_V_Cvtss2sd(var p: tai): boolean;
  { Pass 2 entry points }
  127. function OptPass2Movx(var p : tai): Boolean;
  128. function OptPass2MOV(var p : tai) : boolean;
  129. function OptPass2Imul(var p : tai) : boolean;
  130. function OptPass2Jmp(var p : tai) : boolean;
  131. function OptPass2Jcc(var p : tai) : boolean;
  132. function OptPass2Lea(var p: tai): Boolean;
  133. function OptPass2SUB(var p: tai): Boolean;
  134. function OptPass2ADD(var p : tai): Boolean;
  135. function OptPass2SETcc(var p : tai) : boolean;
  136. function CheckMemoryWrite(var first_mov, second_mov: taicpu): Boolean;
  { Post-peephole pass entry points }
  137. function PostPeepholeOptMov(var p : tai) : Boolean;
  138. function PostPeepholeOptMovzx(var p : tai) : Boolean;
  139. {$ifdef x86_64} { These post-peephole optimisations only affect 64-bit registers. [Kit] }
  140. function PostPeepholeOptXor(var p : tai) : Boolean;
  141. {$endif}
  142. function PostPeepholeOptAnd(var p : tai) : boolean;
  143. function PostPeepholeOptMOVSX(var p : tai) : boolean;
  144. function PostPeepholeOptCmp(var p : tai) : Boolean;
  145. function PostPeepholeOptTestOr(var p : tai) : Boolean;
  146. function PostPeepholeOptCall(var p : tai) : Boolean;
  147. function PostPeepholeOptLea(var p : tai) : Boolean;
  148. function PostPeepholeOptPush(var p: tai): Boolean;
  149. function PostPeepholeOptShr(var p : tai) : boolean;
  150. procedure ConvertJumpToRET(const p: tai; const ret_p: tai);
  151. function CheckJumpMovTransferOpt(var p: tai; hp1: tai; LoopCount: Integer; out Count: Integer): Boolean;
  152. procedure SwapMovCmp(var p, hp1: tai);
  153. { Processor-dependent reference optimisation }
  154. class procedure OptimizeRefs(var p: taicpu); static;
  155. end;
  156. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  157. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  158. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  159. function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;
  160. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  161. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  162. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  163. {$if max_operands>2}
  164. function MatchOperand(const oper1: TOper; const oper2: TOper; const oper3: TOper): boolean;
  165. {$endif max_operands>2}
  166. function RefsEqual(const r1, r2: treference): boolean;
  167. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  168. { returns true, if ref is a reference using only the registers passed as base and index
  169. and having an offset }
  170. function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  171. implementation
  172. uses
  173. cutils,verbose,
  174. systems,
  175. globals,
  176. cpuinfo,
  177. procinfo,
  178. paramgr,
  179. aasmbase,
  180. aoptbase,aoptutils,
  181. symconst,symsym,
  182. cgx86,
  183. itcpugas;
  184. {$ifdef DEBUG_AOPTCPU}
  185. const
  186. SPeepholeOptimization: shortstring = 'Peephole Optimization: ';
  187. {$else DEBUG_AOPTCPU}
  188. { Empty strings help the optimizer to remove string concatenations that won't
  189. ever appear to the user on release builds. [Kit] }
  190. const
  191. SPeepholeOptimization = '';
  192. {$endif DEBUG_AOPTCPU}
  193. LIST_STEP_SIZE = 4;
  194. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  195. begin
  196. result :=
  197. (instr.typ = ait_instruction) and
  198. (taicpu(instr).opcode = op) and
  199. ((opsize = []) or (taicpu(instr).opsize in opsize));
  200. end;
  201. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  202. begin
  203. result :=
  204. (instr.typ = ait_instruction) and
  205. ((taicpu(instr).opcode = op1) or
  206. (taicpu(instr).opcode = op2)
  207. ) and
  208. ((opsize = []) or (taicpu(instr).opsize in opsize));
  209. end;
  210. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  211. begin
  212. result :=
  213. (instr.typ = ait_instruction) and
  214. ((taicpu(instr).opcode = op1) or
  215. (taicpu(instr).opcode = op2) or
  216. (taicpu(instr).opcode = op3)
  217. ) and
  218. ((opsize = []) or (taicpu(instr).opsize in opsize));
  219. end;
  220. function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
  221. const opsize : topsizes) : boolean;
  222. var
  223. op : TAsmOp;
  224. begin
  225. result:=false;
  226. if (instr.typ <> ait_instruction) or
  227. ((opsize <> []) and not(taicpu(instr).opsize in opsize)) then
  228. exit;
  229. for op in ops do
  230. begin
  231. if taicpu(instr).opcode = op then
  232. begin
  233. result:=true;
  234. exit;
  235. end;
  236. end;
  237. end;
  238. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  239. begin
  240. result := (oper.typ = top_reg) and (oper.reg = reg);
  241. end;
  242. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  243. begin
  244. result := (oper.typ = top_const) and (oper.val = a);
  245. end;
  246. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  247. begin
  248. result := oper1.typ = oper2.typ;
  249. if result then
  250. case oper1.typ of
  251. top_const:
  252. Result:=oper1.val = oper2.val;
  253. top_reg:
  254. Result:=oper1.reg = oper2.reg;
  255. top_ref:
  256. Result:=RefsEqual(oper1.ref^, oper2.ref^);
  257. else
  258. internalerror(2013102801);
  259. end
  260. end;
  261. function MatchOperand(const oper1: TOper; const oper2: TOper; const oper3: TOper): boolean;
  262. begin
  263. result := (oper1.typ = oper2.typ) and (oper1.typ = oper3.typ);
  264. if result then
  265. case oper1.typ of
  266. top_const:
  267. Result:=(oper1.val = oper2.val) and (oper1.val = oper3.val);
  268. top_reg:
  269. Result:=(oper1.reg = oper2.reg) and (oper1.reg = oper3.reg);
  270. top_ref:
  271. Result:=RefsEqual(oper1.ref^, oper2.ref^) and RefsEqual(oper1.ref^, oper3.ref^);
  272. else
  273. internalerror(2020052401);
  274. end
  275. end;
  276. function RefsEqual(const r1, r2: treference): boolean;
  277. begin
  278. RefsEqual :=
  279. (r1.offset = r2.offset) and
  280. (r1.segment = r2.segment) and (r1.base = r2.base) and
  281. (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
  282. (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
  283. (r1.relsymbol = r2.relsymbol) and
  284. (r1.volatility=[]) and
  285. (r2.volatility=[]);
  286. end;
  287. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  288. begin
  289. Result:=(ref.offset=0) and
  290. (ref.scalefactor in [0,1]) and
  291. (ref.segment=NR_NO) and
  292. (ref.symbol=nil) and
  293. (ref.relsymbol=nil) and
  294. ((base=NR_INVALID) or
  295. (ref.base=base)) and
  296. ((index=NR_INVALID) or
  297. (ref.index=index)) and
  298. (ref.volatility=[]);
  299. end;
  300. function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  301. begin
  302. Result:=(ref.scalefactor in [0,1]) and
  303. (ref.segment=NR_NO) and
  304. (ref.symbol=nil) and
  305. (ref.relsymbol=nil) and
  306. ((base=NR_INVALID) or
  307. (ref.base=base)) and
  308. ((index=NR_INVALID) or
  309. (ref.index=index)) and
  310. (ref.volatility=[]);
  311. end;
  312. function InstrReadsFlags(p: tai): boolean;
  313. begin
  314. InstrReadsFlags := true;
  315. case p.typ of
  316. ait_instruction:
  317. if InsProp[taicpu(p).opcode].Ch*
  318. [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
  319. Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
  320. Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc,Ch_All]<>[] then
  321. exit;
  322. ait_label:
  323. exit;
  324. else
  325. ;
  326. end;
  327. InstrReadsFlags := false;
  328. end;
  329. function TX86AsmOptimizer.GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
  330. begin
  331. Next:=Current;
  332. repeat
  333. Result:=GetNextInstruction(Next,Next);
  334. until not (Result) or
  335. not(cs_opt_level3 in current_settings.optimizerswitches) or
  336. (Next.typ<>ait_instruction) or
  337. RegInInstruction(reg,Next) or
  338. is_calljmp(taicpu(Next).opcode);
  339. end;
  340. function TX86AsmOptimizer.GetNextInstructionUsingRegCond(Current: tai; out Next: tai; reg: TRegister; var CrossJump: Boolean): Boolean;
  341. begin
  342. { Note, CrossJump keeps its input value if a conditional jump is not found - it doesn't get set to False }
  343. Next := Current;
  344. repeat
  345. Result := GetNextInstruction(Next,Next);
  346. if Result and (Next.typ=ait_instruction) and is_calljmp(taicpu(Next).opcode) then
  347. if is_calljmpuncondret(taicpu(Next).opcode) then
  348. begin
  349. Result := False;
  350. Exit;
  351. end
  352. else
  353. CrossJump := True;
  354. until not Result or
  355. not (cs_opt_level3 in current_settings.optimizerswitches) or
  356. (Next.typ <> ait_instruction) or
  357. RegInInstruction(reg,Next);
  358. end;
  359. function TX86AsmOptimizer.GetNextInstructionUsingRegTrackingUse(Current: tai; out Next: tai; reg: TRegister): Boolean;
  360. begin
  361. if not(cs_opt_level3 in current_settings.optimizerswitches) then
  362. begin
  363. Result:=GetNextInstruction(Current,Next);
  364. exit;
  365. end;
  366. Next:=tai(Current.Next);
  367. Result:=false;
  368. while assigned(Next) do
  369. begin
  370. if ((Next.typ=ait_instruction) and is_calljmp(taicpu(Next).opcode) and not(taicpu(Next).opcode=A_CALL)) or
  371. ((Next.typ=ait_regalloc) and (getsupreg(tai_regalloc(Next).reg)=getsupreg(reg))) or
  372. ((Next.typ=ait_label) and not(labelCanBeSkipped(Tai_Label(Next)))) then
  373. exit
  374. else if (Next.typ=ait_instruction) and RegInInstruction(reg,Next) and not(taicpu(Next).opcode=A_CALL) then
  375. begin
  376. Result:=true;
  377. exit;
  378. end;
  379. Next:=tai(Next.Next);
  380. end;
  381. end;
  382. function TX86AsmOptimizer.InstructionLoadsFromReg(const reg: TRegister;const hp: tai): boolean;
  383. begin
  384. Result:=RegReadByInstruction(reg,hp);
  385. end;
  { Returns True if instruction hp reads register reg - either explicitly as an
    operand or inside a memory reference, or implicitly (EAX/EDX for
    MUL/IMUL/DIV/IDIV, condition flags for conditional instructions, and the
    fixed registers recorded in the instruction property table insprop). }
  386. function TX86AsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
  387. var
  388. p: taicpu;
  389. opcount: longint;
  390. begin
  391. RegReadByInstruction := false;
  392. if hp.typ <> ait_instruction then
  393. exit;
  394. p := taicpu(hp);
  395. case p.opcode of
  { Conservatively assume a CALL reads every register }
  396. A_CALL:
  397. regreadbyinstruction := true;
  { One-operand IMUL implicitly reads EAX/AX/AL; AH only for non-byte sizes }
  398. A_IMUL:
  399. case p.ops of
  400. 1:
  401. regReadByInstruction := RegInOp(reg,p.oper[0]^) or
  402. (
  403. ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
  404. ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
  405. );
  406. 2,3:
  407. regReadByInstruction :=
  408. reginop(reg,p.oper[0]^) or
  409. reginop(reg,p.oper[1]^);
  410. else
  411. InternalError(2019112801);
  412. end;
  { MUL implicitly reads EAX (AH excluded for byte-sized operations) }
  413. A_MUL:
  414. begin
  415. regReadByInstruction := RegInOp(reg,p.oper[0]^) or
  416. (
  417. ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
  418. ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
  419. );
  420. end;
  { DIV/IDIV implicitly read EAX and, except for byte-sized operations, EDX }
  421. A_IDIV,A_DIV:
  422. begin
  423. regReadByInstruction := RegInOp(reg,p.oper[0]^) or
  424. (
  425. (getregtype(reg)=R_INTREGISTER) and
  426. (
  427. (getsupreg(reg)=RS_EAX) or ((getsupreg(reg)=RS_EDX) and (p.opsize<>S_B))
  428. )
  429. );
  430. end;
  431. else
  432. begin
  { LEA only computes an address and never dereferences it, so segment
    registers appearing in its reference are not actually read }
  433. if (p.opcode=A_LEA) and is_segment_reg(reg) then
  434. begin
  435. RegReadByInstruction := false;
  436. exit;
  437. end;
  { Any register used inside a memory-reference operand is read }
  438. for opcount := 0 to p.ops-1 do
  439. if (p.oper[opCount]^.typ = top_ref) and
  440. RegInRef(reg,p.oper[opcount]^.ref^) then
  441. begin
  442. RegReadByInstruction := true;
  443. exit
  444. end;
  445. { special handling for SSE MOVSD }
  446. if (p.opcode=A_MOVSD) and (p.ops>0) then
  447. begin
  448. if p.ops<>2 then
  449. internalerror(2017042702);
  450. regReadByInstruction := reginop(reg,p.oper[0]^) or
  451. (
  452. (p.oper[1]^.typ=top_reg) and (p.oper[0]^.typ=top_reg) and reginop(reg, p.oper[1]^)
  453. );
  454. exit;
  455. end;
  { Generic path: consult the instruction's change information }
  456. with insprop[p.opcode] do
  457. begin
  { Implicit reads of the fixed integer registers recorded in Ch }
  458. if getregtype(reg)=R_INTREGISTER then
  459. begin
  460. case getsupreg(reg) of
  461. RS_EAX:
  462. if [Ch_REAX,Ch_RWEAX,Ch_MEAX]*Ch<>[] then
  463. begin
  464. RegReadByInstruction := true;
  465. exit
  466. end;
  467. RS_ECX:
  468. if [Ch_RECX,Ch_RWECX,Ch_MECX]*Ch<>[] then
  469. begin
  470. RegReadByInstruction := true;
  471. exit
  472. end;
  473. RS_EDX:
  474. if [Ch_REDX,Ch_RWEDX,Ch_MEDX]*Ch<>[] then
  475. begin
  476. RegReadByInstruction := true;
  477. exit
  478. end;
  479. RS_EBX:
  480. if [Ch_REBX,Ch_RWEBX,Ch_MEBX]*Ch<>[] then
  481. begin
  482. RegReadByInstruction := true;
  483. exit
  484. end;
  485. RS_ESP:
  486. if [Ch_RESP,Ch_RWESP,Ch_MESP]*Ch<>[] then
  487. begin
  488. RegReadByInstruction := true;
  489. exit
  490. end;
  491. RS_EBP:
  492. if [Ch_REBP,Ch_RWEBP,Ch_MEBP]*Ch<>[] then
  493. begin
  494. RegReadByInstruction := true;
  495. exit
  496. end;
  497. RS_ESI:
  498. if [Ch_RESI,Ch_RWESI,Ch_MESI]*Ch<>[] then
  499. begin
  500. RegReadByInstruction := true;
  501. exit
  502. end;
  503. RS_EDI:
  504. if [Ch_REDI,Ch_RWEDI,Ch_MEDI]*Ch<>[] then
  505. begin
  506. RegReadByInstruction := true;
  507. exit
  508. end;
  509. end;
  510. end;
  { Flags register: work out exactly which flag bits this instruction reads }
  511. if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
  512. begin
  { Conditional instructions read only the flags tested by their condition
    (checked per flag bit unless a whole-flags sub-register was requested) }
  513. if (Ch_RFLAGScc in Ch) and not(getsubreg(reg) in [R_SUBW,R_SUBD,R_SUBQ]) then
  514. begin
  515. case p.condition of
  516. C_A,C_NBE, { CF=0 and ZF=0 }
  517. C_BE,C_NA: { CF=1 or ZF=1 }
  518. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY,R_SUBFLAGZERO];
  519. C_AE,C_NB,C_NC, { CF=0 }
  520. C_B,C_NAE,C_C: { CF=1 }
  521. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY];
  522. C_NE,C_NZ, { ZF=0 }
  523. C_E,C_Z: { ZF=1 }
  524. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO];
  525. C_G,C_NLE, { ZF=0 and SF=OF }
  526. C_LE,C_NG: { ZF=1 or SF<>OF }
  527. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO,R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
  528. C_GE,C_NL, { SF=OF }
  529. C_L,C_NGE: { SF<>OF }
  530. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
  531. C_NO, { OF=0 }
  532. C_O: { OF=1 }
  533. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGOVERFLOW];
  534. C_NP,C_PO, { PF=0 }
  535. C_P,C_PE: { PF=1 }
  536. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGPARITY];
  537. C_NS, { SF=0 }
  538. C_S: { SF=1 }
  539. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN];
  540. else
  541. internalerror(2017042701);
  542. end;
  543. if RegReadByInstruction then
  544. exit;
  545. end;
  { Otherwise map the requested flag sub-register onto the change flags }
  546. case getsubreg(reg) of
  547. R_SUBW,R_SUBD,R_SUBQ:
  548. RegReadByInstruction :=
  549. [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
  550. Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
  551. Ch_RDirFlag,Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc]*Ch<>[];
  552. R_SUBFLAGCARRY:
  553. RegReadByInstruction:=[Ch_RCarryFlag,Ch_RWCarryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  554. R_SUBFLAGPARITY:
  555. RegReadByInstruction:=[Ch_RParityFlag,Ch_RWParityFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  556. R_SUBFLAGAUXILIARY:
  557. RegReadByInstruction:=[Ch_RAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  558. R_SUBFLAGZERO:
  559. RegReadByInstruction:=[Ch_RZeroFlag,Ch_RWZeroFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  560. R_SUBFLAGSIGN:
  561. RegReadByInstruction:=[Ch_RSignFlag,Ch_RWSignFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  562. R_SUBFLAGOVERFLOW:
  563. RegReadByInstruction:=[Ch_ROverflowFlag,Ch_RWOverflowFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  564. R_SUBFLAGINTERRUPT:
  565. RegReadByInstruction:=[Ch_RFlags,Ch_RWFlags]*Ch<>[];
  566. R_SUBFLAGDIRECTION:
  567. RegReadByInstruction:=[Ch_RDirFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  568. else
  569. internalerror(2017042601);
  570. end;
  571. exit;
  572. end;
  { Some instructions do not actually read a register operand when source and
    destination are the same register (Ch_NoReadIfEqualRegs) }
  573. if (Ch_NoReadIfEqualRegs in Ch) and (p.ops=2) and
  574. (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
  575. (p.oper[0]^.reg=p.oper[1]^.reg) then
  576. exit;
  { Explicit operand reads, according to the per-operand change information }
  577. if ([CH_RWOP1,CH_ROP1,CH_MOP1]*Ch<>[]) and reginop(reg,p.oper[0]^) then
  578. begin
  579. RegReadByInstruction := true;
  580. exit
  581. end;
  582. if ([Ch_RWOP2,Ch_ROP2,Ch_MOP2]*Ch<>[]) and reginop(reg,p.oper[1]^) then
  583. begin
  584. RegReadByInstruction := true;
  585. exit
  586. end;
  587. if ([Ch_RWOP3,Ch_ROP3,Ch_MOP3]*Ch<>[]) and reginop(reg,p.oper[2]^) then
  588. begin
  589. RegReadByInstruction := true;
  590. exit
  591. end;
  592. if ([Ch_RWOP4,Ch_ROP4,Ch_MOP4]*Ch<>[]) and reginop(reg,p.oper[3]^) then
  593. begin
  594. RegReadByInstruction := true;
  595. exit
  596. end;
  597. end;
  598. end;
  599. end;
  600. end;
function TX86AsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
  begin
    { Returns True if Reg is read and/or written (explicitly or implicitly) by
      instruction p1.  Implicit accesses are looked up in the instruction
      property table (insprop); anything not covered falls through to the
      inherited operand-based scan at the end. }
    result:=false;
    if p1.typ<>ait_instruction then
      exit;
    { Ch_All marks instructions whose effects are not modelled precisely;
      conservatively treat them as touching every register }
    if (Ch_All in insprop[taicpu(p1).opcode].Ch) then
      exit(true);
    if (getregtype(reg)=R_INTREGISTER) and
      { change information for xmm movsd are not correct }
      ((taicpu(p1).opcode<>A_MOVSD) or (taicpu(p1).ops=0)) then
      begin
        { Check the implicit read/write/modify channels for the specific
          general-purpose register }
        case getsupreg(reg) of
          { RS_EAX = RS_RAX on x86-64 }
          RS_EAX:
            result:=([Ch_REAX,Ch_RRAX,Ch_WEAX,Ch_WRAX,Ch_RWEAX,Ch_RWRAX,Ch_MEAX,Ch_MRAX]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_ECX:
            result:=([Ch_RECX,Ch_RRCX,Ch_WECX,Ch_WRCX,Ch_RWECX,Ch_RWRCX,Ch_MECX,Ch_MRCX]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_EDX:
            result:=([Ch_REDX,Ch_RRDX,Ch_WEDX,Ch_WRDX,Ch_RWEDX,Ch_RWRDX,Ch_MEDX,Ch_MRDX]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_EBX:
            result:=([Ch_REBX,Ch_RRBX,Ch_WEBX,Ch_WRBX,Ch_RWEBX,Ch_RWRBX,Ch_MEBX,Ch_MRBX]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_ESP:
            result:=([Ch_RESP,Ch_RRSP,Ch_WESP,Ch_WRSP,Ch_RWESP,Ch_RWRSP,Ch_MESP,Ch_MRSP]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_EBP:
            result:=([Ch_REBP,Ch_RRBP,Ch_WEBP,Ch_WRBP,Ch_RWEBP,Ch_RWRBP,Ch_MEBP,Ch_MRBP]*insprop[taicpu(p1).opcode].Ch)<>[];
          { ESI/EDI additionally cover the implicit string-operation memory
            accesses through [ESI]/[EDI] }
          RS_ESI:
            result:=([Ch_RESI,Ch_RRSI,Ch_WESI,Ch_WRSI,Ch_RWESI,Ch_RWRSI,Ch_MESI,Ch_MRSI,Ch_RMemEDI]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_EDI:
            result:=([Ch_REDI,Ch_RRDI,Ch_WEDI,Ch_WRDI,Ch_RWEDI,Ch_RWRDI,Ch_MEDI,Ch_MRDI,Ch_WMemEDI]*insprop[taicpu(p1).opcode].Ch)<>[];
          else
            ;
        end;
        if result then
          exit;
      end
    else if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
      begin
        { Whole-flags channels cover any individual flag }
        if ([Ch_RFlags,Ch_WFlags,Ch_RWFlags,Ch_RFLAGScc]*insprop[taicpu(p1).opcode].Ch)<>[] then
          exit(true);
        { Otherwise check only the channels of the requested flag bit }
        case getsubreg(reg) of
          R_SUBFLAGCARRY:
            Result:=([Ch_RCarryFlag,Ch_RWCarryFlag,Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGPARITY:
            Result:=([Ch_RParityFlag,Ch_RWParityFlag,Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGAUXILIARY:
            Result:=([Ch_RAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGZERO:
            Result:=([Ch_RZeroFlag,Ch_RWZeroFlag,Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGSIGN:
            Result:=([Ch_RSignFlag,Ch_RWSignFlag,Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGOVERFLOW:
            Result:=([Ch_ROverflowFlag,Ch_RWOverflowFlag,Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGINTERRUPT:
            Result:=([Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGDIRECTION:
            Result:=([Ch_RDirFlag,Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*insprop[taicpu(p1).opcode].Ch)<>[];
          else
            ;
        end;
        if result then
          exit;
      end
    else if (getregtype(reg)=R_FPUREGISTER) and (Ch_FPU in insprop[taicpu(p1).opcode].Ch) then
      exit(true);
    { Fall back to scanning the explicit operands }
    Result:=inherited RegInInstruction(Reg, p1);
  end;
function TX86AsmOptimizer.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;
  begin
    { Returns True if instruction p1 (potentially) writes to Reg, either
      through an explicit operand or an implicit channel recorded in the
      instruction property table (insprop). }
    Result := False;
    if p1.typ <> ait_instruction then
      exit;
    with insprop[taicpu(p1).opcode] do
      if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
        begin
          { Flags register: check the write/modify channels of the requested
            flag bit (or of any flag for the whole-register subregs) }
          case getsubreg(reg) of
            R_SUBW,R_SUBD,R_SUBQ:
              Result :=
                [Ch_WCarryFlag,Ch_WParityFlag,Ch_WAuxiliaryFlag,Ch_WZeroFlag,Ch_WSignFlag,Ch_WOverflowFlag,
                 Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
                 Ch_W0DirFlag,Ch_W1DirFlag,Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGCARRY:
              Result:=[Ch_WCarryFlag,Ch_RWCarryFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGPARITY:
              Result:=[Ch_WParityFlag,Ch_RWParityFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGAUXILIARY:
              Result:=[Ch_WAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGZERO:
              Result:=[Ch_WZeroFlag,Ch_RWZeroFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGSIGN:
              Result:=[Ch_WSignFlag,Ch_RWSignFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGOVERFLOW:
              Result:=[Ch_WOverflowFlag,Ch_RWOverflowFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGINTERRUPT:
              Result:=[Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGDIRECTION:
              Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            else
              internalerror(2017042602);
          end;
          exit;
        end;
    { Opcodes whose behaviour the property table cannot express correctly }
    case taicpu(p1).opcode of
      A_CALL:
        { We could potentially set Result to False if the register in
          question is non-volatile for the subroutine's calling convention,
          but this would require detecting the calling convention in use and
          also assuming that the routine doesn't contain malformed assembly
          language, for example... so it could only be done under -O4 as it
          would be considered a side-effect. [Kit] }
        Result := True;
      A_MOVSD:
        { special handling for SSE MOVSD; with zero operands it is
          presumably the string instruction and handled generically below }
        if (taicpu(p1).ops>0) then
          begin
            if taicpu(p1).ops<>2 then
              internalerror(2017042703);
            Result := (taicpu(p1).oper[1]^.typ=top_reg) and RegInOp(reg,taicpu(p1).oper[1]^);
          end;
      { VMOVSS and VMOVSD has two and three operand flavours, this cannot modelled by x86ins.dat
        so fix it here (FK)
      }
      A_VMOVSS,
      A_VMOVSD:
        begin
          Result := (taicpu(p1).ops=3) and (taicpu(p1).oper[2]^.typ=top_reg) and RegInOp(reg,taicpu(p1).oper[2]^);
          exit;
        end;
      A_IMUL:
        { the destination is always the last operand; implicit EAX/EDX
          writes of the one-operand form are caught by the channel checks
          further down }
        Result := (taicpu(p1).oper[taicpu(p1).ops-1]^.typ=top_reg) and RegInOp(reg,taicpu(p1).oper[taicpu(p1).ops-1]^);
      else
        ;
    end;
    if Result then
      exit;
    with insprop[taicpu(p1).opcode] do
      begin
        { Implicit writes to specific general-purpose registers }
        if getregtype(reg)=R_INTREGISTER then
          begin
            case getsupreg(reg) of
              RS_EAX:
                if [Ch_WEAX,Ch_RWEAX,Ch_MEAX]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_ECX:
                if [Ch_WECX,Ch_RWECX,Ch_MECX]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_EDX:
                if [Ch_WEDX,Ch_RWEDX,Ch_MEDX]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_EBX:
                if [Ch_WEBX,Ch_RWEBX,Ch_MEBX]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_ESP:
                if [Ch_WESP,Ch_RWESP,Ch_MESP]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_EBP:
                if [Ch_WEBP,Ch_RWEBP,Ch_MEBP]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_ESI:
                if [Ch_WESI,Ch_RWESI,Ch_MESI]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_EDI:
                if [Ch_WEDI,Ch_RWEDI,Ch_MEDI]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
            end;
          end;
        { Explicit operand writes: operand n is modified if the instruction
          has a write/read-write/modify channel for it }
        if ([CH_RWOP1,CH_WOP1,CH_MOP1]*Ch<>[]) and reginop(reg,taicpu(p1).oper[0]^) then
          begin
            Result := true;
            exit
          end;
        if ([Ch_RWOP2,Ch_WOP2,Ch_MOP2]*Ch<>[]) and reginop(reg,taicpu(p1).oper[1]^) then
          begin
            Result := true;
            exit
          end;
        if ([Ch_RWOP3,Ch_WOP3,Ch_MOP3]*Ch<>[]) and reginop(reg,taicpu(p1).oper[2]^) then
          begin
            Result := true;
            exit
          end;
        if ([Ch_RWOP4,Ch_WOP4,Ch_MOP4]*Ch<>[]) and reginop(reg,taicpu(p1).oper[3]^) then
          begin
            Result := true;
            exit
          end;
      end;
  end;
  812. {$ifdef DEBUG_AOPTCPU}
procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
  begin
    { Record the optimizer message as an assembler comment directly before p }
    asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  end;
{ Debug output helper - decimal string form of an integer }
function debug_tostr(i: tcgint): string; inline;
  begin
    Result := tostr(i);
  end;
{ Debug output helper - register name with the '%' prefix (GAS/AT&T style) }
function debug_regname(r: TRegister): string; inline;
  begin
    Result := '%' + std_regname(r);
  end;
  825. { Debug output function - creates a string representation of an operator }
  826. function debug_operstr(oper: TOper): string;
  827. begin
  828. case oper.typ of
  829. top_const:
  830. Result := '$' + debug_tostr(oper.val);
  831. top_reg:
  832. Result := debug_regname(oper.reg);
  833. top_ref:
  834. begin
  835. if oper.ref^.offset <> 0 then
  836. Result := debug_tostr(oper.ref^.offset) + '('
  837. else
  838. Result := '(';
  839. if (oper.ref^.base <> NR_INVALID) and (oper.ref^.base <> NR_NO) then
  840. begin
  841. Result := Result + debug_regname(oper.ref^.base);
  842. if (oper.ref^.index <> NR_INVALID) and (oper.ref^.index <> NR_NO) then
  843. Result := Result + ',' + debug_regname(oper.ref^.index);
  844. end
  845. else
  846. if (oper.ref^.index <> NR_INVALID) and (oper.ref^.index <> NR_NO) then
  847. Result := Result + debug_regname(oper.ref^.index);
  848. if (oper.ref^.scalefactor > 1) then
  849. Result := Result + ',' + debug_tostr(oper.ref^.scalefactor) + ')'
  850. else
  851. Result := Result + ')';
  852. end;
  853. else
  854. Result := '[UNKNOWN]';
  855. end;
  856. end;
{ Debug output helper - mnemonic for an opcode, looked up in std_op2str }
function debug_op2str(opcode: tasmop): string; inline;
  begin
    Result := std_op2str[opcode];
  end;
{ Debug output helper - GAS-style suffix for an operand size }
function debug_opsize2str(opsize: topsize): string; inline;
  begin
    Result := gas_opsize2str[opsize];
  end;
  865. {$else DEBUG_AOPTCPU}
procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  begin
    { No-op: debug messages are only emitted when DEBUG_AOPTCPU is defined }
  end;
function debug_tostr(i: tcgint): string; inline;
  begin
    { Stub: returns '' when DEBUG_AOPTCPU is not defined }
    Result := '';
  end;
function debug_regname(r: TRegister): string; inline;
  begin
    { Stub: returns '' when DEBUG_AOPTCPU is not defined }
    Result := '';
  end;
function debug_operstr(oper: TOper): string; inline;
  begin
    { Stub: returns '' when DEBUG_AOPTCPU is not defined }
    Result := '';
  end;
function debug_op2str(opcode: tasmop): string; inline;
  begin
    { Stub: returns '' when DEBUG_AOPTCPU is not defined }
    Result := '';
  end;
function debug_opsize2str(opsize: topsize): string; inline;
  begin
    { Stub: returns '' when DEBUG_AOPTCPU is not defined }
    Result := '';
  end;
  889. {$endif DEBUG_AOPTCPU}
{ Returns True when the peephole optimizer may emit MOVZX for the current
  target CPU / optimization settings }
class function TX86AsmOptimizer.IsMOVZXAcceptable: Boolean; inline;
  begin
{$ifdef x86_64}
    { Always fine on x86-64 }
    Result := True;
{$else x86_64}
    Result :=
{$ifdef i8086}
      { MOVZX does not exist before the 386 }
      (current_settings.cputype >= cpu_386) and
{$endif i8086}
      (
        { Always accept if optimising for size }
        (cs_opt_size in current_settings.optimizerswitches) or
        { From the Pentium II onwards, MOVZX only takes 1 cycle. [Kit] }
        (current_settings.optimizecputype >= cpu_Pentium2)
      );
{$endif x86_64}
  end;
  908. { Attempts to allocate a volatile integer register for use between p and hp,
  909. using AUsedRegs for the current register usage information. Returns NR_NO
  910. if no free register could be found }
  911. function TX86AsmOptimizer.GetIntRegisterBetween(RegSize: TSubRegister; var AUsedRegs: TAllUsedRegs; p, hp: tai): TRegister;
  912. var
  913. RegSet: TCPURegisterSet;
  914. CurrentSuperReg: Integer;
  915. CurrentReg: TRegister;
  916. Currentp: tai;
  917. Breakout: Boolean;
  918. begin
  919. { TODO: Currently, only the volatile registers are checked - can this be extended to use any register the procedure has preserved? }
  920. Result := NR_NO;
  921. RegSet := paramanager.get_volatile_registers_int(current_procinfo.procdef.proccalloption);
  922. for CurrentSuperReg in RegSet do
  923. begin
  924. CurrentReg := newreg(R_INTREGISTER, TSuperRegister(CurrentSuperReg), RegSize);
  925. if not AUsedRegs[R_INTREGISTER].IsUsed(CurrentReg) then
  926. begin
  927. Currentp := p;
  928. Breakout := False;
  929. while not Breakout and GetNextInstruction(Currentp, Currentp) and (Currentp <> hp) do
  930. begin
  931. case Currentp.typ of
  932. ait_instruction:
  933. begin
  934. if RegInInstruction(CurrentReg, Currentp) then
  935. begin
  936. Breakout := True;
  937. Break;
  938. end;
  939. { Cannot allocate across an unconditional jump }
  940. if is_calljmpuncondret(taicpu(Currentp).opcode) then
  941. Exit;
  942. end;
  943. ait_marker:
  944. { Don't try anything more if a marker is hit }
  945. Exit;
  946. ait_regalloc:
  947. if (tai_regalloc(Currentp).ratype <> ra_dealloc) and SuperRegistersEqual(CurrentReg, tai_regalloc(Currentp).reg) then
  948. begin
  949. Breakout := True;
  950. Break;
  951. end;
  952. else
  953. ;
  954. end;
  955. end;
  956. if Breakout then
  957. { Try the next register }
  958. Continue;
  959. { We have a free register available }
  960. Result := CurrentReg;
  961. AllocRegBetween(CurrentReg, p, hp, AUsedRegs);
  962. Exit;
  963. end;
  964. end;
  965. end;
  966. { Attempts to allocate a volatile MM register for use between p and hp,
  967. using AUsedRegs for the current register usage information. Returns NR_NO
  968. if no free register could be found }
  969. function TX86AsmOptimizer.GetMMRegisterBetween(RegSize: TSubRegister; var AUsedRegs: TAllUsedRegs; p, hp: tai): TRegister;
  970. var
  971. RegSet: TCPURegisterSet;
  972. CurrentSuperReg: Integer;
  973. CurrentReg: TRegister;
  974. Currentp: tai;
  975. Breakout: Boolean;
  976. begin
  977. { TODO: Currently, only the volatile registers are checked - can this be extended to use any register the procedure has preserved? }
  978. Result := NR_NO;
  979. RegSet := paramanager.get_volatile_registers_mm(current_procinfo.procdef.proccalloption);
  980. for CurrentSuperReg in RegSet do
  981. begin
  982. CurrentReg := newreg(R_MMREGISTER, TSuperRegister(CurrentSuperReg), RegSize);
  983. if not AUsedRegs[R_MMREGISTER].IsUsed(CurrentReg) then
  984. begin
  985. Currentp := p;
  986. Breakout := False;
  987. while not Breakout and GetNextInstruction(Currentp, Currentp) and (Currentp <> hp) do
  988. begin
  989. case Currentp.typ of
  990. ait_instruction:
  991. begin
  992. if RegInInstruction(CurrentReg, Currentp) then
  993. begin
  994. Breakout := True;
  995. Break;
  996. end;
  997. { Cannot allocate across an unconditional jump }
  998. if is_calljmpuncondret(taicpu(Currentp).opcode) then
  999. Exit;
  1000. end;
  1001. ait_marker:
  1002. { Don't try anything more if a marker is hit }
  1003. Exit;
  1004. ait_regalloc:
  1005. if (tai_regalloc(Currentp).ratype <> ra_dealloc) and SuperRegistersEqual(CurrentReg, tai_regalloc(Currentp).reg) then
  1006. begin
  1007. Breakout := True;
  1008. Break;
  1009. end;
  1010. else
  1011. ;
  1012. end;
  1013. end;
  1014. if Breakout then
  1015. { Try the next register }
  1016. Continue;
  1017. { We have a free register available }
  1018. Result := CurrentReg;
  1019. AllocRegBetween(CurrentReg, p, hp, AUsedRegs);
  1020. Exit;
  1021. end;
  1022. end;
  1023. end;
  1024. function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
  1025. begin
  1026. if not SuperRegistersEqual(reg1,reg2) then
  1027. exit(false);
  1028. if getregtype(reg1)<>R_INTREGISTER then
  1029. exit(true); {because SuperRegisterEqual is true}
  1030. case getsubreg(reg1) of
  1031. { A write to R_SUBL doesn't change R_SUBH and if reg2 is R_SUBW or
  1032. higher, it preserves the high bits, so the new value depends on
  1033. reg2's previous value. In other words, it is equivalent to doing:
  1034. reg2 := (reg2 and $ffffff00) or byte(reg1); }
  1035. R_SUBL:
  1036. exit(getsubreg(reg2)=R_SUBL);
  1037. { A write to R_SUBH doesn't change R_SUBL and if reg2 is R_SUBW or
  1038. higher, it actually does a:
  1039. reg2 := (reg2 and $ffff00ff) or (reg1 and $ff00); }
  1040. R_SUBH:
  1041. exit(getsubreg(reg2)=R_SUBH);
  1042. { If reg2 is R_SUBD or larger, a write to R_SUBW preserves the high 16
  1043. bits of reg2:
  1044. reg2 := (reg2 and $ffff0000) or word(reg1); }
  1045. R_SUBW:
  1046. exit(getsubreg(reg2) in [R_SUBL,R_SUBH,R_SUBW]);
  1047. { a write to R_SUBD always overwrites every other subregister,
  1048. because it clears the high 32 bits of R_SUBQ on x86_64 }
  1049. R_SUBD,
  1050. R_SUBQ:
  1051. exit(true);
  1052. else
  1053. internalerror(2017042801);
  1054. end;
  1055. end;
  1056. function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
  1057. begin
  1058. if not SuperRegistersEqual(reg1,reg2) then
  1059. exit(false);
  1060. if getregtype(reg1)<>R_INTREGISTER then
  1061. exit(true); {because SuperRegisterEqual is true}
  1062. case getsubreg(reg1) of
  1063. R_SUBL:
  1064. exit(getsubreg(reg2)<>R_SUBH);
  1065. R_SUBH:
  1066. exit(getsubreg(reg2)<>R_SUBL);
  1067. R_SUBW,
  1068. R_SUBD,
  1069. R_SUBQ:
  1070. exit(true);
  1071. else
  1072. internalerror(2017042802);
  1073. end;
  1074. end;
function TX86AsmOptimizer.PrePeepholeOptSxx(var p : tai) : boolean;
  var
    hp1 : tai;
    l : TCGInt;   { mask of the bits cleared by the shift pair }
  begin
    { Note: Result is left False; the transformations below rewrite the
      instructions in place }
    result:=false;
    { changes the code sequence
      shr/sar const1, x
      shl const2, x
      to
      either "sar/and", "shl/and" or just "and" depending on const1 and const2 }
    if GetNextInstruction(p, hp1) and
      MatchInstruction(hp1,A_SHL,[]) and
      (taicpu(p).oper[0]^.typ = top_const) and
      (taicpu(hp1).oper[0]^.typ = top_const) and
      (taicpu(hp1).opsize = taicpu(p).opsize) and
      (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
      OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
      begin
        if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
          not(cs_opt_size in current_settings.optimizerswitches) then
          begin
            { shr/sar const1, %reg
              shl const2, %reg
              with const1 > const2 }
            { keep a shift by the difference, and turn the shl into an and
              that clears the low const2 bits }
            taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
            taicpu(hp1).opcode := A_AND;
            l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
            case taicpu(p).opsize Of
              S_B: taicpu(hp1).loadConst(0,l Xor $ff);
              S_W: taicpu(hp1).loadConst(0,l Xor $ffff);
              S_L: taicpu(hp1).loadConst(0,l Xor tcgint($ffffffff));
              S_Q: taicpu(hp1).loadConst(0,l Xor tcgint($ffffffffffffffff));
              else
                Internalerror(2017050703)
            end;
          end
        else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
          not(cs_opt_size in current_settings.optimizerswitches) then
          begin
            { shr/sar const1, %reg
              shl const2, %reg
              with const1 < const2 }
            { keep a shl by the difference, and turn the first shift into an
              and that clears the low const1 bits }
            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
            taicpu(p).opcode := A_AND;
            l := (1 shl (taicpu(p).oper[0]^.val))-1;
            case taicpu(p).opsize Of
              S_B: taicpu(p).loadConst(0,l Xor $ff);
              S_W: taicpu(p).loadConst(0,l Xor $ffff);
              S_L: taicpu(p).loadConst(0,l Xor tcgint($ffffffff));
              S_Q: taicpu(p).loadConst(0,l Xor tcgint($ffffffffffffffff));
              else
                Internalerror(2017050702)
            end;
          end
        else if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
          begin
            { shr/sar const1, %reg
              shl const2, %reg
              with const1 = const2 }
            { the pair only clears the low const1 bits: replace both with a
              single and }
            taicpu(p).opcode := A_AND;
            l := (1 shl (taicpu(p).oper[0]^.val))-1;
            case taicpu(p).opsize Of
              S_B: taicpu(p).loadConst(0,l Xor $ff);
              S_W: taicpu(p).loadConst(0,l Xor $ffff);
              S_L: taicpu(p).loadConst(0,l Xor tcgint($ffffffff));
              S_Q: taicpu(p).loadConst(0,l Xor tcgint($ffffffffffffffff));
              else
                Internalerror(2017050701)
            end;
            RemoveInstruction(hp1);
          end;
      end;
  end;
function TX86AsmOptimizer.PrePeepholeOptIMUL(var p : tai) : boolean;
  var
    opsize : topsize;
    hp1 : tai;
    tmpref : treference;
    ShiftValue : Cardinal;   { number of trailing zero bits in the constant }
    BaseValue : TCGInt;      { constant shifted right by ShiftValue; must be 3, 5 or 9 }
  begin
    result:=false;
    opsize:=taicpu(p).opsize;
    { changes certain "imul const, %reg"'s to lea sequences }
    if (MatchOpType(taicpu(p),top_const,top_reg) or
        MatchOpType(taicpu(p),top_const,top_reg,top_reg)) and
      (opsize in [S_L{$ifdef x86_64},S_Q{$endif x86_64}]) then
      if (taicpu(p).oper[0]^.val = 1) then
        if (taicpu(p).ops = 2) then
          { remove "imul $1, reg" }
          begin
            DebugMsg(SPeepholeOptimization + 'Imul2Nop done',p);
            Result := RemoveCurrentP(p);
          end
        else
          { change "imul $1, reg1, reg2" to "mov reg1, reg2" }
          begin
            hp1 := taicpu.Op_Reg_Reg(A_MOV, opsize, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
            InsertLLItem(p.previous, p.next, hp1);
            DebugMsg(SPeepholeOptimization + 'Imul2Mov done',p);
            p.free;
            p := hp1;
          end
      else if ((taicpu(p).ops <= 2) or
          (taicpu(p).oper[2]^.typ = Top_Reg)) and
        not(cs_opt_size in current_settings.optimizerswitches) and
        { do not perform the rewrite when the following instruction is a
          jump on the overflow flag, which imul sets but lea/shl do not }
        (not(GetNextInstruction(p, hp1)) or
         not((tai(hp1).typ = ait_instruction) and
             ((taicpu(hp1).opcode=A_Jcc) and
              (taicpu(hp1).condition in [C_O,C_NO])))) then
        begin
          {
            imul X, reg1, reg2 to
            lea (reg1,reg1,Y), reg2
            shl ZZ,reg2
            imul XX, reg1 to
            lea (reg1,reg1,YY), reg1
            shl ZZ,reg2
            This optimziation makes sense for pretty much every x86, except the VIA Nano3000: it has IMUL latency 2, lea/shl pair as well,
            it does not exist as a separate optimization target in FPC though.
            This optimziation can be applied as long as only two bits are set in the constant and those two bits are separated by
            at most two zeros
          }
          reference_reset(tmpref,1,[]);
          if (PopCnt(QWord(taicpu(p).oper[0]^.val))=2) and (BsrQWord(taicpu(p).oper[0]^.val)-BsfQWord(taicpu(p).oper[0]^.val)<=3) then
            begin
              { factor the constant as BaseValue * 2^ShiftValue }
              ShiftValue:=BsfQWord(taicpu(p).oper[0]^.val);
              BaseValue:=taicpu(p).oper[0]^.val shr ShiftValue;
              TmpRef.base := taicpu(p).oper[1]^.reg;
              TmpRef.index := taicpu(p).oper[1]^.reg;
              { the bit-distance check above guarantees BaseValue is 3, 5 or 9,
                i.e. representable as reg+reg*scale }
              if not(BaseValue in [3,5,9]) then
                Internalerror(2018110101);
              TmpRef.ScaleFactor := BaseValue-1;
              if (taicpu(p).ops = 2) then
                hp1 := taicpu.op_ref_reg(A_LEA, opsize, TmpRef, taicpu(p).oper[1]^.reg)
              else
                hp1 := taicpu.op_ref_reg(A_LEA, opsize, TmpRef, taicpu(p).oper[2]^.reg);
              AsmL.InsertAfter(hp1,p);
              DebugMsg(SPeepholeOptimization + 'Imul2LeaShl done',p);
              taicpu(hp1).fileinfo:=taicpu(p).fileinfo;
              RemoveCurrentP(p, hp1);
              { complete the multiplication with the power-of-two factor }
              if ShiftValue>0 then
                AsmL.InsertAfter(taicpu.op_const_reg(A_SHL, opsize, ShiftValue, taicpu(hp1).oper[1]^.reg),hp1);
            end;
        end;
  end;
  1222. function TX86AsmOptimizer.PrePeepholeOptAND(var p : tai) : boolean;
  1223. begin
  1224. Result := False;
  1225. if MatchOperand(taicpu(p).oper[0]^, 0) and
  1226. not RegInUsedRegs(NR_DEFAULTFLAGS, UsedRegs) then
  1227. begin
  1228. DebugMsg(SPeepholeOptimization + 'AND 0 -> MOV 0', p);
  1229. taicpu(p).opcode := A_MOV;
  1230. Result := True;
  1231. end;
  1232. end;
  1233. function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  1234. var
  1235. p: taicpu absolute hp;
  1236. i: Integer;
  1237. begin
  1238. Result := False;
  1239. if not assigned(hp) or
  1240. (hp.typ <> ait_instruction) then
  1241. Exit;
  1242. // p := taicpu(hp);
  1243. Prefetch(insprop[p.opcode]);
  1244. if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
  1245. with insprop[p.opcode] do
  1246. begin
  1247. case getsubreg(reg) of
  1248. R_SUBW,R_SUBD,R_SUBQ:
  1249. Result:=
  1250. RegLoadedWithNewValue(NR_CARRYFLAG,hp) and
  1251. RegLoadedWithNewValue(NR_PARITYFLAG,hp) and
  1252. RegLoadedWithNewValue(NR_AUXILIARYFLAG,hp) and
  1253. RegLoadedWithNewValue(NR_ZEROFLAG,hp) and
  1254. RegLoadedWithNewValue(NR_SIGNFLAG,hp) and
  1255. RegLoadedWithNewValue(NR_OVERFLOWFLAG,hp);
  1256. R_SUBFLAGCARRY:
  1257. Result:=[Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag,Ch_WFlags]*Ch<>[];
  1258. R_SUBFLAGPARITY:
  1259. Result:=[Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag,Ch_WFlags]*Ch<>[];
  1260. R_SUBFLAGAUXILIARY:
  1261. Result:=[Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag,Ch_WFlags]*Ch<>[];
  1262. R_SUBFLAGZERO:
  1263. Result:=[Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag,Ch_WFlags]*Ch<>[];
  1264. R_SUBFLAGSIGN:
  1265. Result:=[Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag,Ch_WFlags]*Ch<>[];
  1266. R_SUBFLAGOVERFLOW:
  1267. Result:=[Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag,Ch_WFlags]*Ch<>[];
  1268. R_SUBFLAGINTERRUPT:
  1269. Result:=[Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*Ch<>[];
  1270. R_SUBFLAGDIRECTION:
  1271. Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*Ch<>[];
  1272. else
  1273. begin
  1274. writeln(getsubreg(reg));
  1275. internalerror(2017050501);
  1276. end;
  1277. end;
  1278. exit;
  1279. end;
  1280. { Handle special cases first }
  1281. case p.opcode of
  1282. A_MOV, A_MOVZX, A_MOVSX, A_LEA, A_VMOVSS, A_VMOVSD, A_VMOVAPD,
  1283. A_VMOVAPS, A_VMOVQ, A_MOVSS, A_MOVSD, A_MOVQ, A_MOVAPD, A_MOVAPS:
  1284. begin
  1285. Result :=
  1286. (p.ops=2) and { A_MOVSD can have zero operands, so this check is needed }
  1287. (p.oper[1]^.typ = top_reg) and
  1288. (Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)) and
  1289. (
  1290. (p.oper[0]^.typ = top_const) or
  1291. (
  1292. (p.oper[0]^.typ = top_reg) and
  1293. not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))
  1294. ) or (
  1295. (p.oper[0]^.typ = top_ref) and
  1296. not RegInRef(reg,p.oper[0]^.ref^)
  1297. )
  1298. );
  1299. end;
  1300. A_MUL, A_IMUL:
  1301. Result :=
  1302. (
  1303. (p.ops=3) and { IMUL only }
  1304. (Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg,reg)) and
  1305. (
  1306. (
  1307. (p.oper[1]^.typ=top_reg) and
  1308. not Reg1ReadDependsOnReg2(p.oper[1]^.reg,reg)
  1309. ) or (
  1310. (p.oper[1]^.typ=top_ref) and
  1311. not RegInRef(reg,p.oper[1]^.ref^)
  1312. )
  1313. )
  1314. ) or (
  1315. (
  1316. (p.ops=1) and
  1317. (
  1318. (
  1319. (
  1320. (p.oper[0]^.typ=top_reg) and
  1321. not Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg)
  1322. )
  1323. ) or (
  1324. (p.oper[0]^.typ=top_ref) and
  1325. not RegInRef(reg,p.oper[0]^.ref^)
  1326. )
  1327. ) and (
  1328. (
  1329. (p.opsize=S_B) and
  1330. Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and
  1331. not Reg1ReadDependsOnReg2(NR_AL,reg)
  1332. ) or (
  1333. (p.opsize=S_W) and
  1334. Reg1WriteOverwritesReg2Entirely(NR_DX,reg)
  1335. ) or (
  1336. (p.opsize=S_L) and
  1337. Reg1WriteOverwritesReg2Entirely(NR_EDX,reg)
  1338. {$ifdef x86_64}
  1339. ) or (
  1340. (p.opsize=S_Q) and
  1341. Reg1WriteOverwritesReg2Entirely(NR_RDX,reg)
  1342. {$endif x86_64}
  1343. )
  1344. )
  1345. )
  1346. );
  1347. A_CBW:
  1348. Result := Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg));
  1349. {$ifndef x86_64}
  1350. A_LDS:
  1351. Result := (reg=NR_DS) and not(RegInRef(reg,p.oper[0]^.ref^));
  1352. A_LES:
  1353. Result := (reg=NR_ES) and not(RegInRef(reg,p.oper[0]^.ref^));
  1354. {$endif not x86_64}
  1355. A_LFS:
  1356. Result := (reg=NR_FS) and not(RegInRef(reg,p.oper[0]^.ref^));
  1357. A_LGS:
  1358. Result := (reg=NR_GS) and not(RegInRef(reg,p.oper[0]^.ref^));
  1359. A_LSS:
  1360. Result := (reg=NR_SS) and not(RegInRef(reg,p.oper[0]^.ref^));
  1361. A_LAHF{$ifndef x86_64}, A_AAM{$endif not x86_64}:
  1362. Result := Reg1WriteOverwritesReg2Entirely(NR_AH,reg);
  1363. A_LODSB:
  1364. Result := Reg1WriteOverwritesReg2Entirely(NR_AL,reg);
  1365. A_LODSW:
  1366. Result := Reg1WriteOverwritesReg2Entirely(NR_AX,reg);
  1367. {$ifdef x86_64}
  1368. A_LODSQ:
  1369. Result := Reg1WriteOverwritesReg2Entirely(NR_RAX,reg);
  1370. {$endif x86_64}
  1371. A_LODSD:
  1372. Result := Reg1WriteOverwritesReg2Entirely(NR_EAX,reg);
  1373. A_FSTSW, A_FNSTSW:
  1374. Result := (p.oper[0]^.typ=top_reg) and Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg);
  1375. else
  1376. begin
  1377. with insprop[p.opcode] do
  1378. begin
  1379. if (
  1380. { xor %reg,%reg etc. is classed as a new value }
  1381. (([Ch_NoReadIfEqualRegs]*Ch)<>[]) and
  1382. MatchOpType(p, top_reg, top_reg) and
  1383. (p.oper[0]^.reg = p.oper[1]^.reg) and
  1384. Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)
  1385. ) then
  1386. begin
  1387. Result := True;
  1388. Exit;
  1389. end;
  1390. { Make sure the entire register is overwritten }
  1391. if (getregtype(reg) = R_INTREGISTER) then
  1392. begin
  1393. if (p.ops > 0) then
  1394. begin
  1395. if RegInOp(reg, p.oper[0]^) then
  1396. begin
  1397. if (p.oper[0]^.typ = top_ref) then
  1398. begin
  1399. if RegInRef(reg, p.oper[0]^.ref^) then
  1400. begin
  1401. Result := False;
  1402. Exit;
  1403. end;
  1404. end
  1405. else if (p.oper[0]^.typ = top_reg) then
  1406. begin
  1407. if ([Ch_ROp1, Ch_RWOp1, Ch_MOp1]*Ch<>[]) then
  1408. begin
  1409. Result := False;
  1410. Exit;
  1411. end
  1412. else if ([Ch_WOp1]*Ch<>[]) then
  1413. begin
  1414. if Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg, reg) then
  1415. Result := True
  1416. else
  1417. begin
  1418. Result := False;
  1419. Exit;
  1420. end;
  1421. end;
  1422. end;
  1423. end;
  1424. if (p.ops > 1) then
  1425. begin
  1426. if RegInOp(reg, p.oper[1]^) then
  1427. begin
  1428. if (p.oper[1]^.typ = top_ref) then
  1429. begin
  1430. if RegInRef(reg, p.oper[1]^.ref^) then
  1431. begin
  1432. Result := False;
  1433. Exit;
  1434. end;
  1435. end
  1436. else if (p.oper[1]^.typ = top_reg) then
  1437. begin
  1438. if ([Ch_ROp2, Ch_RWOp2, Ch_MOp2]*Ch<>[]) then
  1439. begin
  1440. Result := False;
  1441. Exit;
  1442. end
  1443. else if ([Ch_WOp2]*Ch<>[]) then
  1444. begin
  1445. if Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg, reg) then
  1446. Result := True
  1447. else
  1448. begin
  1449. Result := False;
  1450. Exit;
  1451. end;
  1452. end;
  1453. end;
  1454. end;
  1455. if (p.ops > 2) then
  1456. begin
  1457. if RegInOp(reg, p.oper[2]^) then
  1458. begin
  1459. if (p.oper[2]^.typ = top_ref) then
  1460. begin
  1461. if RegInRef(reg, p.oper[2]^.ref^) then
  1462. begin
  1463. Result := False;
  1464. Exit;
  1465. end;
  1466. end
  1467. else if (p.oper[2]^.typ = top_reg) then
  1468. begin
  1469. if ([Ch_ROp3, Ch_RWOp3, Ch_MOp3]*Ch<>[]) then
  1470. begin
  1471. Result := False;
  1472. Exit;
  1473. end
  1474. else if ([Ch_WOp3]*Ch<>[]) then
  1475. begin
  1476. if Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg, reg) then
  1477. Result := True
  1478. else
  1479. begin
  1480. Result := False;
  1481. Exit;
  1482. end;
  1483. end;
  1484. end;
  1485. end;
  1486. if (p.ops > 3) and RegInOp(reg, p.oper[3]^) then
  1487. begin
  1488. if (p.oper[3]^.typ = top_ref) then
  1489. begin
  1490. if RegInRef(reg, p.oper[3]^.ref^) then
  1491. begin
  1492. Result := False;
  1493. Exit;
  1494. end;
  1495. end
  1496. else if (p.oper[3]^.typ = top_reg) then
  1497. begin
  1498. if ([Ch_ROp4, Ch_RWOp4, Ch_MOp4]*Ch<>[]) then
  1499. begin
  1500. Result := False;
  1501. Exit;
  1502. end
  1503. else if ([Ch_WOp4]*Ch<>[]) then
  1504. begin
  1505. if Reg1WriteOverwritesReg2Entirely(p.oper[3]^.reg, reg) then
  1506. Result := True
  1507. else
  1508. begin
  1509. Result := False;
  1510. Exit;
  1511. end;
  1512. end;
  1513. end;
  1514. end;
  1515. end;
  1516. end;
  1517. end;
  1518. { Don't do these ones first in case an input operand is equal to an explicit output registers }
  1519. case getsupreg(reg) of
  1520. RS_EAX:
  1521. if ([Ch_WEAX{$ifdef x86_64},Ch_WRAX{$endif x86_64}]*Ch<>[]) and Reg1WriteOverwritesReg2Entirely(NR_EAX, reg) then
  1522. begin
  1523. Result := True;
  1524. Exit;
  1525. end;
  1526. RS_ECX:
  1527. if ([Ch_WECX{$ifdef x86_64},Ch_WRCX{$endif x86_64}]*Ch<>[]) and Reg1WriteOverwritesReg2Entirely(NR_ECX, reg) then
  1528. begin
  1529. Result := True;
  1530. Exit;
  1531. end;
  1532. RS_EDX:
  1533. if ([Ch_REDX{$ifdef x86_64},Ch_WRDX{$endif x86_64}]*Ch<>[]) and Reg1WriteOverwritesReg2Entirely(NR_EDX, reg) then
  1534. begin
  1535. Result := True;
  1536. Exit;
  1537. end;
  1538. RS_EBX:
  1539. if ([Ch_WEBX{$ifdef x86_64},Ch_WRBX{$endif x86_64}]*Ch<>[]) and Reg1WriteOverwritesReg2Entirely(NR_EBX, reg) then
  1540. begin
  1541. Result := True;
  1542. Exit;
  1543. end;
  1544. RS_ESP:
  1545. if ([Ch_WESP{$ifdef x86_64},Ch_WRSP{$endif x86_64}]*Ch<>[]) and Reg1WriteOverwritesReg2Entirely(NR_ESP, reg) then
  1546. begin
  1547. Result := True;
  1548. Exit;
  1549. end;
  1550. RS_EBP:
  1551. if ([Ch_WEBP{$ifdef x86_64},Ch_WRBP{$endif x86_64}]*Ch<>[]) and Reg1WriteOverwritesReg2Entirely(NR_EBP, reg) then
  1552. begin
  1553. Result := True;
  1554. Exit;
  1555. end;
  1556. RS_ESI:
  1557. if ([Ch_WESI{$ifdef x86_64},Ch_WRSI{$endif x86_64}]*Ch<>[]) and Reg1WriteOverwritesReg2Entirely(NR_ESI, reg) then
  1558. begin
  1559. Result := True;
  1560. Exit;
  1561. end;
  1562. RS_EDI:
  1563. if ([Ch_WEDI{$ifdef x86_64},Ch_WRDI{$endif x86_64}]*Ch<>[]) and Reg1WriteOverwritesReg2Entirely(NR_EDI, reg) then
  1564. begin
  1565. Result := True;
  1566. Exit;
  1567. end;
  1568. else
  1569. ;
  1570. end;
  1571. end;
  1572. end;
  1573. end;
  1574. end;
  1575. end;
  1576. class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
  1577. var
  1578. hp2,hp3 : tai;
  1579. begin
  1580. { some x86-64 issue a NOP before the real exit code }
  1581. if MatchInstruction(p,A_NOP,[]) then
  1582. GetNextInstruction(p,p);
  1583. result:=assigned(p) and (p.typ=ait_instruction) and
  1584. ((taicpu(p).opcode = A_RET) or
  1585. ((taicpu(p).opcode=A_LEAVE) and
  1586. GetNextInstruction(p,hp2) and
  1587. MatchInstruction(hp2,A_RET,[S_NO])
  1588. ) or
  1589. (((taicpu(p).opcode=A_LEA) and
  1590. MatchOpType(taicpu(p),top_ref,top_reg) and
  1591. (taicpu(p).oper[0]^.ref^.base=NR_STACK_POINTER_REG) and
  1592. (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)
  1593. ) and
  1594. GetNextInstruction(p,hp2) and
  1595. MatchInstruction(hp2,A_RET,[S_NO])
  1596. ) or
  1597. ((((taicpu(p).opcode=A_MOV) and
  1598. MatchOpType(taicpu(p),top_reg,top_reg) and
  1599. (taicpu(p).oper[0]^.reg=current_procinfo.framepointer) and
  1600. (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)) or
  1601. ((taicpu(p).opcode=A_LEA) and
  1602. MatchOpType(taicpu(p),top_ref,top_reg) and
  1603. (taicpu(p).oper[0]^.ref^.base=current_procinfo.framepointer) and
  1604. (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)
  1605. )
  1606. ) and
  1607. GetNextInstruction(p,hp2) and
  1608. MatchInstruction(hp2,A_POP,[reg2opsize(current_procinfo.framepointer)]) and
  1609. MatchOpType(taicpu(hp2),top_reg) and
  1610. (taicpu(hp2).oper[0]^.reg=current_procinfo.framepointer) and
  1611. GetNextInstruction(hp2,hp3) and
  1612. MatchInstruction(hp3,A_RET,[S_NO])
  1613. )
  1614. );
  1615. end;
  1616. class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
  1617. begin
  1618. isFoldableArithOp := False;
  1619. case hp1.opcode of
  1620. A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
  1621. isFoldableArithOp :=
  1622. ((taicpu(hp1).oper[0]^.typ = top_const) or
  1623. ((taicpu(hp1).oper[0]^.typ = top_reg) and
  1624. (taicpu(hp1).oper[0]^.reg <> reg))) and
  1625. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1626. (taicpu(hp1).oper[1]^.reg = reg);
  1627. A_INC,A_DEC,A_NEG,A_NOT:
  1628. isFoldableArithOp :=
  1629. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1630. (taicpu(hp1).oper[0]^.reg = reg);
  1631. else
  1632. ;
  1633. end;
  1634. end;
  1635. procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);
  1636. procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
  1637. var
  1638. hp2: tai;
  1639. begin
  1640. hp2 := p;
  1641. repeat
  1642. hp2 := tai(hp2.previous);
  1643. if assigned(hp2) and
  1644. (hp2.typ = ait_regalloc) and
  1645. (tai_regalloc(hp2).ratype=ra_dealloc) and
  1646. (getregtype(tai_regalloc(hp2).reg) = R_INTREGISTER) and
  1647. (getsupreg(tai_regalloc(hp2).reg) = supreg) then
  1648. begin
  1649. RemoveInstruction(hp2);
  1650. break;
  1651. end;
  1652. until not(assigned(hp2)) or regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),hp2);
  1653. end;
  1654. begin
  1655. case current_procinfo.procdef.returndef.typ of
  1656. arraydef,recorddef,pointerdef,
  1657. stringdef,enumdef,procdef,objectdef,errordef,
  1658. filedef,setdef,procvardef,
  1659. classrefdef,forwarddef:
  1660. DoRemoveLastDeallocForFuncRes(RS_EAX);
  1661. orddef:
  1662. if current_procinfo.procdef.returndef.size <> 0 then
  1663. begin
  1664. DoRemoveLastDeallocForFuncRes(RS_EAX);
  1665. { for int64/qword }
  1666. if current_procinfo.procdef.returndef.size = 8 then
  1667. DoRemoveLastDeallocForFuncRes(RS_EDX);
  1668. end;
  1669. else
  1670. ;
  1671. end;
  1672. end;
    { Pass-1 peephole optimisations for (V)MOVAPS/(V)MOVAPD:
      - removes self-moves,
      - merges chained aligned moves,
      - folds an aligned move into a following FMA or scalar arithmetic
        instruction when the intermediate register dies.
      Returns True (and sets p to the instruction to re-examine) when the
      instruction stream was changed. }
    function TX86AsmOptimizer.OptPass1_V_MOVAP(var p : tai) : boolean;
      var
        hp1,hp2 : tai;
      begin
        result:=false;
        if MatchOpType(taicpu(p),top_reg,top_reg) then
          begin
            { vmova* reg1,reg1
              =>
              <nop> }
            if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
              begin
                RemoveCurrentP(p);
                result:=true;
                exit;
              end
            else if GetNextInstruction(p,hp1) then
              begin
                if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
                  MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
                  begin
                    { vmova* reg1,reg2
                      vmova* reg2,reg3
                      dealloc reg2
                      =>
                      vmova* reg1,reg3 }
                    TransferUsedRegs(TmpUsedRegs);
                    UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                    { only safe when reg2 really dies after hp1 }
                    if MatchOpType(taicpu(hp1),top_reg,top_reg) and
                      not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                      begin
                        DebugMsg(SPeepholeOptimization + '(V)MOVA*(V)MOVA*2(V)MOVA* 1',p);
                        taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                        RemoveInstruction(hp1);
                        result:=true;
                        exit;
                      end
                    { special case:
                      vmova* reg1,<op>
                      vmova* <op>,reg1
                      =>
                      vmova* reg1,<op> }
                    else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
                      { a volatile memory operand must not lose its read }
                      ((taicpu(p).oper[0]^.typ<>top_ref) or
                       (not(vol_read in taicpu(p).oper[0]^.ref^.volatility))
                      ) then
                      begin
                        DebugMsg(SPeepholeOptimization + '(V)MOVA*(V)MOVA*2(V)MOVA* 2',p);
                        RemoveInstruction(hp1);
                        result:=true;
                        exit;
                      end
                  end
                else if ((MatchInstruction(p,[A_MOVAPS,A_VMOVAPS],[S_NO]) and
                   MatchInstruction(hp1,[A_MOVSS,A_VMOVSS],[S_NO])) or
                   ((MatchInstruction(p,[A_MOVAPD,A_VMOVAPD],[S_NO]) and
                   MatchInstruction(hp1,[A_MOVSD,A_VMOVSD],[S_NO])))
                  ) and
                  MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
                  begin
                    { vmova* reg1,reg2
                      vmovs* reg2,<op>
                      dealloc reg2
                      =>
                      vmovs* reg1,reg3 }
                    TransferUsedRegs(TmpUsedRegs);
                    UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                    if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                      begin
                        DebugMsg(SPeepholeOptimization + '(V)MOVA*(V)MOVS*2(V)MOVS* 1',p);
                        { p becomes the scalar move, targeting hp1's destination }
                        taicpu(p).opcode:=taicpu(hp1).opcode;
                        taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                        RemoveInstruction(hp1);
                        result:=true;
                        exit;
                      end
                  end;
              end;
            if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[1]^.reg) then
              begin
                { fold "vmova* reg1,reg2 / vfma* ..,..,reg2 / vmova* reg2,reg1"
                  into a single FMA on reg1, when reg2 dies afterwards }
                if MatchInstruction(hp1,[A_VFMADDPD,
                                         A_VFMADD132PD,
                                         A_VFMADD132PS,
                                         A_VFMADD132SD,
                                         A_VFMADD132SS,
                                         A_VFMADD213PD,
                                         A_VFMADD213PS,
                                         A_VFMADD213SD,
                                         A_VFMADD213SS,
                                         A_VFMADD231PD,
                                         A_VFMADD231PS,
                                         A_VFMADD231SD,
                                         A_VFMADD231SS,
                                         A_VFMADDSUB132PD,
                                         A_VFMADDSUB132PS,
                                         A_VFMADDSUB213PD,
                                         A_VFMADDSUB213PS,
                                         A_VFMADDSUB231PD,
                                         A_VFMADDSUB231PS,
                                         A_VFMSUB132PD,
                                         A_VFMSUB132PS,
                                         A_VFMSUB132SD,
                                         A_VFMSUB132SS,
                                         A_VFMSUB213PD,
                                         A_VFMSUB213PS,
                                         A_VFMSUB213SD,
                                         A_VFMSUB213SS,
                                         A_VFMSUB231PD,
                                         A_VFMSUB231PS,
                                         A_VFMSUB231SD,
                                         A_VFMSUB231SS,
                                         A_VFMSUBADD132PD,
                                         A_VFMSUBADD132PS,
                                         A_VFMSUBADD213PD,
                                         A_VFMSUBADD213PS,
                                         A_VFMSUBADD231PD,
                                         A_VFMSUBADD231PS,
                                         A_VFNMADD132PD,
                                         A_VFNMADD132PS,
                                         A_VFNMADD132SD,
                                         A_VFNMADD132SS,
                                         A_VFNMADD213PD,
                                         A_VFNMADD213PS,
                                         A_VFNMADD213SD,
                                         A_VFNMADD213SS,
                                         A_VFNMADD231PD,
                                         A_VFNMADD231PS,
                                         A_VFNMADD231SD,
                                         A_VFNMADD231SS,
                                         A_VFNMSUB132PD,
                                         A_VFNMSUB132PS,
                                         A_VFNMSUB132SD,
                                         A_VFNMSUB132SS,
                                         A_VFNMSUB213PD,
                                         A_VFNMSUB213PS,
                                         A_VFNMSUB213SD,
                                         A_VFNMSUB213SS,
                                         A_VFNMSUB231PD,
                                         A_VFNMSUB231PS,
                                         A_VFNMSUB231SD,
                                         A_VFNMSUB231SS],[S_NO]) and
                  { we mix single and double operations here because we assume that the compiler
                    generates vmovapd only after double operations and vmovaps only after single operations }
                  MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
                  GetNextInstruction(hp1,hp2) and
                  MatchInstruction(hp2,[A_VMOVAPD,A_VMOVAPS,A_MOVAPD,A_MOVAPS],[S_NO]) and
                  MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
                  begin
                    TransferUsedRegs(TmpUsedRegs);
                    UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                    UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                    if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
                      begin
                        taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
                        RemoveCurrentP(p, hp1); // <-- Is this actually safe? hp1 is not necessarily the next instruction. [Kit]
                        RemoveInstruction(hp2);
                      end;
                  end
                else if (hp1.typ = ait_instruction) and
                  GetNextInstruction(hp1, hp2) and
                  MatchInstruction(hp2,taicpu(p).opcode,[]) and
                  OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
                  MatchOpType(taicpu(hp2),top_reg,top_reg) and
                  MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
                  { the scalar arithmetic opcode must match the move's precision }
                  (((taicpu(p).opcode=A_MOVAPS) and
                    ((taicpu(hp1).opcode=A_ADDSS) or (taicpu(hp1).opcode=A_SUBSS) or
                     (taicpu(hp1).opcode=A_MULSS) or (taicpu(hp1).opcode=A_DIVSS))) or
                   ((taicpu(p).opcode=A_MOVAPD) and
                    ((taicpu(hp1).opcode=A_ADDSD) or (taicpu(hp1).opcode=A_SUBSD) or
                     (taicpu(hp1).opcode=A_MULSD) or (taicpu(hp1).opcode=A_DIVSD)))
                  ) then
                  { change
                    movapX reg,reg2
                    addsX/subsX/... reg3, reg2
                    movapX reg2,reg
                    to
                    addsX/subsX/... reg3,reg
                  }
                  begin
                    TransferUsedRegs(TmpUsedRegs);
                    UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                    UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                    If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
                      begin
                        DebugMsg(SPeepholeOptimization + 'MovapXOpMovapX2Op ('+
                          debug_op2str(taicpu(p).opcode)+' '+
                          debug_op2str(taicpu(hp1).opcode)+' '+
                          debug_op2str(taicpu(hp2).opcode)+') done',p);
                        { we cannot eliminate the first move if
                          the operations uses the same register for source and dest }
                        if not(OpsEqual(taicpu(hp1).oper[1]^,taicpu(hp1).oper[0]^)) then
                          RemoveCurrentP(p, nil);
                        p:=hp1;
                        taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
                        RemoveInstruction(hp2);
                        result:=true;
                      end;
                  end;
              end;
          end;
      end;
  1874. function TX86AsmOptimizer.OptPass1VOP(var p : tai) : boolean;
  1875. var
  1876. hp1 : tai;
  1877. begin
  1878. result:=false;
  1879. { replace
  1880. V<Op>X %mreg1,%mreg2,%mreg3
  1881. VMovX %mreg3,%mreg4
  1882. dealloc %mreg3
  1883. by
  1884. V<Op>X %mreg1,%mreg2,%mreg4
  1885. ?
  1886. }
  1887. if GetNextInstruction(p,hp1) and
  1888. { we mix single and double operations here because we assume that the compiler
  1889. generates vmovapd only after double operations and vmovaps only after single operations }
  1890. MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
  1891. MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
  1892. (taicpu(hp1).oper[1]^.typ=top_reg) then
  1893. begin
  1894. TransferUsedRegs(TmpUsedRegs);
  1895. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1896. if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)) then
  1897. begin
  1898. taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
  1899. DebugMsg(SPeepholeOptimization + 'VOpVmov2VOp done',p);
  1900. RemoveInstruction(hp1);
  1901. result:=true;
  1902. end;
  1903. end;
  1904. end;
  1905. { Replaces all references to AOldReg in a memory reference to ANewReg }
  1906. class function TX86AsmOptimizer.ReplaceRegisterInRef(var ref: TReference; const AOldReg, ANewReg: TRegister): Boolean;
  1907. begin
  1908. Result := False;
  1909. { For safety reasons, only check for exact register matches }
  1910. { Check base register }
  1911. if (ref.base = AOldReg) then
  1912. begin
  1913. ref.base := ANewReg;
  1914. Result := True;
  1915. end;
  1916. { Check index register }
  1917. if (ref.index = AOldReg) then
  1918. begin
  1919. ref.index := ANewReg;
  1920. Result := True;
  1921. end;
  1922. end;
    { Replaces all references to AOldReg in an operand to ANewReg.
      AOldReg and ANewReg must be valid, of the same register type and the
      same sub-register size; returns True if the operand was changed. }
    class function TX86AsmOptimizer.ReplaceRegisterInOper(const p: taicpu; const OperIdx: Integer; const AOldReg, ANewReg: TRegister): Boolean;
      var
        OldSupReg, NewSupReg: TSuperRegister;
        OldSubReg, NewSubReg: TSubRegister;
        OldRegType: TRegisterType;
        ThisOper: POper;
      begin
        ThisOper := p.oper[OperIdx]; { Faster to access overall }
        Result := False;
        if (AOldReg = NR_NO) or (ANewReg = NR_NO) then
          InternalError(2020011801);
        OldSupReg := getsupreg(AOldReg);
        OldSubReg := getsubreg(AOldReg);
        OldRegType := getregtype(AOldReg);
        NewSupReg := getsupreg(ANewReg);
        NewSubReg := getsubreg(ANewReg);
        { both registers must agree in type and size }
        if OldRegType <> getregtype(ANewReg) then
          InternalError(2020011802);
        if OldSubReg <> NewSubReg then
          InternalError(2020011803);
        case ThisOper^.typ of
          top_reg:
            { replace on an exact match, or - for integer registers - when the
              operand uses the same super-register at an equal or smaller size
              (e.g. %al while replacing %eax); the replacement keeps the
              operand's own sub-register size }
            if (
              (ThisOper^.reg = AOldReg) or
              (
                (OldRegType = R_INTREGISTER) and
                (getsupreg(ThisOper^.reg) = OldSupReg) and
                (getregtype(ThisOper^.reg) = R_INTREGISTER) and
                (
                  (getsubreg(ThisOper^.reg) <= OldSubReg)
{$ifndef x86_64}
                  and (
                    { Under i386 and i8086, ESI, EDI, EBP and ESP
                      don't have an 8-bit representation }
                    (getsubreg(ThisOper^.reg) >= R_SUBW) or
                    not (NewSupReg in [RS_ESI, RS_EDI, RS_EBP, RS_ESP])
                  )
{$endif x86_64}
                )
              )
            ) then
              begin
                ThisOper^.reg := newreg(getregtype(ANewReg), NewSupReg, getsubreg(p.oper[OperIdx]^.reg));
                Result := True;
              end;
          top_ref:
            if ReplaceRegisterInRef(ThisOper^.ref^, AOldReg, ANewReg) then
              Result := True;
          else
            ;
        end;
      end;
  1976. { Replaces all references to AOldReg in an instruction to ANewReg }
  1977. function TX86AsmOptimizer.ReplaceRegisterInInstruction(const p: taicpu; const AOldReg, ANewReg: TRegister): Boolean;
  1978. const
  1979. ReadFlag: array[0..3] of TInsChange = (Ch_Rop1, Ch_Rop2, Ch_Rop3, Ch_Rop4);
  1980. var
  1981. OperIdx: Integer;
  1982. begin
  1983. Result := False;
  1984. for OperIdx := 0 to p.ops - 1 do
  1985. if (ReadFlag[OperIdx] in InsProp[p.Opcode].Ch) and
  1986. { The shift and rotate instructions can only use CL }
  1987. not (
  1988. (OperIdx = 0) and
  1989. { This second condition just helps to avoid unnecessarily
  1990. calling MatchInstruction for 10 different opcodes }
  1991. (p.oper[0]^.reg = NR_CL) and
  1992. MatchInstruction(p, [A_RCL, A_RCR, A_ROL, A_ROR, A_SAL, A_SAR, A_SHL, A_SHLD, A_SHR, A_SHRD], [])
  1993. ) then
  1994. Result := ReplaceRegisterInOper(p, OperIdx, AOldReg, ANewReg) or Result;
  1995. end;
  1996. class function TX86AsmOptimizer.IsRefSafe(const ref: PReference): Boolean; inline;
  1997. begin
  1998. Result :=
  1999. (ref^.index = NR_NO) and
  2000. (
  2001. {$ifdef x86_64}
  2002. (
  2003. (ref^.base = NR_RIP) and
  2004. (ref^.refaddr in [addr_pic, addr_pic_no_got])
  2005. ) or
  2006. {$endif x86_64}
  2007. (ref^.base = NR_STACK_POINTER_REG) or
  2008. (ref^.base = current_procinfo.framepointer)
  2009. );
  2010. end;
  2011. function TX86AsmOptimizer.ConvertLEA(const p: taicpu): Boolean;
  2012. var
  2013. l: asizeint;
  2014. begin
  2015. Result := False;
  2016. { Should have been checked previously }
  2017. if p.opcode <> A_LEA then
  2018. InternalError(2020072501);
  2019. { do not mess with the stack point as adjusting it by lea is recommend, except if we optimize for size }
  2020. if (p.oper[1]^.reg=NR_STACK_POINTER_REG) and
  2021. not(cs_opt_size in current_settings.optimizerswitches) then
  2022. exit;
  2023. with p.oper[0]^.ref^ do
  2024. begin
  2025. if (base <> p.oper[1]^.reg) or
  2026. (index <> NR_NO) or
  2027. assigned(symbol) then
  2028. exit;
  2029. l:=offset;
  2030. if (l=1) and UseIncDec then
  2031. begin
  2032. p.opcode:=A_INC;
  2033. p.loadreg(0,p.oper[1]^.reg);
  2034. p.ops:=1;
  2035. DebugMsg(SPeepholeOptimization + 'Lea2Inc done',p);
  2036. end
  2037. else if (l=-1) and UseIncDec then
  2038. begin
  2039. p.opcode:=A_DEC;
  2040. p.loadreg(0,p.oper[1]^.reg);
  2041. p.ops:=1;
  2042. DebugMsg(SPeepholeOptimization + 'Lea2Dec done',p);
  2043. end
  2044. else
  2045. begin
  2046. if (l<0) and (l<>-2147483648) then
  2047. begin
  2048. p.opcode:=A_SUB;
  2049. p.loadConst(0,-l);
  2050. DebugMsg(SPeepholeOptimization + 'Lea2Sub done',p);
  2051. end
  2052. else
  2053. begin
  2054. p.opcode:=A_ADD;
  2055. p.loadConst(0,l);
  2056. DebugMsg(SPeepholeOptimization + 'Lea2Add done',p);
  2057. end;
  2058. end;
  2059. end;
  2060. Result := True;
  2061. end;
    { Given "mov %reg1,%reg2" in p_mov, tries to replace reads of %reg2 in hp
      with %reg1, avoiding instructions with implicit or restricted operands.
      Returns True if hp was changed. }
    function TX86AsmOptimizer.DeepMOVOpt(const p_mov: taicpu; const hp: taicpu): Boolean;
      var
        CurrentReg, ReplaceReg: TRegister;
      begin
        Result := False;
        ReplaceReg := taicpu(p_mov).oper[0]^.reg;  { source of the MOV }
        CurrentReg := taicpu(p_mov).oper[1]^.reg;  { destination of the MOV }
        case hp.opcode of
          A_FSTSW, A_FNSTSW,
          A_IN, A_INS, A_OUT, A_OUTS,
          A_CMPS, A_LODS, A_MOVS, A_SCAS, A_STOS:
            { These routines have explicit operands, but they are restricted in
              what they can be (e.g. IN and OUT can only read from AL, AX or
              EAX. }
            Exit;
          A_IMUL:
            begin
              { The 1-operand version writes to implicit registers
                The 2-operand version reads from the first operator, and reads
                from and writes to the second (equivalent to Ch_ROp1, ChRWOp2).
                the 3-operand version reads from a register that it doesn't write to
              }
              case hp.ops of
                1:
                  { the operand is safe to replace unless it is one of IMUL's
                    implicit registers (AL/AX/EAX, EDX) for this size }
                  if (
                    (
                      (hp.opsize = S_B) and (getsupreg(CurrentReg) <> RS_EAX)
                    ) or
                    not (getsupreg(CurrentReg) in [RS_EAX, RS_EDX])
                  ) and ReplaceRegisterInOper(hp, 0, CurrentReg, ReplaceReg) then
                    begin
                      Result := True;
                      DebugMsg(SPeepholeOptimization + debug_regname(CurrentReg) + ' = ' + debug_regname(ReplaceReg) + '; changed to minimise pipeline stall (MovIMul2MovIMul 1)', hp);
                      AllocRegBetween(ReplaceReg, p_mov, hp, UsedRegs);
                    end;
                2:
                  { Only modify the first parameter }
                  if ReplaceRegisterInOper(hp, 0, CurrentReg, ReplaceReg) then
                    begin
                      Result := True;
                      DebugMsg(SPeepholeOptimization + debug_regname(CurrentReg) + ' = ' + debug_regname(ReplaceReg) + '; changed to minimise pipeline stall (MovIMul2MovIMul 2)', hp);
                      AllocRegBetween(ReplaceReg, p_mov, hp, UsedRegs);
                    end;
                3:
                  { Only modify the second parameter }
                  if ReplaceRegisterInOper(hp, 1, CurrentReg, ReplaceReg) then
                    begin
                      Result := True;
                      DebugMsg(SPeepholeOptimization + debug_regname(CurrentReg) + ' = ' + debug_regname(ReplaceReg) + '; changed to minimise pipeline stall (MovIMul2MovIMul 3)', hp);
                      AllocRegBetween(ReplaceReg, p_mov, hp, UsedRegs);
                    end;
                else
                  InternalError(2020012901);
              end;
            end;
          else
            { general case: substitute in every operand the instruction reads }
            if (hp.ops > 0) and
              ReplaceRegisterInInstruction(hp, CurrentReg, ReplaceReg) then
              begin
                Result := True;
                DebugMsg(SPeepholeOptimization + debug_regname(CurrentReg) + ' = ' + debug_regname(ReplaceReg) + '; changed to minimise pipeline stall (MovXXX2MovXXX)', hp);
                AllocRegBetween(ReplaceReg, p_mov, hp, UsedRegs);
              end;
        end;
      end;
  2127. function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  2128. var
  2129. hp1, hp2, hp3: tai;
  2130. DoOptimisation, TempBool: Boolean;
  2131. procedure convert_mov_value(signed_movop: tasmop; max_value: tcgint); inline;
  2132. begin
  2133. if taicpu(hp1).opcode = signed_movop then
  2134. begin
  2135. if taicpu(p).oper[0]^.val > max_value shr 1 then
  2136. taicpu(p).oper[0]^.val:=taicpu(p).oper[0]^.val - max_value - 1 { Convert to signed }
  2137. end
  2138. else
  2139. taicpu(p).oper[0]^.val:=taicpu(p).oper[0]^.val and max_value; { Trim to unsigned }
  2140. end;
  2141. var
  2142. GetNextInstruction_p, TempRegUsed, CrossJump: Boolean;
  2143. PreMessage, RegName1, RegName2, InputVal, MaskNum: string;
  2144. NewSize: topsize;
  2145. CurrentReg, ActiveReg: TRegister;
  2146. SourceRef, TargetRef: TReference;
  2147. MovAligned, MovUnaligned: TAsmOp;
  2148. begin
  2149. Result:=false;
  2150. GetNextInstruction_p:=GetNextInstruction(p, hp1);
  2151. { remove mov reg1,reg1? }
  2152. if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^)
  2153. then
  2154. begin
  2155. DebugMsg(SPeepholeOptimization + 'Mov2Nop 1 done',p);
  2156. { take care of the register (de)allocs following p }
  2157. RemoveCurrentP(p, hp1);
  2158. Result:=true;
  2159. exit;
  2160. end;
  2161. { All the next optimisations require a next instruction }
  2162. if not GetNextInstruction_p or (hp1.typ <> ait_instruction) then
  2163. Exit;
  2164. { Look for:
  2165. mov %reg1,%reg2
  2166. ??? %reg2,r/m
  2167. Change to:
  2168. mov %reg1,%reg2
  2169. ??? %reg1,r/m
  2170. }
  2171. if MatchOpType(taicpu(p), top_reg, top_reg) then
  2172. begin
  2173. CurrentReg := taicpu(p).oper[1]^.reg;
  2174. if RegReadByInstruction(CurrentReg, hp1) and
  2175. DeepMOVOpt(taicpu(p), taicpu(hp1)) then
  2176. begin
  2177. TransferUsedRegs(TmpUsedRegs);
  2178. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  2179. if not RegUsedAfterInstruction(CurrentReg, hp1, TmpUsedRegs) and
  2180. { Just in case something didn't get modified (e.g. an
  2181. implicit register) }
  2182. not RegReadByInstruction(CurrentReg, hp1) then
  2183. begin
  2184. { We can remove the original MOV }
  2185. DebugMsg(SPeepholeOptimization + 'Mov2Nop 3 done',p);
  2186. RemoveCurrentp(p, hp1);
  2187. { UsedRegs got updated by RemoveCurrentp }
  2188. Result := True;
  2189. Exit;
  2190. end;
  2191. { If we know a MOV instruction has become a null operation, we might as well
  2192. get rid of it now to save time. }
  2193. if (taicpu(hp1).opcode = A_MOV) and
  2194. (taicpu(hp1).oper[1]^.typ = top_reg) and
  2195. SuperRegistersEqual(taicpu(hp1).oper[1]^.reg, taicpu(p).oper[0]^.reg) and
  2196. { Just being a register is enough to confirm it's a null operation }
  2197. (taicpu(hp1).oper[0]^.typ = top_reg) then
  2198. begin
  2199. Result := True;
  2200. { Speed-up to reduce a pipeline stall... if we had something like...
  2201. movl %eax,%edx
  2202. movw %dx,%ax
  2203. ... the second instruction would change to movw %ax,%ax, but
  2204. given that it is now %ax that's active rather than %eax,
  2205. penalties might occur due to a partial register write, so instead,
  2206. change it to a MOVZX instruction when optimising for speed.
  2207. }
  2208. if not (cs_opt_size in current_settings.optimizerswitches) and
  2209. IsMOVZXAcceptable and
  2210. (taicpu(hp1).opsize < taicpu(p).opsize)
  2211. {$ifdef x86_64}
  2212. { operations already implicitly set the upper 64 bits to zero }
  2213. and not ((taicpu(hp1).opsize = S_L) and (taicpu(p).opsize = S_Q))
  2214. {$endif x86_64}
  2215. then
  2216. begin
  2217. CurrentReg := taicpu(hp1).oper[1]^.reg;
  2218. DebugMsg(SPeepholeOptimization + 'Zero-extension to minimise pipeline stall (Mov2Movz)',hp1);
  2219. case taicpu(p).opsize of
  2220. S_W:
  2221. if taicpu(hp1).opsize = S_B then
  2222. taicpu(hp1).opsize := S_BL
  2223. else
  2224. InternalError(2020012911);
  2225. S_L{$ifdef x86_64}, S_Q{$endif x86_64}:
  2226. case taicpu(hp1).opsize of
  2227. S_B:
  2228. taicpu(hp1).opsize := S_BL;
  2229. S_W:
  2230. taicpu(hp1).opsize := S_WL;
  2231. else
  2232. InternalError(2020012912);
  2233. end;
  2234. else
  2235. InternalError(2020012910);
  2236. end;
  2237. taicpu(hp1).opcode := A_MOVZX;
  2238. taicpu(hp1).oper[1]^.reg := newreg(getregtype(CurrentReg), getsupreg(CurrentReg), R_SUBD)
  2239. end
  2240. else
  2241. begin
  2242. GetNextInstruction_p := GetNextInstruction(hp1, hp2);
  2243. DebugMsg(SPeepholeOptimization + 'Mov2Nop 4 done',hp1);
  2244. RemoveInstruction(hp1);
  2245. { The instruction after what was hp1 is now the immediate next instruction,
  2246. so we can continue to make optimisations if it's present }
  2247. if not GetNextInstruction_p or (hp2.typ <> ait_instruction) then
  2248. Exit;
  2249. hp1 := hp2;
  2250. end;
  2251. end;
  2252. end;
  2253. end;
  2254. { Depending on the DeepMOVOpt above, it may turn out that hp1 completely
  2255. overwrites the original destination register. e.g.
  2256. movl ###,%reg2d
  2257. movslq ###,%reg2q (### doesn't have to be the same as the first one)
  2258. In this case, we can remove the MOV (Go to "Mov2Nop 5" below)
  2259. }
  2260. if (taicpu(p).oper[1]^.typ = top_reg) and
  2261. MatchInstruction(hp1, [A_LEA, A_MOV, A_MOVSX, A_MOVZX{$ifdef x86_64}, A_MOVSXD{$endif x86_64}], []) and
  2262. (taicpu(hp1).oper[1]^.typ = top_reg) and
  2263. Reg1WriteOverwritesReg2Entirely(taicpu(hp1).oper[1]^.reg, taicpu(p).oper[1]^.reg) then
  2264. begin
  2265. if RegInOp(taicpu(p).oper[1]^.reg, taicpu(hp1).oper[0]^) then
  2266. begin
  2267. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  2268. case taicpu(p).oper[0]^.typ of
  2269. top_const:
  2270. { We have something like:
  2271. movb $x, %regb
  2272. movzbl %regb,%regd
  2273. Change to:
  2274. movl $x, %regd
  2275. }
  2276. begin
  2277. case taicpu(hp1).opsize of
  2278. S_BW:
  2279. begin
  2280. convert_mov_value(A_MOVSX, $FF);
  2281. setsubreg(taicpu(p).oper[1]^.reg, R_SUBW);
  2282. taicpu(p).opsize := S_W;
  2283. end;
  2284. S_BL:
  2285. begin
  2286. convert_mov_value(A_MOVSX, $FF);
  2287. setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
  2288. taicpu(p).opsize := S_L;
  2289. end;
  2290. S_WL:
  2291. begin
  2292. convert_mov_value(A_MOVSX, $FFFF);
  2293. setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
  2294. taicpu(p).opsize := S_L;
  2295. end;
  2296. {$ifdef x86_64}
  2297. S_BQ:
  2298. begin
  2299. convert_mov_value(A_MOVSX, $FF);
  2300. setsubreg(taicpu(p).oper[1]^.reg, R_SUBQ);
  2301. taicpu(p).opsize := S_Q;
  2302. end;
  2303. S_WQ:
  2304. begin
  2305. convert_mov_value(A_MOVSX, $FFFF);
  2306. setsubreg(taicpu(p).oper[1]^.reg, R_SUBQ);
  2307. taicpu(p).opsize := S_Q;
  2308. end;
  2309. S_LQ:
  2310. begin
  2311. convert_mov_value(A_MOVSXD, $FFFFFFFF); { Note it's MOVSXD, not MOVSX }
  2312. setsubreg(taicpu(p).oper[1]^.reg, R_SUBQ);
  2313. taicpu(p).opsize := S_Q;
  2314. end;
  2315. {$endif x86_64}
  2316. else
  2317. { If hp1 was a MOV instruction, it should have been
  2318. optimised already }
  2319. InternalError(2020021001);
  2320. end;
  2321. DebugMsg(SPeepholeOptimization + 'MovMovXX2MovXX 2 done',p);
  2322. RemoveInstruction(hp1);
  2323. Result := True;
  2324. Exit;
  2325. end;
  2326. top_ref:
  2327. { We have something like:
  2328. movb mem, %regb
  2329. movzbl %regb,%regd
  2330. Change to:
  2331. movzbl mem, %regd
  2332. }
  2333. if (taicpu(p).oper[0]^.ref^.refaddr<>addr_full) and (IsMOVZXAcceptable or (taicpu(hp1).opcode<>A_MOVZX)) then
  2334. begin
  2335. DebugMsg(SPeepholeOptimization + 'MovMovXX2MovXX 1 done',p);
  2336. taicpu(hp1).loadref(0,taicpu(p).oper[0]^.ref^);
  2337. RemoveCurrentP(p, hp1);
  2338. Result:=True;
  2339. Exit;
  2340. end;
  2341. else
  2342. if (taicpu(hp1).opcode <> A_MOV) and (taicpu(hp1).opcode <> A_LEA) then
  2343. { Just to make a saving, since there are no more optimisations with MOVZX and MOVSX/D }
  2344. Exit;
  2345. end;
  2346. end
  2347. { The RegInOp check makes sure that movl r/m,%reg1l; movzbl (%reg1l),%reg1l"
  2348. and "movl r/m,%reg1; leal $1(%reg1,%reg2),%reg1" etc. are not incorrectly
  2349. optimised }
  2350. else
  2351. begin
  2352. DebugMsg(SPeepholeOptimization + 'Mov2Nop 5 done',p);
  2353. RemoveCurrentP(p, hp1);
  2354. Result := True;
  2355. Exit;
  2356. end;
  2357. end;
  2358. if (taicpu(hp1).opcode = A_AND) and
  2359. (taicpu(p).oper[1]^.typ = top_reg) and
  2360. MatchOpType(taicpu(hp1),top_const,top_reg) then
  2361. begin
  2362. if MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
  2363. begin
  2364. case taicpu(p).opsize of
  2365. S_L:
  2366. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  2367. begin
  2368. { Optimize out:
  2369. mov x, %reg
  2370. and ffffffffh, %reg
  2371. }
  2372. DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 1 done',p);
  2373. RemoveInstruction(hp1);
  2374. Result:=true;
  2375. exit;
  2376. end;
  2377. S_Q: { TODO: Confirm if this is even possible }
  2378. if (taicpu(hp1).oper[0]^.val = $ffffffffffffffff) then
  2379. begin
  2380. { Optimize out:
  2381. mov x, %reg
  2382. and ffffffffffffffffh, %reg
  2383. }
  2384. DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 2 done',p);
  2385. RemoveInstruction(hp1);
  2386. Result:=true;
  2387. exit;
  2388. end;
  2389. else
  2390. ;
  2391. end;
  2392. if ((taicpu(p).oper[0]^.typ=top_reg) or
  2393. ((taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr<>addr_full))) and
  2394. GetNextInstruction(hp1,hp2) and
  2395. MatchInstruction(hp2,A_TEST,[taicpu(p).opsize]) and
  2396. MatchOperand(taicpu(hp1).oper[1]^,taicpu(hp2).oper[1]^) and
  2397. (MatchOperand(taicpu(hp2).oper[0]^,taicpu(hp2).oper[1]^) or
  2398. MatchOperand(taicpu(hp2).oper[0]^,-1)) and
  2399. GetNextInstruction(hp2,hp3) and
  2400. MatchInstruction(hp3,A_Jcc,A_Setcc,[]) and
  2401. (taicpu(hp3).condition in [C_E,C_NE]) then
  2402. begin
  2403. TransferUsedRegs(TmpUsedRegs);
  2404. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  2405. UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
  2406. if not(RegUsedAfterInstruction(taicpu(hp2).oper[1]^.reg, hp2, TmpUsedRegs)) then
  2407. begin
  2408. DebugMsg(SPeepholeOptimization + 'MovAndTest2Test done',p);
  2409. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  2410. taicpu(hp1).opcode:=A_TEST;
  2411. RemoveInstruction(hp2);
  2412. RemoveCurrentP(p, hp1);
  2413. Result:=true;
  2414. exit;
  2415. end;
  2416. end;
  2417. end
  2418. else if IsMOVZXAcceptable and
  2419. (taicpu(p).oper[1]^.typ = top_reg) and (taicpu(hp1).oper[1]^.typ = top_reg) and
  2420. (taicpu(p).oper[0]^.typ <> top_const) and { MOVZX only supports registers and memory, not immediates (use MOV for that!) }
  2421. (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))
  2422. then
  2423. begin
  2424. InputVal := debug_operstr(taicpu(p).oper[0]^);
  2425. MaskNum := debug_tostr(taicpu(hp1).oper[0]^.val);
  2426. case taicpu(p).opsize of
  2427. S_B:
  2428. if (taicpu(hp1).oper[0]^.val = $ff) then
  2429. begin
  2430. { Convert:
  2431. movb x, %regl movb x, %regl
  2432. andw ffh, %regw andl ffh, %regd
  2433. To:
  2434. movzbw x, %regd movzbl x, %regd
  2435. (Identical registers, just different sizes)
  2436. }
  2437. RegName1 := debug_regname(taicpu(p).oper[1]^.reg); { 8-bit register name }
  2438. RegName2 := debug_regname(taicpu(hp1).oper[1]^.reg); { 16/32-bit register name }
  2439. case taicpu(hp1).opsize of
  2440. S_W: NewSize := S_BW;
  2441. S_L: NewSize := S_BL;
  2442. {$ifdef x86_64}
  2443. S_Q: NewSize := S_BQ;
  2444. {$endif x86_64}
  2445. else
  2446. InternalError(2018011510);
  2447. end;
  2448. end
  2449. else
  2450. NewSize := S_NO;
  2451. S_W:
  2452. if (taicpu(hp1).oper[0]^.val = $ffff) then
  2453. begin
  2454. { Convert:
  2455. movw x, %regw
  2456. andl ffffh, %regd
  2457. To:
  2458. movzwl x, %regd
  2459. (Identical registers, just different sizes)
  2460. }
  2461. RegName1 := debug_regname(taicpu(p).oper[1]^.reg); { 16-bit register name }
  2462. RegName2 := debug_regname(taicpu(hp1).oper[1]^.reg); { 32-bit register name }
  2463. case taicpu(hp1).opsize of
  2464. S_L: NewSize := S_WL;
  2465. {$ifdef x86_64}
  2466. S_Q: NewSize := S_WQ;
  2467. {$endif x86_64}
  2468. else
  2469. InternalError(2018011511);
  2470. end;
  2471. end
  2472. else
  2473. NewSize := S_NO;
  2474. else
  2475. NewSize := S_NO;
  2476. end;
  2477. if NewSize <> S_NO then
  2478. begin
  2479. PreMessage := 'mov' + debug_opsize2str(taicpu(p).opsize) + ' ' + InputVal + ',' + RegName1;
  2480. { The actual optimization }
  2481. taicpu(p).opcode := A_MOVZX;
  2482. taicpu(p).changeopsize(NewSize);
  2483. taicpu(p).oper[1]^ := taicpu(hp1).oper[1]^;
  2484. { Safeguard if "and" is followed by a conditional command }
  2485. TransferUsedRegs(TmpUsedRegs);
  2486. UpdateUsedRegs(TmpUsedRegs,tai(p.next));
  2487. if (RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp1, TmpUsedRegs)) then
  2488. begin
  2489. { At this point, the "and" command is effectively equivalent to
  2490. "test %reg,%reg". This will be handled separately by the
  2491. Peephole Optimizer. [Kit] }
  2492. DebugMsg(SPeepholeOptimization + PreMessage +
  2493. ' -> movz' + debug_opsize2str(NewSize) + ' ' + InputVal + ',' + RegName2, p);
  2494. end
  2495. else
  2496. begin
  2497. DebugMsg(SPeepholeOptimization + PreMessage + '; and' + debug_opsize2str(taicpu(hp1).opsize) + ' $' + MaskNum + ',' + RegName2 +
  2498. ' -> movz' + debug_opsize2str(NewSize) + ' ' + InputVal + ',' + RegName2, p);
  2499. RemoveInstruction(hp1);
  2500. end;
  2501. Result := True;
  2502. Exit;
  2503. end;
  2504. end;
  2505. end;
  2506. if (taicpu(hp1).opcode = A_OR) and
  2507. (taicpu(p).oper[1]^.typ = top_reg) and
  2508. MatchOperand(taicpu(p).oper[0]^, 0) and
  2509. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^.reg) then
  2510. begin
  2511. { mov 0, %reg
  2512. or ###,%reg
  2513. Change to (only if the flags are not used):
  2514. mov ###,%reg
  2515. }
  2516. TransferUsedRegs(TmpUsedRegs);
  2517. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  2518. DoOptimisation := True;
  2519. { Even if the flags are used, we might be able to do the optimisation
  2520. if the conditions are predictable }
  2521. if RegInUsedRegs(NR_DEFAULTFLAGS, TmpUsedRegs) then
  2522. begin
  2523. { Only perform if ### = %reg (the same register) or equal to 0,
  2524. so %reg is guaranteed to still have a value of zero }
  2525. if MatchOperand(taicpu(hp1).oper[0]^, 0) or
  2526. MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^.reg) then
  2527. begin
  2528. hp2 := hp1;
  2529. UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
  2530. while RegInUsedRegs(NR_DEFAULTFLAGS, TmpUsedRegs) and
  2531. GetNextInstruction(hp2, hp3) do
  2532. begin
  2533. { Don't continue modifying if the flags state is getting changed }
  2534. if RegModifiedByInstruction(NR_DEFAULTFLAGS, hp3) then
  2535. Break;
  2536. UpdateUsedRegs(TmpUsedRegs, tai(hp3.Next));
  2537. if MatchInstruction(hp3, A_Jcc, A_SETcc, A_CMOVcc, []) then
  2538. begin
  2539. if condition_in(C_E, taicpu(hp3).condition) or (taicpu(hp3).condition in [C_NC, C_NS, C_NO]) then
  2540. begin
  2541. { Condition is always true }
  2542. case taicpu(hp3).opcode of
  2543. A_Jcc:
  2544. begin
  2545. DebugMsg(SPeepholeOptimization + 'Condition is always true (jump made unconditional)', hp3);
  2546. { Check for jump shortcuts before we destroy the condition }
  2547. DoJumpOptimizations(hp3, TempBool);
  2548. MakeUnconditional(taicpu(hp3));
  2549. Result := True;
  2550. end;
  2551. A_CMOVcc:
  2552. begin
  2553. DebugMsg(SPeepholeOptimization + 'Condition is always true (CMOVcc -> MOV)', hp3);
  2554. taicpu(hp3).opcode := A_MOV;
  2555. taicpu(hp3).condition := C_None;
  2556. Result := True;
  2557. end;
  2558. A_SETcc:
  2559. begin
  2560. DebugMsg(SPeepholeOptimization + 'Condition is always true (changed to MOV 1)', hp3);
  2561. { Convert "set(c) %reg" instruction to "movb 1,%reg" }
  2562. taicpu(hp3).opcode := A_MOV;
  2563. taicpu(hp3).ops := 2;
  2564. taicpu(hp3).condition := C_None;
  2565. taicpu(hp3).opsize := S_B;
  2566. taicpu(hp3).loadreg(1,taicpu(hp3).oper[0]^.reg);
  2567. taicpu(hp3).loadconst(0, 1);
  2568. Result := True;
  2569. end;
  2570. else
  2571. InternalError(2021090701);
  2572. end;
  2573. end
  2574. else if (taicpu(hp3).condition in [C_A, C_B, C_C, C_G, C_L, C_NE, C_NZ, C_O, C_S]) then
  2575. begin
  2576. { Condition is always false }
  2577. case taicpu(hp3).opcode of
  2578. A_Jcc:
  2579. begin
  2580. DebugMsg(SPeepholeOptimization + 'Condition is always false (jump removed)', hp3);
  2581. TAsmLabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
  2582. RemoveInstruction(hp3);
  2583. Result := True;
  2584. { Since hp3 was deleted, hp2 must not be updated }
  2585. Continue;
  2586. end;
  2587. A_CMOVcc:
  2588. begin
  2589. DebugMsg(SPeepholeOptimization + 'Condition is always false (conditional load removed)', hp3);
  2590. RemoveInstruction(hp3);
  2591. Result := True;
  2592. { Since hp3 was deleted, hp2 must not be updated }
  2593. Continue;
  2594. end;
  2595. A_SETcc:
  2596. begin
  2597. DebugMsg(SPeepholeOptimization + 'Condition is always false (changed to MOV 0)', hp3);
  2598. { Convert "set(c) %reg" instruction to "movb 0,%reg" }
  2599. taicpu(hp3).opcode := A_MOV;
  2600. taicpu(hp3).ops := 2;
  2601. taicpu(hp3).condition := C_None;
  2602. taicpu(hp3).opsize := S_B;
  2603. taicpu(hp3).loadreg(1,taicpu(hp3).oper[0]^.reg);
  2604. taicpu(hp3).loadconst(0, 0);
  2605. Result := True;
  2606. end;
  2607. else
  2608. InternalError(2021090702);
  2609. end;
  2610. end
  2611. else
  2612. { Uncertain what to do - don't optimise (although optimise other conditional statements if present) }
  2613. DoOptimisation := False;
  2614. end;
  2615. hp2 := hp3;
  2616. end;
  2617. { Flags are still in use - don't optimise }
  2618. if DoOptimisation and RegInUsedRegs(NR_DEFAULTFLAGS, TmpUsedRegs) then
  2619. DoOptimisation := False;
  2620. end
  2621. else
  2622. DoOptimisation := False;
  2623. end;
  2624. if DoOptimisation then
  2625. begin
  2626. {$ifdef x86_64}
  2627. { OR only supports 32-bit sign-extended constants for 64-bit
  2628. instructions, so compensate for this if the constant is
  2629. encoded as a value greater than or equal to 2^31 }
  2630. if (taicpu(hp1).opsize = S_Q) and
  2631. (taicpu(hp1).oper[0]^.typ = top_const) and
  2632. (taicpu(hp1).oper[0]^.val >= $80000000) then
  2633. taicpu(hp1).oper[0]^.val := taicpu(hp1).oper[0]^.val or $FFFFFFFF00000000;
  2634. {$endif x86_64}
  2635. DebugMsg(SPeepholeOptimization + 'MOV 0 / OR -> MOV', p);
  2636. taicpu(hp1).opcode := A_MOV;
  2637. RemoveCurrentP(p, hp1);
  2638. Result := True;
  2639. Exit;
  2640. end;
  2641. end;
  2642. { Next instruction is also a MOV ? }
  2643. if MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  2644. begin
  2645. if (taicpu(p).oper[1]^.typ = top_reg) and
  2646. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  2647. begin
  2648. CurrentReg := taicpu(p).oper[1]^.reg;
  2649. TransferUsedRegs(TmpUsedRegs);
  2650. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  2651. { we have
  2652. mov x, %treg
  2653. mov %treg, y
  2654. }
  2655. if not(RegInOp(CurrentReg, taicpu(hp1).oper[1]^)) then
  2656. if not(RegUsedAfterInstruction(CurrentReg, hp1, TmpUsedRegs)) then
  2657. { we've got
  2658. mov x, %treg
  2659. mov %treg, y
  2660. with %treg is not used after }
  2661. case taicpu(p).oper[0]^.typ Of
  2662. { top_reg is covered by DeepMOVOpt }
  2663. top_const:
  2664. begin
  2665. { change
  2666. mov const, %treg
  2667. mov %treg, y
  2668. to
  2669. mov const, y
  2670. }
  2671. if (taicpu(hp1).oper[1]^.typ=top_reg) or
  2672. ((taicpu(p).oper[0]^.val>=low(longint)) and (taicpu(p).oper[0]^.val<=high(longint))) then
  2673. begin
  2674. if taicpu(hp1).oper[1]^.typ=top_reg then
  2675. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  2676. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  2677. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 5 done',p);
  2678. RemoveInstruction(hp1);
  2679. Result:=true;
  2680. Exit;
  2681. end;
  2682. end;
  2683. top_ref:
  2684. case taicpu(hp1).oper[1]^.typ of
  2685. top_reg:
  2686. begin
  2687. { change
  2688. mov mem, %treg
  2689. mov %treg, %reg
  2690. to
  2691. mov mem, %reg"
  2692. }
  2693. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  2694. taicpu(p).loadreg(1, taicpu(hp1).oper[1]^.reg);
  2695. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 3 done',p);
  2696. RemoveInstruction(hp1);
  2697. Result:=true;
  2698. Exit;
  2699. end;
  2700. top_ref:
  2701. begin
  2702. {$ifdef x86_64}
  2703. { Look for the following to simplify:
  2704. mov x(mem1), %reg
  2705. mov %reg, y(mem2)
  2706. mov x+8(mem1), %reg
  2707. mov %reg, y+8(mem2)
  2708. Change to:
  2709. movdqu x(mem1), %xmmreg
  2710. movdqu %xmmreg, y(mem2)
  2711. }
  2712. SourceRef := taicpu(p).oper[0]^.ref^;
  2713. TargetRef := taicpu(hp1).oper[1]^.ref^;
  2714. if (taicpu(p).opsize = S_Q) and
  2715. GetNextInstruction(hp1, hp2) and
  2716. MatchInstruction(hp2, A_MOV, [taicpu(p).opsize]) and
  2717. MatchOpType(taicpu(hp2), top_ref, top_reg) then
  2718. begin
  2719. { Delay calling GetNextInstruction(hp2, hp3) for as long as possible }
  2720. UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
  2721. Inc(SourceRef.offset, 8);
  2722. if UseAVX then
  2723. begin
  2724. MovAligned := A_VMOVDQA;
  2725. MovUnaligned := A_VMOVDQU;
  2726. end
  2727. else
  2728. begin
  2729. MovAligned := A_MOVDQA;
  2730. MovUnaligned := A_MOVDQU;
  2731. end;
  2732. if RefsEqual(SourceRef, taicpu(hp2).oper[0]^.ref^) then
  2733. begin
  2734. UpdateUsedRegs(TmpUsedRegs, tai(hp2.Next));
  2735. Inc(TargetRef.offset, 8);
  2736. if GetNextInstruction(hp2, hp3) and
  2737. MatchInstruction(hp3, A_MOV, [taicpu(p).opsize]) and
  2738. MatchOpType(taicpu(hp3), top_reg, top_ref) and
  2739. (taicpu(hp2).oper[1]^.reg = taicpu(hp3).oper[0]^.reg) and
  2740. RefsEqual(TargetRef, taicpu(hp3).oper[1]^.ref^) and
  2741. not RegUsedAfterInstruction(taicpu(hp2).oper[1]^.reg, hp3, TmpUsedRegs) then
  2742. begin
  2743. CurrentReg := GetMMRegisterBetween(R_SUBMMX, UsedRegs, p, hp3);
  2744. if CurrentReg <> NR_NO then
  2745. begin
  2746. { Remember that the offsets are 8 ahead }
  2747. if ((SourceRef.offset mod 16) = 8) and
  2748. (
  2749. { Base pointer is always aligned (stack pointer won't be if there's no stack frame) }
  2750. (SourceRef.base = current_procinfo.framepointer) or
  2751. ((SourceRef.alignment >= 16) and ((SourceRef.alignment mod 16) = 0))
  2752. ) then
  2753. taicpu(p).opcode := MovAligned
  2754. else
  2755. taicpu(p).opcode := MovUnaligned;
  2756. taicpu(p).opsize := S_XMM;
  2757. taicpu(p).oper[1]^.reg := CurrentReg;
  2758. if ((TargetRef.offset mod 16) = 8) and
  2759. (
  2760. { Base pointer is always aligned (stack pointer won't be if there's no stack frame) }
  2761. (TargetRef.base = current_procinfo.framepointer) or
  2762. ((TargetRef.alignment >= 16) and ((TargetRef.alignment mod 16) = 0))
  2763. ) then
  2764. taicpu(hp1).opcode := MovAligned
  2765. else
  2766. taicpu(hp1).opcode := MovUnaligned;
  2767. taicpu(hp1).opsize := S_XMM;
  2768. taicpu(hp1).oper[0]^.reg := CurrentReg;
  2769. DebugMsg(SPeepholeOptimization + 'Used ' + debug_regname(CurrentReg) + ' to merge a pair of memory moves (MovMovMovMov2MovdqMovdq 1)', p);
  2770. RemoveInstruction(hp2);
  2771. RemoveInstruction(hp3);
  2772. Result := True;
  2773. Exit;
  2774. end;
  2775. end;
  2776. end
  2777. else
  2778. begin
  2779. { See if the next references are 8 less rather than 8 greater }
  2780. Dec(SourceRef.offset, 16); { -8 the other way }
  2781. if RefsEqual(SourceRef, taicpu(hp2).oper[0]^.ref^) then
  2782. begin
  2783. UpdateUsedRegs(TmpUsedRegs, tai(hp2.Next));
  2784. Dec(TargetRef.offset, 8); { Only 8, not 16, as it wasn't incremented unlike SourceRef }
  2785. if GetNextInstruction(hp2, hp3) and
  2786. MatchInstruction(hp3, A_MOV, [taicpu(p).opsize]) and
  2787. MatchOpType(taicpu(hp3), top_reg, top_ref) and
  2788. (taicpu(hp2).oper[1]^.reg = taicpu(hp3).oper[0]^.reg) and
  2789. RefsEqual(TargetRef, taicpu(hp3).oper[1]^.ref^) and
  2790. not RegUsedAfterInstruction(taicpu(hp2).oper[1]^.reg, hp3, TmpUsedRegs) then
  2791. begin
  2792. CurrentReg := GetMMRegisterBetween(R_SUBMMX, UsedRegs, p, hp3);
  2793. if CurrentReg <> NR_NO then
  2794. begin
  2795. { hp2 and hp3 are the starting offsets, so mod 0 this time }
  2796. if ((SourceRef.offset mod 16) = 0) and
  2797. (
  2798. { Base pointer is always aligned (stack pointer won't be if there's no stack frame) }
  2799. (SourceRef.base = current_procinfo.framepointer) or
  2800. ((SourceRef.alignment >= 16) and ((SourceRef.alignment mod 16) = 0))
  2801. ) then
  2802. taicpu(hp2).opcode := MovAligned
  2803. else
  2804. taicpu(hp2).opcode := MovUnaligned;
  2805. taicpu(hp2).opsize := S_XMM;
  2806. taicpu(hp2).oper[1]^.reg := CurrentReg;
  2807. if ((TargetRef.offset mod 16) = 0) and
  2808. (
  2809. { Base pointer is always aligned (stack pointer won't be if there's no stack frame) }
  2810. (TargetRef.base = current_procinfo.framepointer) or
  2811. ((TargetRef.alignment >= 16) and ((TargetRef.alignment mod 16) = 0))
  2812. ) then
  2813. taicpu(hp3).opcode := MovAligned
  2814. else
  2815. taicpu(hp3).opcode := MovUnaligned;
  2816. taicpu(hp3).opsize := S_XMM;
  2817. taicpu(hp3).oper[0]^.reg := CurrentReg;
  2818. DebugMsg(SPeepholeOptimization + 'Used ' + debug_regname(CurrentReg) + ' to merge a pair of memory moves (MovMovMovMov2MovdqMovdq 2)', p);
  2819. RemoveInstruction(hp1);
  2820. RemoveCurrentP(p, hp2);
  2821. Result := True;
  2822. Exit;
  2823. end;
  2824. end;
  2825. end;
  2826. end;
  2827. end;
  2828. {$endif x86_64}
  2829. end;
  2830. else
  2831. { The write target should be a reg or a ref }
  2832. InternalError(2021091601);
  2833. end;
  2834. else
  2835. ;
  2836. end
  2837. else
  2838. { %treg is used afterwards, but all eventualities
  2839. other than the first MOV instruction being a constant
  2840. are covered by DeepMOVOpt, so only check for that }
  2841. if (taicpu(p).oper[0]^.typ = top_const) and
  2842. (
  2843. { For MOV operations, a size saving is only made if the register/const is byte-sized }
  2844. not (cs_opt_size in current_settings.optimizerswitches) or
  2845. (taicpu(hp1).opsize = S_B)
  2846. ) and
  2847. (
  2848. (taicpu(hp1).oper[1]^.typ = top_reg) or
  2849. ((taicpu(p).oper[0]^.val >= low(longint)) and (taicpu(p).oper[0]^.val <= high(longint)))
  2850. ) then
  2851. begin
  2852. DebugMsg(SPeepholeOptimization + debug_operstr(taicpu(hp1).oper[0]^) + ' = $' + debug_tostr(taicpu(p).oper[0]^.val) + '; changed to minimise pipeline stall (MovMov2Mov 6b)',hp1);
  2853. taicpu(hp1).loadconst(0, taicpu(p).oper[0]^.val);
  2854. end;
  2855. end;
  2856. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  2857. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  2858. { mov reg1, mem1 or mov mem1, reg1
  2859. mov mem2, reg2 mov reg2, mem2}
  2860. begin
  2861. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  2862. { mov reg1, mem1 or mov mem1, reg1
  2863. mov mem2, reg1 mov reg2, mem1}
  2864. begin
  2865. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  2866. { Removes the second statement from
  2867. mov reg1, mem1/reg2
  2868. mov mem1/reg2, reg1 }
  2869. begin
  2870. if taicpu(p).oper[0]^.typ=top_reg then
  2871. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  2872. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 1',p);
  2873. RemoveInstruction(hp1);
  2874. Result:=true;
  2875. exit;
  2876. end
  2877. else
  2878. begin
  2879. TransferUsedRegs(TmpUsedRegs);
  2880. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  2881. if (taicpu(p).oper[1]^.typ = top_ref) and
  2882. { mov reg1, mem1
  2883. mov mem2, reg1 }
  2884. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  2885. GetNextInstruction(hp1, hp2) and
  2886. MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
  2887. OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  2888. OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
  2889. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  2890. { change to
  2891. mov reg1, mem1 mov reg1, mem1
  2892. mov mem2, reg1 cmp reg1, mem2
  2893. cmp mem1, reg1
  2894. }
  2895. begin
  2896. RemoveInstruction(hp2);
  2897. taicpu(hp1).opcode := A_CMP;
  2898. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  2899. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  2900. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  2901. DebugMsg(SPeepholeOptimization + 'MovMovCmp2MovCmp done',hp1);
  2902. end;
  2903. end;
  2904. end
  2905. else if (taicpu(p).oper[1]^.typ=top_ref) and
  2906. OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  2907. begin
  2908. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  2909. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  2910. DebugMsg(SPeepholeOptimization + 'MovMov2MovMov1 done',p);
  2911. end
  2912. else
  2913. begin
  2914. TransferUsedRegs(TmpUsedRegs);
  2915. if GetNextInstruction(hp1, hp2) and
  2916. MatchOpType(taicpu(p),top_ref,top_reg) and
  2917. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  2918. (taicpu(hp1).oper[1]^.typ = top_ref) and
  2919. MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
  2920. MatchOpType(taicpu(hp2),top_ref,top_reg) and
  2921. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  2922. if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
  2923. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  2924. { mov mem1, %reg1
  2925. mov %reg1, mem2
  2926. mov mem2, reg2
  2927. to:
  2928. mov mem1, reg2
  2929. mov reg2, mem2}
  2930. begin
  2931. AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  2932. DebugMsg(SPeepholeOptimization + 'MovMovMov2MovMov 1 done',p);
  2933. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  2934. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  2935. RemoveInstruction(hp2);
  2936. Result := True;
  2937. end
  2938. {$ifdef i386}
  2939. { this is enabled for i386 only, as the rules to create the reg sets below
  2940. are too complicated for x86-64, so this makes this code too error prone
  2941. on x86-64
  2942. }
  2943. else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  2944. not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
  2945. not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
  2946. { mov mem1, reg1 mov mem1, reg1
  2947. mov reg1, mem2 mov reg1, mem2
  2948. mov mem2, reg2 mov mem2, reg1
  2949. to: to:
  2950. mov mem1, reg1 mov mem1, reg1
  2951. mov mem1, reg2 mov reg1, mem2
  2952. mov reg1, mem2
  2953. or (if mem1 depends on reg1
  2954. and/or if mem2 depends on reg2)
  2955. to:
  2956. mov mem1, reg1
  2957. mov reg1, mem2
  2958. mov reg1, reg2
  2959. }
  2960. begin
  2961. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  2962. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  2963. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  2964. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  2965. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  2966. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  2967. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  2968. AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  2969. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  2970. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  2971. AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  2972. end
  2973. else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  2974. begin
  2975. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  2976. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  2977. end
  2978. else
  2979. begin
  2980. RemoveInstruction(hp2);
  2981. end
  2982. {$endif i386}
  2983. ;
  2984. end;
  2985. end
  2986. { movl [mem1],reg1
  2987. movl [mem1],reg2
  2988. to
  2989. movl [mem1],reg1
  2990. movl reg1,reg2
  2991. }
  2992. else if MatchOpType(taicpu(p),top_ref,top_reg) and
  2993. MatchOpType(taicpu(hp1),top_ref,top_reg) and
  2994. (taicpu(p).opsize = taicpu(hp1).opsize) and
  2995. RefsEqual(taicpu(p).oper[0]^.ref^,taicpu(hp1).oper[0]^.ref^) and
  2996. (taicpu(p).oper[0]^.ref^.volatility=[]) and
  2997. (taicpu(hp1).oper[0]^.ref^.volatility=[]) and
  2998. not(SuperRegistersEqual(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^.base)) and
  2999. not(SuperRegistersEqual(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^.index)) then
  3000. begin
  3001. DebugMsg(SPeepholeOptimization + 'MovMov2MovMov 2',p);
  3002. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  3003. end;
  3004. { movl const1,[mem1]
  3005. movl [mem1],reg1
  3006. to
  3007. movl const1,reg1
  3008. movl reg1,[mem1]
  3009. }
  3010. if MatchOpType(Taicpu(p),top_const,top_ref) and
  3011. MatchOpType(Taicpu(hp1),top_ref,top_reg) and
  3012. (taicpu(p).opsize = taicpu(hp1).opsize) and
  3013. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  3014. not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
  3015. begin
  3016. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  3017. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  3018. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  3019. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  3020. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  3021. DebugMsg(SPeepholeOptimization + 'MovMov2MovMov 1',p);
  3022. Result:=true;
  3023. exit;
  3024. end;
  3025. { mov x,reg1; mov y,reg1 -> mov y,reg1 is handled by the Mov2Nop 5 optimisation }
  3026. end;
  3027. { search further than the next instruction for a mov (as long as it's not a jump) }
  3028. if not is_calljmpuncondret(taicpu(hp1).opcode) and
  3029. { check as much as possible before the expensive GetNextInstructionUsingRegCond call }
  3030. (taicpu(p).oper[1]^.typ = top_reg) and
  3031. (taicpu(p).oper[0]^.typ in [top_reg,top_const]) and
  3032. not RegModifiedByInstruction(taicpu(p).oper[1]^.reg, hp1) then
  3033. begin
  3034. { we work with hp2 here, so hp1 can be still used later on when
  3035. checking for GetNextInstruction_p }
  3036. hp3 := hp1;
  3037. { Initialise CrossJump (if it becomes True at any point, it will remain True) }
  3038. CrossJump := (taicpu(hp1).opcode = A_Jcc);
  3039. { Saves on a large number of dereferences }
  3040. ActiveReg := taicpu(p).oper[1]^.reg;
  3041. while GetNextInstructionUsingRegCond(hp3,hp2,ActiveReg,CrossJump) and
  3042. { GetNextInstructionUsingRegCond only searches one instruction ahead unless -O3 is specified }
  3043. (hp2.typ=ait_instruction) do
  3044. begin
  3045. case taicpu(hp2).opcode of
  3046. A_MOV:
  3047. if MatchOperand(taicpu(hp2).oper[0]^,ActiveReg) and
  3048. ((taicpu(p).oper[0]^.typ=top_const) or
  3049. ((taicpu(p).oper[0]^.typ=top_reg) and
  3050. not(RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp2))
  3051. )
  3052. ) then
  3053. begin
  3054. { we have
  3055. mov x, %treg
  3056. mov %treg, y
  3057. }
  3058. TransferUsedRegs(TmpUsedRegs);
  3059. TmpUsedRegs[R_INTREGISTER].Update(tai(p.Next));
  3060. { We don't need to call UpdateUsedRegs for every instruction between
  3061. p and hp2 because the register we're concerned about will not
  3062. become deallocated (otherwise GetNextInstructionUsingReg would
  3063. have stopped at an earlier instruction). [Kit] }
  3064. TempRegUsed :=
  3065. CrossJump { Assume the register is in use if it crossed a conditional jump } or
  3066. RegUsedAfterInstruction(ActiveReg, hp2, TmpUsedRegs) or
  3067. RegReadByInstruction(ActiveReg, hp1);
  3068. case taicpu(p).oper[0]^.typ Of
  3069. top_reg:
  3070. begin
  3071. { change
  3072. mov %reg, %treg
  3073. mov %treg, y
  3074. to
  3075. mov %reg, y
  3076. }
  3077. CurrentReg := taicpu(p).oper[0]^.reg; { Saves on a handful of pointer dereferences }
  3078. RegName1 := debug_regname(taicpu(hp2).oper[0]^.reg);
  3079. if taicpu(hp2).oper[1]^.reg = CurrentReg then
  3080. begin
  3081. { %reg = y - remove hp2 completely (doing it here instead of relying on
  3082. the "mov %reg,%reg" optimisation might cut down on a pass iteration) }
  3083. if TempRegUsed then
  3084. begin
  3085. DebugMsg(SPeepholeOptimization + debug_regname(CurrentReg) + ' = ' + RegName1 + '; removed unnecessary instruction (MovMov2MovNop 6b}',hp2);
  3086. AllocRegBetween(CurrentReg, p, hp2, UsedRegs);
  3087. { Set the start of the next GetNextInstructionUsingRegCond search
  3088. to start at the entry right before hp2 (which is about to be removed) }
  3089. hp3 := tai(hp2.Previous);
  3090. RemoveInstruction(hp2);
  3091. { See if there's more we can optimise }
  3092. Continue;
  3093. end
  3094. else
  3095. begin
  3096. RemoveInstruction(hp2);
  3097. { We can remove the original MOV too }
  3098. DebugMsg(SPeepholeOptimization + 'MovMov2NopNop 6b done',p);
  3099. RemoveCurrentP(p, hp1);
  3100. Result:=true;
  3101. Exit;
  3102. end;
  3103. end
  3104. else
  3105. begin
  3106. AllocRegBetween(CurrentReg, p, hp2, UsedRegs);
  3107. taicpu(hp2).loadReg(0, CurrentReg);
  3108. if TempRegUsed then
  3109. begin
  3110. { Don't remove the first instruction if the temporary register is in use }
  3111. DebugMsg(SPeepholeOptimization + RegName1 + ' = ' + debug_regname(CurrentReg) + '; changed to minimise pipeline stall (MovMov2Mov 6a}',hp2);
  3112. { No need to set Result to True. If there's another instruction later on
  3113. that can be optimised, it will be detected when the main Pass 1 loop
  3114. reaches what is now hp2 and passes it through OptPass1MOV. [Kit] };
  3115. end
  3116. else
  3117. begin
  3118. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 6 done',p);
  3119. RemoveCurrentP(p, hp1);
  3120. Result:=true;
  3121. Exit;
  3122. end;
  3123. end;
  3124. end;
  3125. top_const:
  3126. if not (cs_opt_size in current_settings.optimizerswitches) or (taicpu(hp2).opsize = S_B) then
  3127. begin
  3128. { change
  3129. mov const, %treg
  3130. mov %treg, y
  3131. to
  3132. mov const, y
  3133. }
  3134. if (taicpu(hp2).oper[1]^.typ=top_reg) or
  3135. ((taicpu(p).oper[0]^.val>=low(longint)) and (taicpu(p).oper[0]^.val<=high(longint))) then
  3136. begin
  3137. RegName1 := debug_regname(taicpu(hp2).oper[0]^.reg);
  3138. taicpu(hp2).loadOper(0,taicpu(p).oper[0]^);
  3139. if TempRegUsed then
  3140. begin
  3141. { Don't remove the first instruction if the temporary register is in use }
  3142. DebugMsg(SPeepholeOptimization + RegName1 + ' = ' + debug_tostr(taicpu(p).oper[0]^.val) + '; changed to minimise pipeline stall (MovMov2Mov 7a)',hp2);
  3143. { No need to set Result to True. If there's another instruction later on
  3144. that can be optimised, it will be detected when the main Pass 1 loop
  3145. reaches what is now hp2 and passes it through OptPass1MOV. [Kit] };
  3146. end
  3147. else
  3148. begin
  3149. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 7 done',p);
  3150. RemoveCurrentP(p, hp1);
  3151. Result:=true;
  3152. Exit;
  3153. end;
  3154. end;
  3155. end;
  3156. else
  3157. Internalerror(2019103001);
  3158. end;
  3159. end
  3160. else
  3161. if MatchOperand(taicpu(hp2).oper[1]^, ActiveReg) then
  3162. begin
  3163. if not CrossJump and
  3164. not RegUsedBetween(ActiveReg, p, hp2) and
  3165. not RegReadByInstruction(ActiveReg, hp2) then
  3166. begin
  3167. { Register is not used before it is overwritten }
  3168. DebugMsg(SPeepholeOptimization + 'Mov2Nop 3a done',p);
  3169. RemoveCurrentp(p, hp1);
  3170. Result := True;
  3171. Exit;
  3172. end;
  3173. if (taicpu(p).oper[0]^.typ = top_const) and
  3174. (taicpu(hp2).oper[0]^.typ = top_const) then
  3175. begin
  3176. if taicpu(p).oper[0]^.val = taicpu(hp2).oper[0]^.val then
  3177. begin
  3178. { Same value - register hasn't changed }
  3179. DebugMsg(SPeepholeOptimization + 'Mov2Nop 2 done', hp2);
  3180. RemoveInstruction(hp2);
  3181. Result := True;
  3182. { See if there's more we can optimise }
  3183. Continue;
  3184. end;
  3185. end;
  3186. end;
  3187. A_MOVZX, A_MOVSX{$ifdef x86_64}, A_MOVSXD{$endif x86_64}:
  3188. if MatchOpType(taicpu(hp2), top_reg, top_reg) and
  3189. MatchOperand(taicpu(hp2).oper[0]^, ActiveReg) and
  3190. SuperRegistersEqual(taicpu(hp2).oper[1]^.reg, ActiveReg) then
  3191. begin
  3192. {
  3193. Change from:
  3194. mov ###, %reg
  3195. ...
  3196. movs/z %reg,%reg (Same register, just different sizes)
  3197. To:
  3198. movs/z ###, %reg (Longer version)
  3199. ...
  3200. (remove)
  3201. }
  3202. DebugMsg(SPeepholeOptimization + 'MovMovs/z2Mov/s/z done', p);
  3203. taicpu(p).oper[1]^.reg := taicpu(hp2).oper[1]^.reg;
  3204. { Keep the first instruction as mov if ### is a constant }
  3205. if taicpu(p).oper[0]^.typ = top_const then
  3206. taicpu(p).opsize := reg2opsize(taicpu(hp2).oper[1]^.reg)
  3207. else
  3208. begin
  3209. taicpu(p).opcode := taicpu(hp2).opcode;
  3210. taicpu(p).opsize := taicpu(hp2).opsize;
  3211. end;
  3212. DebugMsg(SPeepholeOptimization + 'Removed movs/z instruction and extended earlier write (MovMovs/z2Mov/s/z)', hp2);
  3213. AllocRegBetween(taicpu(hp2).oper[1]^.reg, p, hp2, UsedRegs);
  3214. RemoveInstruction(hp2);
  3215. Result := True;
  3216. Exit;
  3217. end;
  3218. else
  3219. if MatchOpType(taicpu(p), top_reg, top_reg) then
  3220. begin
  3221. TransferUsedRegs(TmpUsedRegs);
  3222. TmpUsedRegs[R_INTREGISTER].Update(tai(p.Next));
  3223. if
  3224. not RegModifiedByInstruction(taicpu(p).oper[0]^.reg, hp1) and
  3225. not RegModifiedBetween(taicpu(p).oper[0]^.reg, hp1, hp2) and
  3226. DeepMovOpt(taicpu(p), taicpu(hp2)) then
  3227. begin
  3228. { Just in case something didn't get modified (e.g. an
  3229. implicit register) }
  3230. if not RegReadByInstruction(ActiveReg, hp2) and
  3231. { If a conditional jump was crossed, do not delete
  3232. the original MOV no matter what }
  3233. not CrossJump then
  3234. begin
  3235. TransferUsedRegs(TmpUsedRegs);
  3236. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  3237. UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
  3238. if
  3239. { Make sure the original register isn't still present
  3240. and has been written to (e.g. with SHRX) }
  3241. RegLoadedWithNewValue(ActiveReg, hp2) or
  3242. not RegUsedAfterInstruction(ActiveReg, hp2, TmpUsedRegs) then
  3243. begin
  3244. RegUsedAfterInstruction(ActiveReg, hp2, TmpUsedRegs);
  3245. { We can remove the original MOV }
  3246. DebugMsg(SPeepholeOptimization + 'Mov2Nop 3b done',p);
  3247. RemoveCurrentp(p, hp1);
  3248. Result := True;
  3249. Exit;
  3250. end
  3251. else
  3252. begin
  3253. { See if there's more we can optimise }
  3254. hp3 := hp2;
  3255. Continue;
  3256. end;
  3257. end;
  3258. end;
  3259. end;
  3260. end;
  3261. { Break out of the while loop under normal circumstances }
  3262. Break;
  3263. end;
  3264. end;
  3265. if (aoc_MovAnd2Mov_3 in OptsToCheck) and
  3266. (taicpu(p).oper[1]^.typ = top_reg) and
  3267. (taicpu(p).opsize = S_L) and
  3268. GetNextInstructionUsingRegTrackingUse(p,hp2,taicpu(p).oper[1]^.reg) and
  3269. (taicpu(hp2).opcode = A_AND) and
  3270. (MatchOpType(taicpu(hp2),top_const,top_reg) or
  3271. (MatchOpType(taicpu(hp2),top_reg,top_reg) and
  3272. MatchOperand(taicpu(hp2).oper[0]^,taicpu(hp2).oper[1]^))
  3273. ) then
  3274. begin
  3275. if SuperRegistersEqual(taicpu(p).oper[1]^.reg,taicpu(hp2).oper[1]^.reg) then
  3276. begin
  3277. if ((taicpu(hp2).oper[0]^.typ=top_const) and (taicpu(hp2).oper[0]^.val = $ffffffff)) or
  3278. ((taicpu(hp2).oper[0]^.typ=top_reg) and (taicpu(hp2).opsize=S_L)) then
  3279. begin
  3280. { Optimize out:
  3281. mov x, %reg
  3282. and ffffffffh, %reg
  3283. }
  3284. DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 3 done',p);
  3285. RemoveInstruction(hp2);
  3286. Result:=true;
  3287. exit;
  3288. end;
  3289. end;
  3290. end;
  3291. { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  3292. x >= RetOffset) as it doesn't do anything (it writes either to a
  3293. parameter or to the temporary storage room for the function
  3294. result)
  3295. }
  3296. if IsExitCode(hp1) and
  3297. (taicpu(p).oper[1]^.typ = top_ref) and
  3298. (taicpu(p).oper[1]^.ref^.index = NR_NO) and
  3299. (
  3300. (
  3301. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  3302. not (
  3303. assigned(current_procinfo.procdef.funcretsym) and
  3304. (taicpu(p).oper[1]^.ref^.offset <= tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)
  3305. )
  3306. ) or
  3307. { Also discard writes to the stack that are below the base pointer,
  3308. as this is temporary storage rather than a function result on the
  3309. stack, say. }
  3310. (
  3311. (taicpu(p).oper[1]^.ref^.base = NR_STACK_POINTER_REG) and
  3312. (taicpu(p).oper[1]^.ref^.offset < current_procinfo.final_localsize)
  3313. )
  3314. ) then
  3315. begin
  3316. RemoveCurrentp(p, hp1);
  3317. DebugMsg(SPeepholeOptimization + 'removed deadstore before leave/ret',p);
  3318. RemoveLastDeallocForFuncRes(p);
  3319. Result:=true;
  3320. exit;
  3321. end;
  3322. if MatchInstruction(hp1,A_CMP,A_TEST,[taicpu(p).opsize]) then
  3323. begin
  3324. if MatchOpType(taicpu(p),top_reg,top_ref) and
  3325. (taicpu(hp1).oper[1]^.typ = top_ref) and
  3326. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  3327. begin
  3328. { change
  3329. mov reg1, mem1
  3330. test/cmp x, mem1
  3331. to
  3332. mov reg1, mem1
  3333. test/cmp x, reg1
  3334. }
  3335. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  3336. DebugMsg(SPeepholeOptimization + 'MovTestCmp2MovTestCmp 1',hp1);
  3337. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  3338. Result := True;
  3339. Exit;
  3340. end;
  3341. if MatchOpType(taicpu(p),top_ref,top_reg) and
  3342. { The x86 assemblers have difficulty comparing values against absolute addresses }
  3343. (taicpu(p).oper[0]^.ref^.refaddr in [addr_no, addr_pic, addr_pic_no_got]) and
  3344. (taicpu(hp1).oper[0]^.typ <> top_ref) and
  3345. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^.reg) and
  3346. (
  3347. (
  3348. (taicpu(hp1).opcode = A_TEST)
  3349. ) or (
  3350. (taicpu(hp1).opcode = A_CMP) and
  3351. { A sanity check more than anything }
  3352. not MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^.reg)
  3353. )
  3354. ) then
  3355. begin
  3356. { change
  3357. mov mem, %reg
  3358. cmp/test x, %reg / test %reg,%reg
  3359. (reg deallocated)
  3360. to
  3361. cmp/test x, mem / cmp 0, mem
  3362. }
  3363. TransferUsedRegs(TmpUsedRegs);
  3364. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  3365. if not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs) then
  3366. begin
  3367. { Convert test %reg,%reg or test $-1,%reg to cmp $0,mem }
  3368. if (taicpu(hp1).opcode = A_TEST) and
  3369. (
  3370. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^.reg) or
  3371. MatchOperand(taicpu(hp1).oper[0]^, -1)
  3372. ) then
  3373. begin
  3374. taicpu(hp1).opcode := A_CMP;
  3375. taicpu(hp1).loadconst(0, 0);
  3376. end;
  3377. taicpu(hp1).loadref(1, taicpu(p).oper[0]^.ref^);
  3378. DebugMsg(SPeepholeOptimization + 'MOV/CMP -> CMP (memory check)', p);
  3379. RemoveCurrentP(p, hp1);
  3380. Result := True;
  3381. Exit;
  3382. end;
  3383. end;
  3384. end;
  3385. if MatchInstruction(hp1,A_LEA,[S_L{$ifdef x86_64},S_Q{$endif x86_64}]) and
  3386. { If the flags register is in use, don't change the instruction to an
  3387. ADD otherwise this will scramble the flags. [Kit] }
  3388. not RegInUsedRegs(NR_DEFAULTFLAGS, UsedRegs) then
  3389. begin
  3390. if MatchOpType(Taicpu(p),top_ref,top_reg) and
  3391. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  3392. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  3393. ) or
  3394. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  3395. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  3396. )
  3397. ) then
  3398. { mov reg1,ref
  3399. lea reg2,[reg1,reg2]
  3400. to
  3401. add reg2,ref}
  3402. begin
  3403. TransferUsedRegs(TmpUsedRegs);
  3404. { reg1 may not be used afterwards }
  3405. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  3406. begin
  3407. Taicpu(hp1).opcode:=A_ADD;
  3408. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  3409. DebugMsg(SPeepholeOptimization + 'MovLea2Add done',hp1);
  3410. RemoveCurrentp(p, hp1);
  3411. result:=true;
  3412. exit;
  3413. end;
  3414. end;
  3415. { If the LEA instruction can be converted into an arithmetic instruction,
  3416. it may be possible to then fold it in the next optimisation, otherwise
  3417. there's nothing more that can be optimised here. }
  3418. if not ConvertLEA(taicpu(hp1)) then
  3419. Exit;
  3420. end;
  3421. if (taicpu(p).oper[1]^.typ = top_reg) and
  3422. (hp1.typ = ait_instruction) and
  3423. GetNextInstruction(hp1, hp2) and
  3424. MatchInstruction(hp2,A_MOV,[]) and
  3425. (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
  3426. (topsize2memsize[taicpu(hp1).opsize]>=topsize2memsize[taicpu(hp2).opsize]) and
  3427. (
  3428. IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg)
  3429. {$ifdef x86_64}
  3430. or
  3431. (
  3432. (taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and (taicpu(hp2).opsize=S_L) and
  3433. IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ))
  3434. )
  3435. {$endif x86_64}
  3436. ) then
  3437. begin
  3438. if OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  3439. (taicpu(hp2).oper[0]^.typ=top_reg) then
  3440. { change movsX/movzX reg/ref, reg2
  3441. add/sub/or/... reg3/$const, reg2
  3442. mov reg2 reg/ref
  3443. dealloc reg2
  3444. to
  3445. add/sub/or/... reg3/$const, reg/ref }
  3446. begin
  3447. TransferUsedRegs(TmpUsedRegs);
  3448. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  3449. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  3450. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  3451. begin
  3452. { by example:
  3453. movswl %si,%eax movswl %si,%eax p
  3454. decl %eax addl %edx,%eax hp1
  3455. movw %ax,%si movw %ax,%si hp2
  3456. ->
  3457. movswl %si,%eax movswl %si,%eax p
  3458. decw %eax addw %edx,%eax hp1
  3459. movw %ax,%si movw %ax,%si hp2
  3460. }
  3461. DebugMsg(SPeepholeOptimization + 'MovOpMov2Op ('+
  3462. debug_op2str(taicpu(p).opcode)+debug_opsize2str(taicpu(p).opsize)+' '+
  3463. debug_op2str(taicpu(hp1).opcode)+debug_opsize2str(taicpu(hp1).opsize)+' '+
  3464. debug_op2str(taicpu(hp2).opcode)+debug_opsize2str(taicpu(hp2).opsize)+')',p);
  3465. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  3466. {
  3467. ->
  3468. movswl %si,%eax movswl %si,%eax p
  3469. decw %si addw %dx,%si hp1
  3470. movw %ax,%si movw %ax,%si hp2
  3471. }
  3472. case taicpu(hp1).ops of
  3473. 1:
  3474. begin
  3475. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  3476. if taicpu(hp1).oper[0]^.typ=top_reg then
  3477. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  3478. end;
  3479. 2:
  3480. begin
  3481. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  3482. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  3483. (taicpu(hp1).opcode<>A_SHL) and
  3484. (taicpu(hp1).opcode<>A_SHR) and
  3485. (taicpu(hp1).opcode<>A_SAR) then
  3486. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  3487. end;
  3488. else
  3489. internalerror(2008042701);
  3490. end;
  3491. {
  3492. ->
  3493. decw %si addw %dx,%si p
  3494. }
  3495. RemoveInstruction(hp2);
  3496. RemoveCurrentP(p, hp1);
  3497. Result:=True;
  3498. Exit;
  3499. end;
  3500. end;
  3501. if MatchOpType(taicpu(hp2),top_reg,top_reg) and
  3502. not(SuperRegistersEqual(taicpu(hp1).oper[0]^.reg,taicpu(hp2).oper[1]^.reg)) and
  3503. ((topsize2memsize[taicpu(hp1).opsize]<= topsize2memsize[taicpu(hp2).opsize]) or
  3504. { opsize matters for these opcodes, we could probably work around this, but it is not worth the effort }
  3505. ((taicpu(hp1).opcode<>A_SHL) and (taicpu(hp1).opcode<>A_SHR) and (taicpu(hp1).opcode<>A_SAR))
  3506. )
  3507. {$ifdef i386}
  3508. { byte registers of esi, edi, ebp, esp are not available on i386 }
  3509. and ((taicpu(hp2).opsize<>S_B) or not(getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_ESI,RS_EDI,RS_EBP,RS_ESP]))
  3510. and ((taicpu(hp2).opsize<>S_B) or not(getsupreg(taicpu(p).oper[0]^.reg) in [RS_ESI,RS_EDI,RS_EBP,RS_ESP]))
  3511. {$endif i386}
  3512. then
  3513. { change movsX/movzX reg/ref, reg2
  3514. add/sub/or/... regX/$const, reg2
  3515. mov reg2, reg3
  3516. dealloc reg2
  3517. to
  3518. movsX/movzX reg/ref, reg3
  3519. add/sub/or/... reg3/$const, reg3
  3520. }
  3521. begin
  3522. TransferUsedRegs(TmpUsedRegs);
  3523. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  3524. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  3525. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  3526. begin
  3527. { by example:
  3528. movswl %si,%eax movswl %si,%eax p
  3529. decl %eax addl %edx,%eax hp1
  3530. movw %ax,%si movw %ax,%si hp2
  3531. ->
  3532. movswl %si,%eax movswl %si,%eax p
  3533. decw %eax addw %edx,%eax hp1
  3534. movw %ax,%si movw %ax,%si hp2
  3535. }
  3536. DebugMsg(SPeepholeOptimization + 'MovOpMov2MovOp ('+
  3537. debug_op2str(taicpu(p).opcode)+debug_opsize2str(taicpu(p).opsize)+' '+
  3538. debug_op2str(taicpu(hp1).opcode)+debug_opsize2str(taicpu(hp1).opsize)+' '+
  3539. debug_op2str(taicpu(hp2).opcode)+debug_opsize2str(taicpu(hp2).opsize)+')',p);
  3540. { limit size of constants as well to avoid assembler errors, but
  3541. check opsize to avoid overflow when left shifting the 1 }
  3542. if (taicpu(p).oper[0]^.typ=top_const) and (topsize2memsize[taicpu(hp2).opsize]<=63) then
  3543. taicpu(p).oper[0]^.val:=taicpu(p).oper[0]^.val and ((qword(1) shl topsize2memsize[taicpu(hp2).opsize])-1);
  3544. {$ifdef x86_64}
  3545. { Be careful of, for example:
  3546. movl %reg1,%reg2
  3547. addl %reg3,%reg2
  3548. movq %reg2,%reg4
  3549. This will cause problems if the upper 32-bits of %reg3 or %reg4 are non-zero
  3550. }
  3551. if (taicpu(hp1).opsize = S_L) and (taicpu(hp2).opsize = S_Q) then
  3552. begin
  3553. taicpu(hp2).changeopsize(S_L);
  3554. setsubreg(taicpu(hp2).oper[0]^.reg, R_SUBD);
  3555. setsubreg(taicpu(hp2).oper[1]^.reg, R_SUBD);
  3556. end;
  3557. {$endif x86_64}
  3558. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  3559. taicpu(p).changeopsize(taicpu(hp2).opsize);
  3560. if taicpu(p).oper[0]^.typ=top_reg then
  3561. setsubreg(taicpu(p).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  3562. taicpu(p).loadoper(1, taicpu(hp2).oper[1]^);
  3563. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,usedregs);
  3564. {
  3565. ->
  3566. movswl %si,%eax movswl %si,%eax p
  3567. decw %si addw %dx,%si hp1
  3568. movw %ax,%si movw %ax,%si hp2
  3569. }
  3570. case taicpu(hp1).ops of
  3571. 1:
  3572. begin
  3573. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  3574. if taicpu(hp1).oper[0]^.typ=top_reg then
  3575. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  3576. end;
  3577. 2:
  3578. begin
  3579. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  3580. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  3581. (taicpu(hp1).opcode<>A_SHL) and
  3582. (taicpu(hp1).opcode<>A_SHR) and
  3583. (taicpu(hp1).opcode<>A_SAR) then
  3584. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  3585. end;
  3586. else
  3587. internalerror(2018111801);
  3588. end;
  3589. {
  3590. ->
  3591. decw %si addw %dx,%si p
  3592. }
  3593. RemoveInstruction(hp2);
  3594. end;
  3595. end;
  3596. end;
  3597. if MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  3598. GetNextInstruction(hp1, hp2) and
  3599. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  3600. MatchOperand(Taicpu(p).oper[0]^,0) and
  3601. (Taicpu(p).oper[1]^.typ = top_reg) and
  3602. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  3603. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  3604. { mov reg1,0
  3605. bts reg1,operand1 --> mov reg1,operand2
  3606. or reg1,operand2 bts reg1,operand1}
  3607. begin
  3608. Taicpu(hp2).opcode:=A_MOV;
  3609. DebugMsg(SPeepholeOptimization + 'MovBtsOr2MovBts done',hp1);
  3610. asml.remove(hp1);
  3611. insertllitem(hp2,hp2.next,hp1);
  3612. RemoveCurrentp(p, hp1);
  3613. Result:=true;
  3614. exit;
  3615. end;
  3616. {
  3617. mov ref,reg0
  3618. <op> reg0,reg1
  3619. dealloc reg0
  3620. to
  3621. <op> ref,reg1
  3622. }
  3623. if MatchOpType(taicpu(p),top_ref,top_reg) and
  3624. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  3625. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  3626. MatchInstruction(hp1,[A_AND,A_OR,A_XOR,A_ADD,A_SUB,A_CMP],[Taicpu(p).opsize]) and
  3627. not(MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^)) and
  3628. RegEndOfLife(taicpu(p).oper[1]^.reg,taicpu(hp1)) then
  3629. begin
  3630. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  3631. DebugMsg(SPeepholeOptimization + 'MovOp2Op done',hp1);
  3632. RemoveCurrentp(p, hp1);
  3633. Result:=true;
  3634. exit;
  3635. end;
  3636. {$ifdef x86_64}
  3637. { Convert:
  3638. movq x(ref),%reg64
  3639. shrq y,%reg64
  3640. To:
  3641. movq x+4(ref),%reg32
  3642. shrq y-32,%reg32 (Remove if y = 32)
  3643. }
  3644. if (taicpu(p).opsize = S_Q) and
  3645. (taicpu(p).oper[0]^.typ = top_ref) and { Second operand will be a register }
  3646. (taicpu(p).oper[0]^.ref^.offset <= $7FFFFFFB) and
  3647. MatchInstruction(hp1, A_SHR, [taicpu(p).opsize]) and
  3648. MatchOpType(taicpu(hp1), top_const, top_reg) and
  3649. (taicpu(hp1).oper[0]^.val >= 32) and
  3650. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  3651. begin
  3652. RegName1 := debug_regname(taicpu(hp1).oper[1]^.reg);
  3653. PreMessage := 'movq ' + debug_operstr(taicpu(p).oper[0]^) + ',' + RegName1 + '; ' +
  3654. 'shrq $' + debug_tostr(taicpu(hp1).oper[0]^.val) + ',' + RegName1 + ' -> movl ';
  3655. { Convert to 32-bit }
  3656. setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
  3657. taicpu(p).opsize := S_L;
  3658. Inc(taicpu(p).oper[0]^.ref^.offset, 4);
  3659. PreMessage := PreMessage + debug_operstr(taicpu(p).oper[0]^) + ',' + debug_regname(taicpu(p).oper[1]^.reg);
  3660. if (taicpu(hp1).oper[0]^.val = 32) then
  3661. begin
  3662. DebugMsg(SPeepholeOptimization + PreMessage + ' (MovShr2Mov)', p);
  3663. RemoveInstruction(hp1);
  3664. end
  3665. else
  3666. begin
  3667. { This will potentially open up more arithmetic operations since
  3668. the peephole optimizer now has a big hint that only the lower
  3669. 32 bits are currently in use (and opcodes are smaller in size) }
  3670. setsubreg(taicpu(hp1).oper[1]^.reg, R_SUBD);
  3671. taicpu(hp1).opsize := S_L;
  3672. Dec(taicpu(hp1).oper[0]^.val, 32);
  3673. DebugMsg(SPeepholeOptimization + PreMessage +
  3674. '; shrl $' + debug_tostr(taicpu(hp1).oper[0]^.val) + ',' + debug_regname(taicpu(hp1).oper[1]^.reg) + ' (MovShr2MovShr)', p);
  3675. end;
  3676. Result := True;
  3677. Exit;
  3678. end;
  3679. {$endif x86_64}
  3680. end;
  3681. function TX86AsmOptimizer.OptPass1MOVXX(var p : tai) : boolean;
  { Pass-1 peephole for two-operand movXX-style copies: when the very next
    instruction is the same opcode and opsize with p's operands swapped
    ("movXX a,b; movXX b,a"), the second copy is redundant.  The second
    instruction is always removed; the first is removed as well when its
    destination register is no longer used afterwards.
    Returns True when a change was made. }
  3682. var
  3683. hp1 : tai;
  3684. begin
  3685. Result:=false;
  { Only the two-operand form is handled here. }
  3686. if taicpu(p).ops <> 2 then
  3687. exit;
  { hp1 must be the same opcode with the same opsize and also take two operands. }
  3688. if GetNextInstruction(p,hp1) and
  3689. MatchInstruction(hp1,taicpu(p).opcode,[taicpu(p).opsize]) and
  3690. (taicpu(hp1).ops = 2) then
  3691. begin
  { The operand types must be crossed over (source type of one matches the
    destination type of the other), i.e. a copy followed by the reverse copy. }
  3692. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  3693. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  3694. { movXX reg1, mem1 or movXX mem1, reg1
  3695. movXX mem2, reg2 movXX reg2, mem2}
  3696. begin
  3697. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  3698. { movXX reg1, mem1 or movXX mem1, reg1
  3699. movXX mem2, reg1 movXX reg2, mem1}
  3700. begin
  3701. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  3702. begin
  3703. { Removes the second statement from
  3704. movXX reg1, mem1/reg2
  3705. movXX mem1/reg2, reg1
  3706. }
  { Keep the source register marked as allocated across both instructions so
    that later passes see a consistent register lifetime. }
  3707. if taicpu(p).oper[0]^.typ=top_reg then
  3708. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  3709. { Removes the second statement from
  3710. movXX mem1/reg1, reg2
  3711. movXX reg2, mem1/reg1
  3712. }
  { If p's destination register dies after hp1, both copies can be dropped
    (MovXXMovXX2Nop); otherwise only the second copy goes (MovXXMovXX2MoVXX). }
  3713. if (taicpu(p).oper[1]^.typ=top_reg) and
  3714. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)) then
  3715. begin
  3716. DebugMsg(SPeepholeOptimization + 'MovXXMovXX2Nop 1 done',p);
  3717. RemoveInstruction(hp1);
  3718. RemoveCurrentp(p); { p will now be equal to the instruction that follows what was hp1 }
  3719. end
  3720. else
  3721. begin
  3722. DebugMsg(SPeepholeOptimization + 'MovXXMovXX2MoVXX 1 done',p);
  3723. RemoveInstruction(hp1);
  3724. end;
  3725. Result:=true;
  3726. exit;
  3727. end
  3728. end;
  3729. end;
  3730. end;
  3731. end;
  3732. function TX86AsmOptimizer.OptPass1OP(var p : tai) : boolean;
  { Pass-1 peephole for vector arithmetic ops: when an op writing %mreg2 is
    immediately followed by a MOVAPS/MOVAPD copying %mreg2 back into %mreg1
    and %mreg2 is not used afterwards, the op's operands are rewritten so the
    result lands directly in %mreg1 and the copy is removed.
    Returns True when a change was made. }
  3733. var
  3734. hp1 : tai;
  3735. begin
  3736. result:=false;
  3737. { replace
  3738. <Op>X %mreg1,%mreg2 // Op in [ADD,MUL]
  3739. MovX %mreg2,%mreg1
  3740. dealloc %mreg2
  3741. by
  3742. <Op>X %mreg2,%mreg1
  3743. ?
  3744. }
  3745. if GetNextInstruction(p,hp1) and
  3746. { we mix single and double operations here because we assume that the compiler
  3747. generates vmovapd only after double operations and vmovaps only after single operations }
  3748. MatchInstruction(hp1,A_MOVAPD,A_MOVAPS,[S_NO]) and
  3749. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  3750. MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
  3751. (taicpu(p).oper[0]^.typ=top_reg) then
  3752. begin
  { Check register liveness starting from the instruction after p. }
  3753. TransferUsedRegs(TmpUsedRegs);
  3754. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  { Only safe when the op's original destination dies at the copy. }
  3755. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  3756. begin
  { Redirect the op to write straight into the copy's destination. }
  3757. taicpu(p).loadoper(0,taicpu(hp1).oper[0]^);
  3758. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  3759. DebugMsg(SPeepholeOptimization + 'OpMov2Op done',p);
  3760. RemoveInstruction(hp1);
  3761. result:=true;
  3762. end;
  3763. end;
  3764. end;
  3765. function TX86AsmOptimizer.OptPass1Test(var p: tai) : boolean;
  { Pass-1 peephole for TEST instructions.  Two transformations:
    1) hoist an independent MOV that directly follows the TEST to before it,
       so the MOV no longer sits between the TEST and its conditional jump;
    2) for "test %reg,%reg; j(c1) @lbl1" where @lbl1 leads to an identical
       "test %reg,%reg; j(c2)", redirect the first jump to the second jump's
       target when c1 is a subset of c2 (condition_in).
    Returns True when a change was made. }
  3766. var
  3767. hp1, p_label, p_dist, hp1_dist: tai;
  3768. JumpLabel, JumpLabel_dist: TAsmLabel;
  3769. begin
  3770. Result := False;
  { Transformation 1: the following MOV must share no registers with the
    TEST in either direction, or the swap would change semantics. }
  3771. if GetNextInstruction(p, hp1) and
  3772. MatchInstruction(hp1,A_MOV,[]) and
  3773. (
  3774. (taicpu(p).oper[0]^.typ <> top_reg) or
  3775. not RegInInstruction(taicpu(p).oper[0]^.reg, hp1)
  3776. ) and
  3777. (
  3778. (taicpu(p).oper[1]^.typ <> top_reg) or
  3779. not RegInInstruction(taicpu(p).oper[1]^.reg, hp1)
  3780. ) and
  3781. (
  3782. { Make sure the register written to doesn't appear in the
  3783. test instruction (in a reference, say) }
  3784. (taicpu(hp1).oper[1]^.typ <> top_reg) or
  3785. not RegInInstruction(taicpu(hp1).oper[1]^.reg, p)
  3786. ) then
  3787. begin
  3788. { If we have something like:
  3789. test %reg1,%reg1
  3790. mov 0,%reg2
  3791. And no registers are shared (the two %reg1's can be different, as
  3792. long as neither of them are also %reg2), move the MOV command to
  3793. before the comparison as this means it can be optimised without
  3794. worrying about the FLAGS register. (This combination is generated
  3795. by "J(c)Mov1JmpMov0 -> Set(~c)", among other things).
  3796. }
  3797. SwapMovCmp(p, hp1);
  3798. Result := True;
  3799. Exit;
  3800. end;
  3801. { Search for:
  3802. test %reg,%reg
  3803. j(c1) @lbl1
  3804. ...
  3805. @lbl:
  3806. test %reg,%reg (same register)
  3807. j(c2) @lbl2
  3808. If c2 is a subset of c1, change to:
  3809. test %reg,%reg
  3810. j(c1) @lbl2
  3811. (@lbl1 may become a dead label as a result)
  3812. }
  { Transformation 2 only applies to the self-test form "test %reg,%reg"
    followed by a conditional jump to a known label. }
  3813. if (taicpu(p).oper[1]^.typ = top_reg) and
  3814. (taicpu(p).oper[0]^.typ = top_reg) and
  3815. (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
  3816. MatchInstruction(hp1, A_JCC, []) and
  3817. IsJumpToLabel(taicpu(hp1)) then
  3818. begin
  3819. JumpLabel := TAsmLabel(taicpu(hp1).oper[0]^.ref^.symbol);
  3820. p_label := nil;
  3821. if Assigned(JumpLabel) then
  3822. p_label := getlabelwithsym(JumpLabel);
  { The instruction at the jump target must be another self-test of the same
    (super-)register, followed by another conditional jump. }
  3823. if Assigned(p_label) and
  3824. GetNextInstruction(p_label, p_dist) and
  3825. MatchInstruction(p_dist, A_TEST, []) and
  3826. { It's fine if the second test uses smaller sub-registers }
  3827. (taicpu(p_dist).opsize <= taicpu(p).opsize) and
  3828. MatchOpType(taicpu(p_dist), top_reg, top_reg) and
  3829. SuperRegistersEqual(taicpu(p_dist).oper[0]^.reg, taicpu(p).oper[0]^.reg) and
  3830. SuperRegistersEqual(taicpu(p_dist).oper[1]^.reg, taicpu(p).oper[1]^.reg) and
  3831. GetNextInstruction(p_dist, hp1_dist) and
  3832. MatchInstruction(hp1_dist, A_JCC, []) then { This doesn't have to be an explicit label }
  3833. begin
  3834. JumpLabel_dist := TAsmLabel(taicpu(hp1_dist).oper[0]^.ref^.symbol);
  3835. if JumpLabel = JumpLabel_dist then
  3836. { This is an infinite loop }
  3837. Exit;
  3838. { Best optimisation when the first condition is a subset (or equal) of the second }
  3839. if condition_in(taicpu(hp1).condition, taicpu(hp1_dist).condition) then
  3840. begin
  3841. { Any registers used here will already be allocated }
  { Update the reference counts: the new target gains a reference,
    the old one loses it (and may thereby become dead). }
  3842. if Assigned(JumpLabel_dist) then
  3843. JumpLabel_dist.IncRefs;
  3844. if Assigned(JumpLabel) then
  3845. JumpLabel.DecRefs;
  3846. DebugMsg(SPeepholeOptimization + 'TEST/Jcc/@Lbl/TEST/Jcc -> TEST/Jcc, redirecting first jump', hp1);
  3847. taicpu(hp1).loadref(0, taicpu(hp1_dist).oper[0]^.ref^);
  3848. Result := True;
  3849. Exit;
  3850. end;
  3851. end;
  3852. end;
  3853. end;
  3854. function TX86AsmOptimizer.OptPass1Add(var p : tai) : boolean;
  { Pass-1 peephole for ADD with a constant: when the following instruction
    is an LEA whose reference uses the ADD's destination register as base
    and/or index, and that register dies at the LEA, the constant is folded
    into the LEA's offset (scaled by the scalefactor for an index use) and
    the ADD is removed.  Returns True when a change was made. }
  3855. var
  3856. hp1 : tai;
  3857. begin
  3858. result:=false;
  3859. { replace
  3860. addX const,%reg1
  3861. leaX (%reg1,%reg1,Y),%reg2 // Base or index might not be equal to reg1
  3862. dealloc %reg1
  3863. by
  3864. leaX const+const*Y(%reg1,%reg1,Y),%reg2
  3865. }
  { The LEA must have the same opsize and reference reg1 as base or index. }
  3866. if MatchOpType(taicpu(p),top_const,top_reg) and
  3867. GetNextInstruction(p,hp1) and
  3868. MatchInstruction(hp1,A_LEA,[taicpu(p).opsize]) and
  3869. ((taicpu(p).oper[1]^.reg=taicpu(hp1).oper[0]^.ref^.base) or
  3870. (taicpu(p).oper[1]^.reg=taicpu(hp1).oper[0]^.ref^.index)) then
  3871. begin
  3872. TransferUsedRegs(TmpUsedRegs);
  3873. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  3874. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  3875. begin
  3876. DebugMsg(SPeepholeOptimization + 'AddLea2Lea done',p);
  { Both adjustments may apply if reg1 is base and index at once:
    const once for the base use, const*scalefactor for the index use. }
  3877. if taicpu(p).oper[1]^.reg=taicpu(hp1).oper[0]^.ref^.base then
  3878. inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.val);
  3879. if taicpu(p).oper[1]^.reg=taicpu(hp1).oper[0]^.ref^.index then
  3880. inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.val*max(taicpu(hp1).oper[0]^.ref^.scalefactor,1));
  3881. RemoveCurrentP(p);
  3882. result:=true;
  3883. end;
  3884. end;
  3885. end;
  3886. function TX86AsmOptimizer.OptPass1LEA(var p : tai) : boolean;
  3887. var
  3888. hp1: tai;
  3889. ref: Integer;
  3890. saveref: treference;
  3891. TempReg: TRegister;
  3892. Multiple: TCGInt;
  3893. begin
  3894. Result:=false;
  3895. { removes seg register prefixes from LEA operations, as they
  3896. don't do anything}
  3897. taicpu(p).oper[0]^.ref^.Segment:=NR_NO;
  3898. { changes "lea (%reg1), %reg2" into "mov %reg1, %reg2" }
  3899. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  3900. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  3901. (
  3902. { do not mess with leas accessing the stack pointer
  3903. unless it's a null operation }
  3904. (taicpu(p).oper[1]^.reg <> NR_STACK_POINTER_REG) or
  3905. (
  3906. (taicpu(p).oper[0]^.ref^.base = NR_STACK_POINTER_REG) and
  3907. (taicpu(p).oper[0]^.ref^.offset = 0)
  3908. )
  3909. ) and
  3910. (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
  3911. begin
  3912. if (taicpu(p).oper[0]^.ref^.offset = 0) then
  3913. begin
  3914. if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) then
  3915. begin
  3916. hp1:=taicpu.op_reg_reg(A_MOV,taicpu(p).opsize,taicpu(p).oper[0]^.ref^.base,
  3917. taicpu(p).oper[1]^.reg);
  3918. InsertLLItem(p.previous,p.next, hp1);
  3919. DebugMsg(SPeepholeOptimization + 'Lea2Mov done',hp1);
  3920. p.free;
  3921. p:=hp1;
  3922. end
  3923. else
  3924. begin
  3925. DebugMsg(SPeepholeOptimization + 'Lea2Nop done',p);
  3926. RemoveCurrentP(p);
  3927. end;
  3928. Result:=true;
  3929. exit;
  3930. end
  3931. else if (
  3932. { continue to use lea to adjust the stack pointer,
  3933. it is the recommended way, but only if not optimizing for size }
  3934. (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
  3935. (cs_opt_size in current_settings.optimizerswitches)
  3936. ) and
  3937. { If the flags register is in use, don't change the instruction
  3938. to an ADD otherwise this will scramble the flags. [Kit] }
  3939. not RegInUsedRegs(NR_DEFAULTFLAGS, UsedRegs) and
  3940. ConvertLEA(taicpu(p)) then
  3941. begin
  3942. Result:=true;
  3943. exit;
  3944. end;
  3945. end;
  3946. if GetNextInstruction(p,hp1) and
  3947. (hp1.typ=ait_instruction) then
  3948. begin
  3949. if MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) and
  3950. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  3951. MatchOpType(Taicpu(hp1),top_reg,top_reg) and
  3952. (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) then
  3953. begin
  3954. TransferUsedRegs(TmpUsedRegs);
  3955. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  3956. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  3957. begin
  3958. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  3959. DebugMsg(SPeepholeOptimization + 'LeaMov2Lea done',p);
  3960. RemoveInstruction(hp1);
  3961. result:=true;
  3962. exit;
  3963. end;
  3964. end;
  3965. { changes
  3966. lea <ref1>, reg1
  3967. <op> ...,<ref. with reg1>,...
  3968. to
  3969. <op> ...,<ref1>,... }
  3970. if (taicpu(p).oper[1]^.reg<>current_procinfo.framepointer) and
  3971. (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) and
  3972. not(MatchInstruction(hp1,A_LEA,[])) then
  3973. begin
  3974. { find a reference which uses reg1 }
  3975. if (taicpu(hp1).ops>=1) and (taicpu(hp1).oper[0]^.typ=top_ref) and RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[0]^) then
  3976. ref:=0
  3977. else if (taicpu(hp1).ops>=2) and (taicpu(hp1).oper[1]^.typ=top_ref) and RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^) then
  3978. ref:=1
  3979. else
  3980. ref:=-1;
  3981. if (ref<>-1) and
  3982. { reg1 must be either the base or the index }
  3983. ((taicpu(hp1).oper[ref]^.ref^.base=taicpu(p).oper[1]^.reg) xor (taicpu(hp1).oper[ref]^.ref^.index=taicpu(p).oper[1]^.reg)) then
  3984. begin
  3985. { reg1 can be removed from the reference }
  3986. saveref:=taicpu(hp1).oper[ref]^.ref^;
  3987. if taicpu(hp1).oper[ref]^.ref^.base=taicpu(p).oper[1]^.reg then
  3988. taicpu(hp1).oper[ref]^.ref^.base:=NR_NO
  3989. else if taicpu(hp1).oper[ref]^.ref^.index=taicpu(p).oper[1]^.reg then
  3990. taicpu(hp1).oper[ref]^.ref^.index:=NR_NO
  3991. else
  3992. Internalerror(2019111201);
  3993. { check if the can insert all data of the lea into the second instruction }
  3994. if ((taicpu(hp1).oper[ref]^.ref^.base=taicpu(p).oper[1]^.reg) or (taicpu(hp1).oper[ref]^.ref^.scalefactor <= 1)) and
  3995. ((taicpu(p).oper[0]^.ref^.base=NR_NO) or (taicpu(hp1).oper[ref]^.ref^.base=NR_NO)) and
  3996. ((taicpu(p).oper[0]^.ref^.index=NR_NO) or (taicpu(hp1).oper[ref]^.ref^.index=NR_NO)) and
  3997. ((taicpu(p).oper[0]^.ref^.symbol=nil) or (taicpu(hp1).oper[ref]^.ref^.symbol=nil)) and
  3998. ((taicpu(p).oper[0]^.ref^.relsymbol=nil) or (taicpu(hp1).oper[ref]^.ref^.relsymbol=nil)) and
  3999. ((taicpu(p).oper[0]^.ref^.scalefactor <= 1) or (taicpu(hp1).oper[ref]^.ref^.scalefactor <= 1)) and
  4000. (taicpu(p).oper[0]^.ref^.segment=NR_NO) and (taicpu(hp1).oper[ref]^.ref^.segment=NR_NO)
  4001. {$ifdef x86_64}
  4002. and (abs(taicpu(hp1).oper[ref]^.ref^.offset+taicpu(p).oper[0]^.ref^.offset)<=$7fffffff)
  4003. and (((taicpu(p).oper[0]^.ref^.base<>NR_RIP) and (taicpu(p).oper[0]^.ref^.index<>NR_RIP)) or
  4004. ((taicpu(hp1).oper[ref]^.ref^.base=NR_NO) and (taicpu(hp1).oper[ref]^.ref^.index=NR_NO))
  4005. )
  4006. {$endif x86_64}
  4007. then
  4008. begin
  4009. { reg1 might not used by the second instruction after it is remove from the reference }
  4010. if not(RegInInstruction(taicpu(p).oper[1]^.reg,taicpu(hp1))) then
  4011. begin
  4012. TransferUsedRegs(TmpUsedRegs);
  4013. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  4014. { reg1 is not updated so it might not be used afterwards }
  4015. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  4016. begin
  4017. DebugMsg(SPeepholeOptimization + 'LeaOp2Op done',p);
  4018. if taicpu(p).oper[0]^.ref^.base<>NR_NO then
  4019. taicpu(hp1).oper[ref]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
  4020. if taicpu(p).oper[0]^.ref^.index<>NR_NO then
  4021. taicpu(hp1).oper[ref]^.ref^.index:=taicpu(p).oper[0]^.ref^.index;
  4022. if taicpu(p).oper[0]^.ref^.symbol<>nil then
  4023. taicpu(hp1).oper[ref]^.ref^.symbol:=taicpu(p).oper[0]^.ref^.symbol;
  4024. if taicpu(p).oper[0]^.ref^.relsymbol<>nil then
  4025. taicpu(hp1).oper[ref]^.ref^.relsymbol:=taicpu(p).oper[0]^.ref^.relsymbol;
  4026. if taicpu(p).oper[0]^.ref^.scalefactor > 1 then
  4027. taicpu(hp1).oper[ref]^.ref^.scalefactor:=taicpu(p).oper[0]^.ref^.scalefactor;
  4028. inc(taicpu(hp1).oper[ref]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
  4029. RemoveCurrentP(p, hp1);
  4030. result:=true;
  4031. exit;
  4032. end
  4033. end;
  4034. end;
  4035. { recover }
  4036. taicpu(hp1).oper[ref]^.ref^:=saveref;
  4037. end;
  4038. end;
  4039. end;
  4040. { for now, we do not mess with the stack pointer, thought it might be usefull to remove
  4041. unneeded lea sequences on the stack pointer, it needs to be tested in detail }
  4042. if (taicpu(p).oper[1]^.reg <> NR_STACK_POINTER_REG) and
  4043. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[1]^.reg) then
  4044. begin
  4045. { Check common LEA/LEA conditions }
  4046. if MatchInstruction(hp1,A_LEA,[taicpu(p).opsize]) and
  4047. (taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg) and
  4048. (taicpu(p).oper[0]^.ref^.relsymbol = nil) and
  4049. (taicpu(p).oper[0]^.ref^.segment = NR_NO) and
  4050. (taicpu(p).oper[0]^.ref^.symbol = nil) and
  4051. (taicpu(hp1).oper[0]^.ref^.relsymbol = nil) and
  4052. (taicpu(hp1).oper[0]^.ref^.segment = NR_NO) and
  4053. (taicpu(hp1).oper[0]^.ref^.symbol = nil) and
  4054. (
  4055. (taicpu(p).oper[0]^.ref^.base = NR_NO) or { Don't call RegModifiedBetween unnecessarily }
  4056. not(RegModifiedBetween(taicpu(p).oper[0]^.ref^.base,p,hp1))
  4057. ) and (
  4058. (taicpu(p).oper[0]^.ref^.index = taicpu(p).oper[0]^.ref^.base) or { Don't call RegModifiedBetween unnecessarily }
  4059. (taicpu(p).oper[0]^.ref^.index = NR_NO) or
  4060. not(RegModifiedBetween(taicpu(p).oper[0]^.ref^.index,p,hp1))
  4061. ) then
  4062. begin
  4063. { changes
  4064. lea (regX,scale), reg1
  4065. lea offset(reg1,reg1), reg1
  4066. to
  4067. lea offset(regX,scale*2), reg1
  4068. and
  4069. lea (regX,scale1), reg1
  4070. lea offset(reg1,scale2), reg1
  4071. to
  4072. lea offset(regX,scale1*scale2), reg1
  4073. ... so long as the final scale does not exceed 8
  4074. (Similarly, allow the first instruction to be "lea (regX,regX),reg1")
  4075. }
  4076. if (taicpu(p).oper[0]^.ref^.offset = 0) and
  4077. (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) and
  4078. (
  4079. (
  4080. (taicpu(p).oper[0]^.ref^.base = NR_NO)
  4081. ) or (
  4082. (taicpu(p).oper[0]^.ref^.scalefactor <= 1) and
  4083. (
  4084. (taicpu(p).oper[0]^.ref^.base = taicpu(p).oper[0]^.ref^.index) and
  4085. not(RegUsedBetween(taicpu(p).oper[0]^.ref^.index, p, hp1))
  4086. )
  4087. )
  4088. ) and (
  4089. (
  4090. { lea (reg1,scale2), reg1 variant }
  4091. (taicpu(hp1).oper[0]^.ref^.base = NR_NO) and
  4092. (
  4093. (
  4094. (taicpu(p).oper[0]^.ref^.base = NR_NO) and
  4095. (taicpu(hp1).oper[0]^.ref^.scalefactor * taicpu(p).oper[0]^.ref^.scalefactor <= 8)
  4096. ) or (
  4097. { lea (regX,regX), reg1 variant }
  4098. (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  4099. (taicpu(hp1).oper[0]^.ref^.scalefactor <= 4)
  4100. )
  4101. )
  4102. ) or (
  4103. { lea (reg1,reg1), reg1 variant }
  4104. (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) and
  4105. (taicpu(hp1).oper[0]^.ref^.scalefactor <= 1)
  4106. )
  4107. ) then
  4108. begin
  4109. DebugMsg(SPeepholeOptimization + 'LeaLea2Lea 2 done',p);
  4110. { Make everything homogeneous to make calculations easier }
  4111. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) then
  4112. begin
  4113. if taicpu(p).oper[0]^.ref^.index <> NR_NO then
  4114. { Convert lea (regX,regX),reg1 to lea (regX,2),reg1 }
  4115. taicpu(p).oper[0]^.ref^.scalefactor := 2
  4116. else
  4117. taicpu(p).oper[0]^.ref^.index := taicpu(p).oper[0]^.ref^.base;
  4118. taicpu(p).oper[0]^.ref^.base := NR_NO;
  4119. end;
  4120. if (taicpu(hp1).oper[0]^.ref^.base = NR_NO) then
  4121. begin
  4122. { Just to prevent miscalculations }
  4123. if (taicpu(hp1).oper[0]^.ref^.scalefactor = 0) then
  4124. taicpu(hp1).oper[0]^.ref^.scalefactor := taicpu(p).oper[0]^.ref^.scalefactor
  4125. else
  4126. taicpu(hp1).oper[0]^.ref^.scalefactor := taicpu(hp1).oper[0]^.ref^.scalefactor * taicpu(p).oper[0]^.ref^.scalefactor;
  4127. end
  4128. else
  4129. begin
  4130. taicpu(hp1).oper[0]^.ref^.base := NR_NO;
  4131. taicpu(hp1).oper[0]^.ref^.scalefactor := taicpu(p).oper[0]^.ref^.scalefactor * 2;
  4132. end;
  4133. taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.ref^.index;
  4134. RemoveCurrentP(p);
  4135. result:=true;
  4136. exit;
  4137. end
  4138. { changes
  4139. lea offset1(regX), reg1
  4140. lea offset2(reg1), reg1
  4141. to
  4142. lea offset1+offset2(regX), reg1 }
  4143. else if
  4144. (
  4145. (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) and
  4146. (taicpu(p).oper[0]^.ref^.index = NR_NO)
  4147. ) or (
  4148. (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) and
  4149. (taicpu(hp1).oper[0]^.ref^.scalefactor <= 1) and
  4150. (
  4151. (
  4152. (taicpu(p).oper[0]^.ref^.index = NR_NO) or
  4153. (taicpu(p).oper[0]^.ref^.base = NR_NO)
  4154. ) or (
  4155. (taicpu(p).oper[0]^.ref^.scalefactor <= 1) and
  4156. (
  4157. (taicpu(p).oper[0]^.ref^.index = NR_NO) or
  4158. (
  4159. (taicpu(p).oper[0]^.ref^.index = taicpu(p).oper[0]^.ref^.base) and
  4160. (
  4161. (taicpu(hp1).oper[0]^.ref^.index = NR_NO) or
  4162. (taicpu(hp1).oper[0]^.ref^.base = NR_NO)
  4163. )
  4164. )
  4165. )
  4166. )
  4167. )
  4168. ) then
  4169. begin
  4170. DebugMsg(SPeepholeOptimization + 'LeaLea2Lea 1 done',p);
  4171. if taicpu(hp1).oper[0]^.ref^.index=taicpu(p).oper[1]^.reg then
  4172. begin
  4173. taicpu(hp1).oper[0]^.ref^.index:=taicpu(p).oper[0]^.ref^.base;
  4174. inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset*max(taicpu(hp1).oper[0]^.ref^.scalefactor,1));
  4175. { if the register is used as index and base, we have to increase for base as well
  4176. and adapt base }
  4177. if taicpu(hp1).oper[0]^.ref^.base=taicpu(p).oper[1]^.reg then
  4178. begin
  4179. taicpu(hp1).oper[0]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
  4180. inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
  4181. end;
  4182. end
  4183. else
  4184. begin
  4185. inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
  4186. taicpu(hp1).oper[0]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
  4187. end;
  4188. if taicpu(p).oper[0]^.ref^.index<>NR_NO then
  4189. begin
  4190. taicpu(hp1).oper[0]^.ref^.base:=taicpu(hp1).oper[0]^.ref^.index;
  4191. taicpu(hp1).oper[0]^.ref^.index:=taicpu(p).oper[0]^.ref^.index;
  4192. taicpu(hp1).oper[0]^.ref^.scalefactor:=taicpu(p).oper[0]^.ref^.scalefactor;
  4193. end;
  4194. RemoveCurrentP(p);
  4195. result:=true;
  4196. exit;
  4197. end;
  4198. end;
  4199. { Change:
  4200. leal/q $x(%reg1),%reg2
  4201. ...
  4202. shll/q $y,%reg2
  4203. To:
  4204. leal/q $(x+2^y)(%reg1,2^y),%reg2 (if y <= 3)
  4205. }
  4206. if MatchInstruction(hp1, A_SHL, [taicpu(p).opsize]) and
  4207. MatchOpType(taicpu(hp1), top_const, top_reg) and
  4208. (taicpu(hp1).oper[0]^.val <= 3) then
  4209. begin
  4210. Multiple := 1 shl taicpu(hp1).oper[0]^.val;
  4211. TransferUsedRegs(TmpUsedRegs);
  4212. UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
  4213. TempReg := taicpu(hp1).oper[1]^.reg; { Store locally to reduce the number of dereferences }
  4214. if
  4215. { This allows the optimisation in some circumstances even if the lea instruction already has a scale factor
  4216. (this works even if scalefactor is zero) }
  4217. ((Multiple * taicpu(p).oper[0]^.ref^.scalefactor) <= 8) and
  4218. { Ensure offset doesn't go out of bounds }
  4219. (abs(taicpu(p).oper[0]^.ref^.offset * Multiple) <= $7FFFFFFF) and
  4220. not (RegInUsedRegs(NR_DEFAULTFLAGS,TmpUsedRegs)) and
  4221. MatchOperand(taicpu(p).oper[1]^, TempReg) and
  4222. (
  4223. (
  4224. not SuperRegistersEqual(taicpu(p).oper[0]^.ref^.base, TempReg) and
  4225. (
  4226. (taicpu(p).oper[0]^.ref^.index = NR_NO) or
  4227. (taicpu(p).oper[0]^.ref^.index = NR_INVALID) or
  4228. (
  4229. { Check for lea $x(%reg1,%reg1),%reg2 and treat as it it were lea $x(%reg1,2),%reg2 }
  4230. (taicpu(p).oper[0]^.ref^.index = taicpu(p).oper[0]^.ref^.base) and
  4231. (taicpu(p).oper[0]^.ref^.scalefactor <= 1)
  4232. )
  4233. )
  4234. ) or (
  4235. (
  4236. (taicpu(p).oper[0]^.ref^.base = NR_NO) or
  4237. (taicpu(p).oper[0]^.ref^.base = NR_INVALID)
  4238. ) and
  4239. not SuperRegistersEqual(taicpu(p).oper[0]^.ref^.index, TempReg)
  4240. )
  4241. ) then
  4242. begin
  4243. repeat
  4244. with taicpu(p).oper[0]^.ref^ do
  4245. begin
  4246. { Convert lea $x(%reg1,%reg1),%reg2 to lea $x(%reg1,2),%reg2 }
  4247. if index = base then
  4248. begin
  4249. if Multiple > 4 then
  4250. { Optimisation will no longer work because resultant
  4251. scale factor will exceed 8 }
  4252. Break;
  4253. base := NR_NO;
  4254. scalefactor := 2;
  4255. DebugMsg(SPeepholeOptimization + 'lea $x(%reg1,%reg1),%reg2 -> lea $x(%reg1,2),%reg2 for following optimisation', p);
  4256. end
  4257. else if (base <> NR_NO) and (base <> NR_INVALID) then
  4258. begin
  4259. { Scale factor only works on the index register }
  4260. index := base;
  4261. base := NR_NO;
  4262. end;
  4263. { For safety }
  4264. if scalefactor <= 1 then
  4265. begin
  4266. DebugMsg(SPeepholeOptimization + 'LeaShl2Lea 1', p);
  4267. scalefactor := Multiple;
  4268. end
  4269. else
  4270. begin
  4271. DebugMsg(SPeepholeOptimization + 'LeaShl2Lea 2', p);
  4272. scalefactor := scalefactor * Multiple;
  4273. end;
  4274. offset := offset * Multiple;
  4275. end;
  4276. RemoveInstruction(hp1);
  4277. Result := True;
  4278. Exit;
  4279. { This repeat..until loop exists for the benefit of Break }
  4280. until True;
  4281. end;
  4282. end;
  4283. end;
  4284. end;
{ Tries to fold the immediately preceding dec/sub/add on the same register
  into the "sub const,reg" at p, combining both constants into a single
  instruction.  (The folding arithmetic assumes p is a subtraction —
  NOTE(review): confirm all callers pass a SUB.)
  Returns True only when p itself was removed because the combined
  constant became zero; in that case p is repositioned to the previous
  instruction (or the following one if no previous instruction exists)
  so the caller can rescan from there. }
function TX86AsmOptimizer.DoSubAddOpt(var p: tai): Boolean;
  var
    hp1 : tai;
  begin
    DoSubAddOpt := False;
    if GetLastInstruction(p, hp1) and
       (hp1.typ = ait_instruction) and
       { both instructions must operate on the same operand size }
       (taicpu(hp1).opsize = taicpu(p).opsize) then
      case taicpu(hp1).opcode Of
        A_DEC:
          { "dec reg; sub const,reg" -> "sub const+1,reg" }
          if (taicpu(hp1).oper[0]^.typ = top_reg) and
            MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
            begin
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+1);
              RemoveInstruction(hp1);
            end;
        A_SUB:
          { "sub const1,reg; sub const2,reg" -> "sub const1+const2,reg" }
          if MatchOpType(taicpu(hp1),top_const,top_reg) and
            MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) then
            begin
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+taicpu(hp1).oper[0]^.val);
              RemoveInstruction(hp1);
            end;
        A_ADD:
          begin
            { "add const1,reg; sub const2,reg" -> "sub const2-const1,reg" }
            if MatchOpType(taicpu(hp1),top_const,top_reg) and
              MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) then
              begin
                taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
                RemoveInstruction(hp1);
                { the two constants cancelled out: p is now "sub 0,reg"
                  and can be removed entirely }
                if (taicpu(p).oper[0]^.val = 0) then
                  begin
                    hp1 := tai(p.next);
                    RemoveInstruction(p); { Note, the choice to not use RemoveCurrentp is deliberate }
                    if not GetLastInstruction(hp1, p) then
                      p := hp1;
                    DoSubAddOpt := True;
                  end
              end;
          end;
        else
          ;
      end;
  end;
  4329. function TX86AsmOptimizer.OptPass1Sub(var p : tai) : boolean;
  4330. {$ifdef i386}
  4331. var
  4332. hp1 : tai;
  4333. {$endif i386}
  4334. begin
  4335. Result:=false;
  4336. { * change "subl $2, %esp; pushw x" to "pushl x"}
  4337. { * change "sub/add const1, reg" or "dec reg" followed by
  4338. "sub const2, reg" to one "sub ..., reg" }
  4339. if MatchOpType(taicpu(p),top_const,top_reg) then
  4340. begin
  4341. {$ifdef i386}
  4342. if (taicpu(p).oper[0]^.val = 2) and
  4343. (taicpu(p).oper[1]^.reg = NR_ESP) and
  4344. { Don't do the sub/push optimization if the sub }
  4345. { comes from setting up the stack frame (JM) }
  4346. (not(GetLastInstruction(p,hp1)) or
  4347. not(MatchInstruction(hp1,A_MOV,[S_L]) and
  4348. MatchOperand(taicpu(hp1).oper[0]^,NR_ESP) and
  4349. MatchOperand(taicpu(hp1).oper[0]^,NR_EBP))) then
  4350. begin
  4351. hp1 := tai(p.next);
  4352. while Assigned(hp1) and
  4353. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  4354. not RegReadByInstruction(NR_ESP,hp1) and
  4355. not RegModifiedByInstruction(NR_ESP,hp1) do
  4356. hp1 := tai(hp1.next);
  4357. if Assigned(hp1) and
  4358. MatchInstruction(hp1,A_PUSH,[S_W]) then
  4359. begin
  4360. taicpu(hp1).changeopsize(S_L);
  4361. if taicpu(hp1).oper[0]^.typ=top_reg then
  4362. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  4363. hp1 := tai(p.next);
  4364. RemoveCurrentp(p, hp1);
  4365. Result:=true;
  4366. exit;
  4367. end;
  4368. end;
  4369. {$endif i386}
  4370. if DoSubAddOpt(p) then
  4371. Result:=true;
  4372. end;
  4373. end;
{ Optimises "shl/sal const,%reg".  Three independent transformations
  are attempted:
  1. "shl $1..3,%reg" (32/64-bit) followed by add/sub/inc/dec/lea on
     the same register is merged into one lea (ShlAddLeaSubIncDec2Lea);
  2. a following "and" (optionally through an intermediate mov on
     x86_64) whose mask only clears bits the shift already zeroed is
     removed, or its mask tightened (ShlAnd2Shl / ShlMovAnd2Shl);
  3. when the shifted register is only used as the scale-1 index of the
     reference in a following mov/lea and dies there, the shift is
     folded into that reference's scale factor (ShlOp2Op). }
function TX86AsmOptimizer.OptPass1SHLSAL(var p : tai) : boolean;
  var
    TmpBool1,TmpBool2 : Boolean;
    tmpref : treference;
    hp1,hp2: tai;
    mask: tcgint;
  begin
    Result:=false;
    { All these optimisations work on "shl/sal const,%reg" }
    if not MatchOpType(taicpu(p),top_const,top_reg) then
      Exit;
    if (taicpu(p).opsize in [S_L{$ifdef x86_64},S_Q{$endif x86_64}]) and
       (taicpu(p).oper[0]^.val <= 3) then
      { Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement }
      begin
        { should we check the next instruction? }
        TmpBool1 := True;
        { have we found an add/sub which could be
          integrated in the lea? }
        TmpBool2 := False;
        { start with "(,%reg,1 shl const)" and try to absorb the
          following instructions into base/offset/scalefactor }
        reference_reset(tmpref,2,[]);
        TmpRef.index := taicpu(p).oper[1]^.reg;
        TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
        { keep consuming add/sub/inc/dec/lea instructions that target
          the shifted register, provided the instruction after the
          candidate does not read the flags it would have set }
        while TmpBool1 and
              GetNextInstruction(p, hp1) and
              (tai(hp1).typ = ait_instruction) and
              ((((taicpu(hp1).opcode = A_ADD) or
                 (taicpu(hp1).opcode = A_SUB)) and
                (taicpu(hp1).oper[1]^.typ = Top_Reg) and
                (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
               (((taicpu(hp1).opcode = A_INC) or
                 (taicpu(hp1).opcode = A_DEC)) and
                (taicpu(hp1).oper[0]^.typ = Top_Reg) and
                (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg)) or
               ((taicpu(hp1).opcode = A_LEA) and
                (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) and
                (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg))) and
              (not GetNextInstruction(hp1,hp2) or
               not instrReadsFlags(hp2)) Do
          begin
            TmpBool1 := False;
            if taicpu(hp1).opcode=A_LEA then
              begin
                { absorb the lea only when no base has been claimed yet,
                  it carries no symbol/segment, and the combined scale
                  factor stays within the encodable maximum of 8 }
                if (TmpRef.base = NR_NO) and
                   (taicpu(hp1).oper[0]^.ref^.symbol=nil) and
                   (taicpu(hp1).oper[0]^.ref^.relsymbol=nil) and
                   (taicpu(hp1).oper[0]^.ref^.segment=NR_NO) and
                   ((taicpu(hp1).oper[0]^.ref^.scalefactor=0) or
                    (taicpu(hp1).oper[0]^.ref^.scalefactor*tmpref.scalefactor<=8)) then
                  begin
                    TmpBool1 := True;
                    TmpBool2 := True;
                    inc(TmpRef.offset, taicpu(hp1).oper[0]^.ref^.offset);
                    if taicpu(hp1).oper[0]^.ref^.scalefactor<>0 then
                      tmpref.scalefactor:=tmpref.scalefactor*taicpu(hp1).oper[0]^.ref^.scalefactor;
                    TmpRef.base := taicpu(hp1).oper[0]^.ref^.base;
                    RemoveInstruction(hp1);
                  end
              end
            else if (taicpu(hp1).oper[0]^.typ = Top_Const) then
              begin
                { add/sub with a constant folds into the offset }
                TmpBool1 := True;
                TmpBool2 := True;
                case taicpu(hp1).opcode of
                  A_ADD:
                    inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
                  A_SUB:
                    dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
                  else
                    internalerror(2019050536);
                end;
                RemoveInstruction(hp1);
              end
            else
              { "add reg2,reg" supplies the base register (only if the
                base is still free); inc/dec adjust the offset by one }
              if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
                 (((taicpu(hp1).opcode = A_ADD) and
                   (TmpRef.base = NR_NO)) or
                  (taicpu(hp1).opcode = A_INC) or
                  (taicpu(hp1).opcode = A_DEC)) then
                begin
                  TmpBool1 := True;
                  TmpBool2 := True;
                  case taicpu(hp1).opcode of
                    A_ADD:
                      TmpRef.base := taicpu(hp1).oper[0]^.reg;
                    A_INC:
                      inc(TmpRef.offset);
                    A_DEC:
                      dec(TmpRef.offset);
                    else
                      internalerror(2019050535);
                  end;
                  RemoveInstruction(hp1);
                end;
          end;
        { emit the lea if anything was merged; on pre-Pentium2 CPUs it
          is emitted even when nothing was merged (unless optimising
          for size), replacing the bare shl }
        if TmpBool2
{$ifndef x86_64}
           or
           ((current_settings.optimizecputype < cpu_Pentium2) and
            (taicpu(p).oper[0]^.val <= 3) and
            not(cs_opt_size in current_settings.optimizerswitches))
{$endif x86_64}
        then
          begin
            { nothing was merged and the shift is by 1: "add %reg,%reg"
              is emitted instead of a lea (this branch is only reachable
              on non-x86_64, where TmpBool2 may be False here) }
            if not(TmpBool2) and
               (taicpu(p).oper[0]^.val=1) then
              begin
                hp1:=taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
                  taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
              end
            else
              hp1:=taicpu.op_ref_reg(A_LEA, taicpu(p).opsize, TmpRef,
                taicpu(p).oper[1]^.reg);
            DebugMsg(SPeepholeOptimization + 'ShlAddLeaSubIncDec2Lea',p);
            InsertLLItem(p.previous, p.next, hp1);
            p.free;
            p := hp1;
          end;
      end
{$ifndef x86_64}
    else if (current_settings.optimizecputype < cpu_Pentium2) then
      begin
        { changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
          but faster on a 486, and Tairable in both U and V pipes on the Pentium
          (unlike shl, which is only Tairable in the U pipe) }
        if taicpu(p).oper[0]^.val=1 then
          begin
            hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
              taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
            InsertLLItem(p.previous, p.next, hp1);
            p.free;
            p := hp1;
          end
        { changes "shl $2, %reg" to "lea (,%reg,4), %reg"
          "shl $3, %reg" to "lea (,%reg,8), %reg }
        else if (taicpu(p).opsize = S_L) and
                (taicpu(p).oper[0]^.val<= 3) then
          begin
            reference_reset(tmpref,2,[]);
            TmpRef.index := taicpu(p).oper[1]^.reg;
            TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
            hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
            InsertLLItem(p.previous, p.next, hp1);
            p.free;
            p := hp1;
          end;
      end
{$endif x86_64}
    else if
      GetNextInstruction(p, hp1) and (hp1.typ = ait_instruction) and MatchOpType(taicpu(hp1), top_const, top_reg) and
      (
        (
          MatchInstruction(hp1, A_AND, [taicpu(p).opsize]) and
          SetAndTest(hp1, hp2)
{$ifdef x86_64}
        ) or
        (
          MatchInstruction(hp1, A_MOV, [taicpu(p).opsize]) and
          GetNextInstruction(hp1, hp2) and
          MatchInstruction(hp2, A_AND, [taicpu(p).opsize]) and
          MatchOpType(taicpu(hp2), top_reg, top_reg) and
          (taicpu(hp1).oper[1]^.reg = taicpu(hp2).oper[0]^.reg)
{$endif x86_64}
        )
      ) and
      (taicpu(p).oper[1]^.reg = taicpu(hp2).oper[1]^.reg) then
      begin
        { Change:
            shl x, %reg1
            mov -(1<<x), %reg2
            and %reg2, %reg1
          Or:
            shl x, %reg1
            and -(1<<x), %reg1
          To just:
            shl x, %reg1
          Since the and operation only zeroes bits that are already zero from the shl operation
        }
        { compute the mask of bits guaranteed zero after the shift;
          the listed special cases avoid Int64 overflow in "1 shl val" }
        case taicpu(p).oper[0]^.val of
          8:
            mask:=$FFFFFFFFFFFFFF00;
          16:
            mask:=$FFFFFFFFFFFF0000;
          32:
            mask:=$FFFFFFFF00000000;
          63:
            { Constant pre-calculated to prevent overflow errors with Int64 }
            mask:=$8000000000000000;
          else
            begin
              if taicpu(p).oper[0]^.val >= 64 then
                { Shouldn't happen realistically, since the register
                  is guaranteed to be set to zero at this point }
                mask := 0
              else
                mask := -(Int64(1 shl taicpu(p).oper[0]^.val));
            end;
        end;
        if taicpu(hp1).oper[0]^.val = mask then
          begin
            { Everything checks out, perform the optimisation, as long as
              the FLAGS register isn't being used}
            TransferUsedRegs(TmpUsedRegs);
            UpdateUsedRegs(TmpUsedRegs, tai(p.next));
{$ifdef x86_64}
            if (hp1 <> hp2) then
              begin
                { "shl/mov/and" version }
                UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                { Don't do the optimisation if the FLAGS register is in use }
                if not(RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp2, TmpUsedRegs)) then
                  begin
                    DebugMsg(SPeepholeOptimization + 'ShlMovAnd2Shl', p);
                    { Don't remove the 'mov' instruction if its register is used elsewhere }
                    if not(RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg, hp2, TmpUsedRegs)) then
                      begin
                        RemoveInstruction(hp1);
                        Result := True;
                      end;
                    { Only set Result to True if the 'mov' instruction was removed }
                    RemoveInstruction(hp2);
                  end;
              end
            else
{$endif x86_64}
              begin
                { "shl/and" version }
                { Don't do the optimisation if the FLAGS register is in use }
                if not(RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp1, TmpUsedRegs)) then
                  begin
                    DebugMsg(SPeepholeOptimization + 'ShlAnd2Shl', p);
                    RemoveInstruction(hp1);
                    Result := True;
                  end;
              end;
            Exit;
          end
        else {$ifdef x86_64}if (hp1 = hp2) then{$endif x86_64}
          begin
            { Even if the mask doesn't allow for its removal, we might be
              able to optimise the mask for the "shl/and" version, which
              may permit other peephole optimisations }
{$ifdef DEBUG_AOPTCPU}
            mask := taicpu(hp1).oper[0]^.val and mask;
            if taicpu(hp1).oper[0]^.val <> mask then
              begin
                DebugMsg(
                  SPeepholeOptimization +
                  'Changed mask from $' + debug_tostr(taicpu(hp1).oper[0]^.val) +
                  ' to $' + debug_tostr(mask) +
                  'based on previous instruction (ShlAnd2ShlAnd)', hp1);
                taicpu(hp1).oper[0]^.val := mask;
              end;
{$else DEBUG_AOPTCPU}
            { If debugging is off, just set the operand even if it's the same }
            taicpu(hp1).oper[0]^.val := taicpu(hp1).oper[0]^.val and mask;
{$endif DEBUG_AOPTCPU}
          end;
      end;
    {
      change
                shl/sal const,reg
                <op> ...(...,reg,1),...
      into
                <op> ...(...,reg,1 shl const),...
      if const in 1..3
    }
    if MatchOpType(taicpu(p), top_const, top_reg) and
       (taicpu(p).oper[0]^.val in [1..3]) and
       GetNextInstruction(p, hp1) and
       MatchInstruction(hp1,A_MOV,A_LEA,[]) and
       MatchOpType(taicpu(hp1), top_ref, top_reg) and
       (taicpu(p).oper[1]^.reg=taicpu(hp1).oper[0]^.ref^.index) and
       (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^.ref^.base) and
       (taicpu(hp1).oper[0]^.ref^.scalefactor in [0,1]) then
      begin
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.next));
        { only valid if the shifted register dies with the mov/lea }
        if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
          begin
            taicpu(hp1).oper[0]^.ref^.scalefactor:=1 shl taicpu(p).oper[0]^.val;
            DebugMsg(SPeepholeOptimization + 'ShlOp2Op', p);
            RemoveCurrentP(p);
            Result:=true;
          end;
      end;
  end;
{ Merges a run of byte-sized stores to consecutive, 4-byte-aligned
  addresses into a single dword store: first_mov (byte to offset+0),
  second_mov ("mov $0" to offset+1), then either two further byte
  writes of zero (offset+2, offset+3) or one word write of zero
  (offset+2).  When first_mov stores a register, second_mov is reused
  as a movzx inserted in front so the upper 24 bits of the dword are
  zero; when it stores a constant, second_mov is simply removed.
  Returns True if the merge was performed. }
function TX86AsmOptimizer.CheckMemoryWrite(var first_mov, second_mov: taicpu): Boolean;
  var
    CurrentRef: TReference;
    FullReg: TRegister;
    hp1, hp2: tai;
  begin
    Result := False;
    { only byte-sized stores qualify }
    if (first_mov.opsize <> S_B) or (second_mov.opsize <> S_B) then
      Exit;
    { We assume you've checked if the operand is actually a reference by
      this point. If it isn't, you'll most likely get an access violation }
    CurrentRef := first_mov.oper[1]^.ref^;
    { Memory must be aligned }
    if (CurrentRef.offset mod 4) <> 0 then
      Exit;
    Inc(CurrentRef.offset);
    CurrentRef.alignment := 1; { Otherwise references_equal will return False }
    { second_mov must store zero to offset+1, and the instruction after
      it must also be a "mov $0" to memory }
    if MatchOperand(second_mov.oper[0]^, 0) and
       references_equal(second_mov.oper[1]^.ref^, CurrentRef) and
       GetNextInstruction(second_mov, hp1) and
       (hp1.typ = ait_instruction) and
       (taicpu(hp1).opcode = A_MOV) and
       MatchOpType(taicpu(hp1), top_const, top_ref) and
       (taicpu(hp1).oper[0]^.val = 0) then
      begin
        Inc(CurrentRef.offset);
        CurrentRef.alignment := taicpu(hp1).oper[1]^.ref^.alignment; { Otherwise references_equal might return False }
        { 32-bit register corresponding to first_mov's source register
          (only meaningful for the register variant) }
        FullReg := newreg(R_INTREGISTER,getsupreg(first_mov.oper[0]^.reg), R_SUBD);
        if references_equal(taicpu(hp1).oper[1]^.ref^, CurrentRef) then
          begin
            case taicpu(hp1).opsize of
              S_B:
                { byte store of zero to offset+2: one more byte store of
                  zero to offset+3 is needed to complete the dword }
                if GetNextInstruction(hp1, hp2) and
                   MatchInstruction(taicpu(hp2), A_MOV, [S_B]) and
                   MatchOpType(taicpu(hp2), top_const, top_ref) and
                   (taicpu(hp2).oper[0]^.val = 0) then
                  begin
                    Inc(CurrentRef.offset);
                    CurrentRef.alignment := 1; { Otherwise references_equal will return False }
                    if references_equal(taicpu(hp2).oper[1]^.ref^, CurrentRef) and
                       (taicpu(hp2).opsize = S_B) then
                      begin
                        { drop the two zero stores and widen first_mov }
                        RemoveInstruction(hp1);
                        RemoveInstruction(hp2);
                        first_mov.opsize := S_L;
                        if first_mov.oper[0]^.typ = top_reg then
                          begin
                            DebugMsg(SPeepholeOptimization + 'MOVb/MOVb/MOVb/MOVb -> MOVZX/MOVl', first_mov);
                            { Reuse second_mov as a MOVZX instruction }
                            second_mov.opcode := A_MOVZX;
                            second_mov.opsize := S_BL;
                            second_mov.loadreg(0, first_mov.oper[0]^.reg);
                            second_mov.loadreg(1, FullReg);
                            first_mov.oper[0]^.reg := FullReg;
                            asml.Remove(second_mov);
                            asml.InsertBefore(second_mov, first_mov);
                          end
                        else
                          { It's a value }
                          begin
                            DebugMsg(SPeepholeOptimization + 'MOVb/MOVb/MOVb/MOVb -> MOVl', first_mov);
                            RemoveInstruction(second_mov);
                          end;
                        Result := True;
                        Exit;
                      end;
                  end;
              S_W:
                { word store of zero to offset+2 completes the dword }
                begin
                  RemoveInstruction(hp1);
                  first_mov.opsize := S_L;
                  if first_mov.oper[0]^.typ = top_reg then
                    begin
                      DebugMsg(SPeepholeOptimization + 'MOVb/MOVb/MOVw -> MOVZX/MOVl', first_mov);
                      { Reuse second_mov as a MOVZX instruction }
                      second_mov.opcode := A_MOVZX;
                      second_mov.opsize := S_BL;
                      second_mov.loadreg(0, first_mov.oper[0]^.reg);
                      second_mov.loadreg(1, FullReg);
                      first_mov.oper[0]^.reg := FullReg;
                      asml.Remove(second_mov);
                      asml.InsertBefore(second_mov, first_mov);
                    end
                  else
                    { It's a value }
                    begin
                      DebugMsg(SPeepholeOptimization + 'MOVb/MOVb/MOVw -> MOVl', first_mov);
                      RemoveInstruction(second_mov);
                    end;
                  Result := True;
                  Exit;
                end;
              else
                ;
            end;
          end;
      end;
  end;
{ Optimises "fstp mem; fld mem" (and "fistp mem; fild mem") pairs that
  reference the same memory location with the same size:
  * when the pair stores the function result immediately before the
    exit code, both instructions are removed entirely;
  * otherwise the fstp/fistp becomes fst/fist and the reload is removed
    (FstpFld2Fst) — but only when another identical fstp follows, and
    never for extended/comp operands, which fst cannot store. }
function TX86AsmOptimizer.OptPass1FSTP(var p: tai): boolean;
  { returns true if a "continue" should be done after this optimization }
  var
    hp1, hp2: tai;
  begin
    Result := false;
    if MatchOpType(taicpu(p),top_ref) and
       GetNextInstruction(p, hp1) and
       (hp1.typ = ait_instruction) and
       (((taicpu(hp1).opcode = A_FLD) and
         (taicpu(p).opcode = A_FSTP)) or
        ((taicpu(p).opcode = A_FISTP) and
         (taicpu(hp1).opcode = A_FILD))) and
       MatchOpType(taicpu(hp1),top_ref) and
       (taicpu(hp1).opsize = taicpu(p).opsize) and
       RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
      begin
        { replacing fstp f;fld f by fst f is only valid for extended because of rounding or if fastmath is on }
        if ((taicpu(p).opsize=S_FX) or (cs_opt_fastmath in current_settings.optimizerswitches)) and
           GetNextInstruction(hp1, hp2) and
           (hp2.typ = ait_instruction) and
           IsExitCode(hp2) and
           { the store must target a frame-pointer-relative local that is
             not below the function result symbol }
           (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
           not(assigned(current_procinfo.procdef.funcretsym) and
               (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
           (taicpu(p).oper[0]^.ref^.index = NR_NO) then
          begin
            { the value is dead after the exit: drop both instructions }
            RemoveInstruction(hp1);
            RemoveCurrentP(p, hp2);
            RemoveLastDeallocForFuncRes(p);
            Result := true;
          end
        else
          { we can do this only in fast math mode as fstp is rounding ...
            ... still disabled as it breaks the compiler and/or rtl }
          if ({ (cs_opt_fastmath in current_settings.optimizerswitches) or }
              { ... or if another fstp equal to the first one follows }
              (GetNextInstruction(hp1,hp2) and
               (hp2.typ = ait_instruction) and
               (taicpu(p).opcode=taicpu(hp2).opcode) and
               (taicpu(p).opsize=taicpu(hp2).opsize))
             ) and
             { fst can't store an extended/comp value }
             (taicpu(p).opsize <> S_FX) and
             (taicpu(p).opsize <> S_IQ) then
            begin
              if (taicpu(p).opcode = A_FSTP) then
                taicpu(p).opcode := A_FST
              else
                taicpu(p).opcode := A_FIST;
              DebugMsg(SPeepholeOptimization + 'FstpFld2Fst',p);
              RemoveInstruction(hp1);
            end;
      end;
  end;
{ Optimises "fld" followed by an x87 pop-arithmetic instruction:
  * "fld reg; fxxxp st,st1" becomes "fxxx reg,st" (non-commutative
    operations are swapped for their reversed counterpart);
  * "fld/fst mem1; fld mem1; faddp/fmulp st,st1" becomes
    "fld/fst mem1; fadd/fmul st,st";
  * with a differing opcode, "fld mem1" after an identical "fld/fst
    mem1" becomes "fld st";
  * "fld/fst mem1; fld mem2; fxxxp st,st1" becomes
    "fld/fst mem1; fxxx mem2". }
function TX86AsmOptimizer.OptPass1FLD(var p : tai) : boolean;
  var
    hp1, hp2: tai;
  begin
    result:=false;
    if MatchOpType(taicpu(p),top_reg) and
       GetNextInstruction(p, hp1) and
       (hp1.typ = Ait_Instruction) and
       MatchOpType(taicpu(hp1),top_reg,top_reg) and
       (taicpu(hp1).oper[0]^.reg = NR_ST) and
       (taicpu(hp1).oper[1]^.reg = NR_ST1) then
      { change to
          fld      reg               fxxx reg,st
          fxxxp    st, st1 (hp1)
        Remark: non commutative operations must be reversed!
      }
      begin
        case taicpu(hp1).opcode Of
          A_FMULP,A_FADDP,
          A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
            begin
              { sub/div swap to their reversed forms because the operand
                order changes when the fld is dropped }
              case taicpu(hp1).opcode Of
                A_FADDP: taicpu(hp1).opcode := A_FADD;
                A_FMULP: taicpu(hp1).opcode := A_FMUL;
                A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
                A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
                A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
                A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
                else
                  internalerror(2019050534);
              end;
              taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
              taicpu(hp1).oper[1]^.reg := NR_ST;
              RemoveCurrentP(p, hp1);
              Result:=true;
              exit;
            end;
          else
            ;
        end;
      end
    else
      { memory-operand form: the fld at p must be single/double sized and
        followed by "fxxxp st,st1" }
      if MatchOpType(taicpu(p),top_ref) and
         GetNextInstruction(p, hp2) and
         (hp2.typ = Ait_Instruction) and
         MatchOpType(taicpu(hp2),top_reg,top_reg) and
         (taicpu(p).opsize in [S_FS, S_FL]) and
         (taicpu(hp2).oper[0]^.reg = NR_ST) and
         (taicpu(hp2).oper[1]^.reg = NR_ST1) then
        { the preceding instruction loads/stores the same location }
        if GetLastInstruction(p, hp1) and
           MatchInstruction(hp1,A_FLD,A_FST,[taicpu(p).opsize]) and
           MatchOpType(taicpu(hp1),top_ref) and
           RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
          if ((taicpu(hp2).opcode = A_FMULP) or
              (taicpu(hp2).opcode = A_FADDP)) then
            { change to
                fld/fst   mem1  (hp1)             fld/fst mem1
                fld       mem1  (p)               fadd/
                faddp/                            fmul     st, st
                fmulp     st, st1 (hp2) }
            begin
              RemoveCurrentP(p, hp1);
              if (taicpu(hp2).opcode = A_FADDP) then
                taicpu(hp2).opcode := A_FADD
              else
                taicpu(hp2).opcode := A_FMUL;
              taicpu(hp2).oper[1]^.reg := NR_ST;
            end
          else
            { change to
                fld/fst  mem1 (hp1)              fld/fst mem1
                fld      mem1 (p)                fld     st}
            begin
              taicpu(p).changeopsize(S_FL);
              taicpu(p).loadreg(0,NR_ST);
            end
        else
          begin
            case taicpu(hp2).opcode Of
              A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
                { change to
                    fld/fst mem1 (hp1)      fld/fst mem1
                    fld     mem2 (p)        fxxx    mem2
                    fxxxp   st, st1 (hp2) }
                begin
                  { as above, non-commutative operations are reversed }
                  case taicpu(hp2).opcode Of
                    A_FADDP: taicpu(p).opcode := A_FADD;
                    A_FMULP: taicpu(p).opcode := A_FMUL;
                    A_FSUBP: taicpu(p).opcode := A_FSUBR;
                    A_FSUBRP: taicpu(p).opcode := A_FSUB;
                    A_FDIVP: taicpu(p).opcode := A_FDIVR;
                    A_FDIVRP: taicpu(p).opcode := A_FDIV;
                    else
                      internalerror(2019050533);
                  end;
                  RemoveInstruction(hp2);
                end
              else
                ;
            end
          end
  end;
  4916. function IsCmpSubset(cond1, cond2: TAsmCond): Boolean; inline;
  4917. begin
  4918. Result := condition_in(cond1, cond2) or
  4919. { Not strictly subsets due to the actual flags checked, but because we're
  4920. comparing integers, E is a subset of AE and GE and their aliases }
  4921. ((cond1 in [C_E, C_Z]) and (cond2 in [C_AE, C_NB, C_NC, C_GE, C_NL]));
  4922. end;
    function TX86AsmOptimizer.OptPass1Cmp(var p: tai): boolean;
      { Pass-1 peephole handler for CMP: redirects/merges conditional jumps
        that repeat the same comparison, removes superfluous second compares,
        converts "cmp $0,%reg" to "test %reg,%reg" (optimising the attached
        Jcc/SETcc conditions), and related transformations. }
      var
        v: TCGInt;
        hp1, hp2, p_dist, p_jump, hp1_dist, p_label, hp1_label: tai;
        FirstMatch: Boolean;
        JumpLabel, JumpLabel_dist, JumpLabel_far: TAsmLabel;
      begin
        Result:=false;

        { All these optimisations need a next instruction }
        if not GetNextInstruction(p, hp1) then
          Exit;

        { Search for:
            cmp   ###,###
            j(c1) @lbl1
            ...
            @lbl:
            cmp   ###.### (same comparison as above)
            j(c2) @lbl2
          If c1 is a subset of c2, change to:
            cmp   ###,###
            j(c2) @lbl2
          (@lbl1 may become a dead label as a result)
        }
        { Also handle cases where there are multiple jumps in a row }
        p_jump := hp1;
        while Assigned(p_jump) and MatchInstruction(p_jump, A_JCC, []) do
          begin
            if IsJumpToLabel(taicpu(p_jump)) then
              begin
                JumpLabel := TAsmLabel(taicpu(p_jump).oper[0]^.ref^.symbol);
                p_label := nil;
                if Assigned(JumpLabel) then
                  p_label := getlabelwithsym(JumpLabel);
                { Does the jump target start with the identical comparison
                  followed by another conditional jump? }
                if Assigned(p_label) and
                  GetNextInstruction(p_label, p_dist) and
                  MatchInstruction(p_dist, A_CMP, []) and
                  MatchOperand(taicpu(p_dist).oper[0]^, taicpu(p).oper[0]^) and
                  MatchOperand(taicpu(p_dist).oper[1]^, taicpu(p).oper[1]^) and
                  GetNextInstruction(p_dist, hp1_dist) and
                  MatchInstruction(hp1_dist, A_JCC, []) then { This doesn't have to be an explicit label }
                  begin
                    JumpLabel_dist := TAsmLabel(taicpu(hp1_dist).oper[0]^.ref^.symbol);
                    if JumpLabel = JumpLabel_dist then
                      { This is an infinite loop }
                      Exit;
                    { Best optimisation when the first condition is a subset (or equal) of the second }
                    if IsCmpSubset(taicpu(p_jump).condition, taicpu(hp1_dist).condition) then
                      begin
                        { Any registers used here will already be allocated }
                        if Assigned(JumpLabel_dist) then
                          JumpLabel_dist.IncRefs;
                        if Assigned(JumpLabel) then
                          JumpLabel.DecRefs;
                        DebugMsg(SPeepholeOptimization + 'CMP/Jcc/@Lbl/CMP/Jcc -> CMP/Jcc, redirecting first jump', p_jump);
                        taicpu(p_jump).condition := taicpu(hp1_dist).condition;
                        taicpu(p_jump).loadref(0, taicpu(hp1_dist).oper[0]^.ref^);
                        Result := True;
                        { Don't exit yet.  Since p and p_jump haven't actually been
                          removed, we can check for more on this iteration }
                      end
                    { If the distant condition can never hold when the first
                      jump is taken, jump past the distant Jcc instead }
                    else if IsCmpSubset(taicpu(hp1_dist).condition, inverse_cond(taicpu(p_jump).condition)) and
                      GetNextInstruction(hp1_dist, hp1_label) and
                      SkipAligns(hp1_label, hp1_label) and
                      (hp1_label.typ = ait_label) then
                      begin
                        JumpLabel_far := tai_label(hp1_label).labsym;
                        if (JumpLabel_far = JumpLabel_dist) or (JumpLabel_far = JumpLabel) then
                          { This is an infinite loop }
                          Exit;
                        if Assigned(JumpLabel_far) then
                          begin
                            { In this situation, if the first jump branches, the second one will never,
                              branch so change the destination label to after the second jump }
                            DebugMsg(SPeepholeOptimization + 'CMP/Jcc/@Lbl/CMP/Jcc/@Lbl -> CMP/Jcc, redirecting first jump to 2nd label', p_jump);
                            if Assigned(JumpLabel) then
                              JumpLabel.DecRefs;
                            JumpLabel_far.IncRefs;
                            taicpu(p_jump).oper[0]^.ref^.symbol := JumpLabel_far;
                            Result := True;
                            { Don't exit yet.  Since p and p_jump haven't actually been
                              removed, we can check for more on this iteration }
                            Continue;
                          end;
                      end;
                  end;
              end;

            { Search for:
                cmp   ###,###
                j(c1) @lbl1
                cmp   ###,### (same as first)
              Remove second cmp
            }
            if GetNextInstruction(p_jump, hp2) and
              (
                (
                  MatchInstruction(hp2, A_CMP, []) and
                  (
                    (
                      { Same constant against (a sub-register of) the same register }
                      MatchOpType(taicpu(p), top_const, top_reg) and
                      (taicpu(hp2).oper[0]^.val = taicpu(p).oper[0]^.val) and
                      SuperRegistersEqual(taicpu(p).oper[1]^.reg, taicpu(hp2).oper[1]^.reg)
                    ) or (
                      MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^) and
                      MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^)
                    )
                  )
                ) or (
                  { Also match cmp $0,%reg; jcc @lbl; test %reg,%reg }
                  MatchOperand(taicpu(p).oper[0]^, 0) and
                  (taicpu(p).oper[1]^.typ = top_reg) and
                  MatchInstruction(hp2, A_TEST, []) and
                  MatchOpType(taicpu(hp2), top_reg, top_reg) and
                  (taicpu(hp2).oper[0]^.reg = taicpu(hp2).oper[1]^.reg) and
                  SuperRegistersEqual(taicpu(p).oper[1]^.reg, taicpu(hp2).oper[1]^.reg)
                )
              ) then
              begin
                DebugMsg(SPeepholeOptimization + 'CMP/Jcc/CMP; removed superfluous CMP', hp2);
                RemoveInstruction(hp2);
                Result := True;
                { Continue the while loop in case "Jcc/CMP" follows the second CMP that was just removed }
              end;

            GetNextInstruction(p_jump, p_jump);
          end;

        if taicpu(p).oper[0]^.typ = top_const then
          begin
            if (taicpu(p).oper[0]^.val = 0) and
              (taicpu(p).oper[1]^.typ = top_reg) and
              MatchInstruction(hp1,A_Jcc,A_SETcc,[]) then
              begin
                hp2 := p;
                FirstMatch := True;
                { When dealing with "cmp $0,%reg", only ZF and SF contain
                  anything meaningful once it's converted to "test %reg,%reg";
                  additionally, some jumps will always (or never) branch, so
                  evaluate every jump immediately following the
                  comparison, optimising the conditions if possible.
                  Similarly with SETcc... those that are always set to 0 or 1
                  are changed to MOV instructions }
                while FirstMatch or { Saves calling GetNextInstruction unnecessarily }
                  (
                    GetNextInstruction(hp2, hp1) and
                    MatchInstruction(hp1,A_Jcc,A_SETcc,[])
                  ) do
                  begin
                    FirstMatch := False;
                    case taicpu(hp1).condition of
                      C_B, C_C, C_NAE, C_O:
                        { For B/NAE:
                            Will never branch since an unsigned integer can never be below zero
                          For C/O:
                            Result cannot overflow because 0 is being subtracted
                        }
                        begin
                          if taicpu(hp1).opcode = A_Jcc then
                            begin
                              DebugMsg(SPeepholeOptimization + 'Cmpcc2Testcc - condition B/C/NAE/O --> Never (jump removed)', hp1);
                              TAsmLabel(taicpu(hp1).oper[0]^.ref^.symbol).decrefs;
                              RemoveInstruction(hp1);
                              { Since hp1 was deleted, hp2 must not be updated }
                              Continue;
                            end
                          else
                            begin
                              DebugMsg(SPeepholeOptimization + 'Cmpcc2Testcc - condition B/C/NAE/O --> Never (set -> mov 0)', hp1);
                              { Convert "set(c) %reg" instruction to "movb 0,%reg" }
                              taicpu(hp1).opcode := A_MOV;
                              taicpu(hp1).ops := 2;
                              taicpu(hp1).condition := C_None;
                              taicpu(hp1).opsize := S_B;
                              taicpu(hp1).loadreg(1,taicpu(hp1).oper[0]^.reg);
                              taicpu(hp1).loadconst(0, 0);
                            end;
                        end;
                      C_BE, C_NA:
                        begin
                          { Will only branch if equal to zero }
                          DebugMsg(SPeepholeOptimization + 'Cmpcc2Testcc - condition BE/NA --> E', hp1);
                          taicpu(hp1).condition := C_E;
                        end;
                      C_A, C_NBE:
                        begin
                          { Will only branch if not equal to zero }
                          DebugMsg(SPeepholeOptimization + 'Cmpcc2Testcc - condition A/NBE --> NE', hp1);
                          taicpu(hp1).condition := C_NE;
                        end;
                      C_AE, C_NB, C_NC, C_NO:
                        begin
                          { Will always branch }
                          DebugMsg(SPeepholeOptimization + 'Cmpcc2Testcc - condition AE/NB/NC/NO --> Always', hp1);
                          if taicpu(hp1).opcode = A_Jcc then
                            begin
                              MakeUnconditional(taicpu(hp1));
                              { Any jumps/set that follow will now be dead code }
                              RemoveDeadCodeAfterJump(taicpu(hp1));
                              Break;
                            end
                          else
                            begin
                              { Convert "set(c) %reg" instruction to "movb 1,%reg" }
                              taicpu(hp1).opcode := A_MOV;
                              taicpu(hp1).ops := 2;
                              taicpu(hp1).condition := C_None;
                              taicpu(hp1).opsize := S_B;
                              taicpu(hp1).loadreg(1,taicpu(hp1).oper[0]^.reg);
                              taicpu(hp1).loadconst(0, 1);
                            end;
                        end;
                      C_None:
                        InternalError(2020012201);
                      C_P, C_PE, C_NP, C_PO:
                        { We can't handle parity checks and they should never be generated
                          after a general-purpose CMP (it's used in some floating-point
                          comparisons that don't use CMP) }
                        InternalError(2020012202);
                      else
                        { Zero/Equality, Sign, their complements and all of the
                          signed comparisons do not need to be converted };
                    end;
                    hp2 := hp1;
                  end;

                { Convert the instruction to a TEST }
                taicpu(p).opcode := A_TEST;
                taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                Result := True;
                Exit;
              end
            else if (taicpu(p).oper[0]^.val = 1) and
              MatchInstruction(hp1,A_Jcc,A_SETcc,[]) and
              (taicpu(hp1).condition in [C_L, C_NGE]) then
              begin
                { Convert;       To:
                    cmp $1,r/m     cmp $0,r/m
                    jl  @lbl       jle @lbl
                }
                DebugMsg(SPeepholeOptimization + 'Cmp1Jl2Cmp0Jle', p);
                taicpu(p).oper[0]^.val := 0;
                taicpu(hp1).condition := C_LE;

                { If the instruction is now "cmp $0,%reg", convert it to a
                  TEST (and effectively do the work of the "cmp $0,%reg" in
                  the block above)

                  If it's a reference, we can get away with not setting
                  Result to True because we haven't evaluated the jump
                  in this pass yet.
                }
                if (taicpu(p).oper[1]^.typ = top_reg) then
                  begin
                    taicpu(p).opcode := A_TEST;
                    taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                    Result := True;
                  end;
                Exit;
              end
            else if (taicpu(p).oper[1]^.typ = top_reg)
    {$ifdef x86_64}
              and (taicpu(p).opsize <> S_Q) { S_Q will never happen: cmp with 64 bit constants is not possible }
    {$endif x86_64}
              then
              begin
                { cmp  register,$8000                neg    register
                  je   target                 -->    jo     target

                  .... only if register is deallocated before jump.}
                case Taicpu(p).opsize of
                  S_B: v:=$80;
                  S_W: v:=$8000;
                  { Avoid sign extension of the 32-bit constant into TCGInt }
                  S_L: v:=qword($80000000);
                  else
                    internalerror(2013112905);
                end;
                if (taicpu(p).oper[0]^.val=v) and
                  MatchInstruction(hp1,A_Jcc,A_SETcc,[]) and
                  (Taicpu(hp1).condition in [C_E,C_NE]) then
                  begin
                    TransferUsedRegs(TmpUsedRegs);
                    UpdateUsedRegs(TmpUsedRegs,tai(p.next));
                    { NEG clobbers the register, so it must not live past the jump }
                    if not(RegInUsedRegs(Taicpu(p).oper[1]^.reg, TmpUsedRegs)) then
                      begin
                        DebugMsg(SPeepholeOptimization + 'CmpJe2NegJo done',p);
                        Taicpu(p).opcode:=A_NEG;
                        Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
                        Taicpu(p).clearop(1);
                        Taicpu(p).ops:=1;
                        if Taicpu(hp1).condition=C_E then
                          Taicpu(hp1).condition:=C_O
                        else
                          Taicpu(hp1).condition:=C_NO;
                        Result:=true;
                        exit;
                      end;
                  end;
              end;
          end;

        if MatchInstruction(hp1,A_MOV,[]) and
          (
            (taicpu(p).oper[0]^.typ <> top_reg) or
            not RegInInstruction(taicpu(p).oper[0]^.reg, hp1)
          ) and
          (
            (taicpu(p).oper[1]^.typ <> top_reg) or
            not RegInInstruction(taicpu(p).oper[1]^.reg, hp1)
          ) and
          (
            { Make sure the register written to doesn't appear in the
              cmp instruction (in a reference, say) }
            (taicpu(hp1).oper[1]^.typ <> top_reg) or
            not RegInInstruction(taicpu(hp1).oper[1]^.reg, p)
          ) then
          begin
            { If we have something like:
                cmp ###,%reg1
                mov 0,%reg2

              And no registers are shared, move the MOV command to before the
              comparison as this means it can be optimised without worrying
              about the FLAGS register. (This combination is generated by
              "J(c)Mov1JmpMov0 -> Set(~c)", among other things).
            }
            SwapMovCmp(p, hp1);
            Result := True;
            Exit;
          end;
      end;
    function TX86AsmOptimizer.OptPass1PXor(var p: tai): boolean;
      { Pass-1 peephole handler for PXOR: removes a duplicated register-zeroing
        PXOR, or retargets the zeroing when the result is only copied once via
        MOVAPD/MOVAPS. }
      var
        hp1: tai;
      begin
        {
          remove the second (v)pxor from

            pxor reg,reg
            ...
            pxor reg,reg
        }
        Result:=false;
        if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) and
          MatchOpType(taicpu(p),top_reg,top_reg) and
          GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
          MatchInstruction(hp1,taicpu(p).opcode,[taicpu(p).opsize]) and
          MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[0]^) and
          MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^) then
          begin
            DebugMsg(SPeepholeOptimization + 'PXorPXor2PXor done',hp1);
            RemoveInstruction(hp1);
            Result:=true;
            Exit;
          end
        {
          replace
            pxor reg1,reg1
            movapd/s reg1,reg2
            dealloc reg1
          by

            pxor reg2,reg2
        }
        else if GetNextInstruction(p,hp1) and
          { we mix single and double operations here because we assume that the compiler
            generates vmovapd only after double operations and vmovaps only after single operations }
          MatchInstruction(hp1,A_MOVAPD,A_MOVAPS,[S_NO]) and
          MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) and
          MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
          (taicpu(p).oper[0]^.typ=top_reg) then
          begin
            TransferUsedRegs(TmpUsedRegs);
            UpdateUsedRegs(TmpUsedRegs, tai(p.next));
            { Only valid if reg1 is not read again after the copy }
            if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
              begin
                taicpu(p).loadoper(0,taicpu(hp1).oper[1]^);
                taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                DebugMsg(SPeepholeOptimization + 'PXorMovapd2PXor done',p);
                RemoveInstruction(hp1);
                result:=true;
              end;
          end;
      end;
  5295. function TX86AsmOptimizer.OptPass1VPXor(var p: tai): boolean;
  5296. var
  5297. hp1: tai;
  5298. begin
  5299. {
  5300. remove the second (v)pxor from
  5301. (v)pxor reg,reg
  5302. ...
  5303. (v)pxor reg,reg
  5304. }
  5305. Result:=false;
  5306. if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^,taicpu(p).oper[2]^) and
  5307. MatchOpType(taicpu(p),top_reg,top_reg,top_reg) and
  5308. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  5309. MatchInstruction(hp1,taicpu(p).opcode,[taicpu(p).opsize]) and
  5310. MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[0]^) and
  5311. MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^,taicpu(hp1).oper[2]^) then
  5312. begin
  5313. DebugMsg(SPeepholeOptimization + 'VPXorVPXor2PXor done',hp1);
  5314. RemoveInstruction(hp1);
  5315. Result:=true;
  5316. Exit;
  5317. end
  5318. else
  5319. Result:=OptPass1VOP(p);
  5320. end;
  5321. function TX86AsmOptimizer.OptPass1Imul(var p: tai): boolean;
  5322. var
  5323. hp1 : tai;
  5324. begin
  5325. result:=false;
  5326. { replace
  5327. IMul const,%mreg1,%mreg2
  5328. Mov %reg2,%mreg3
  5329. dealloc %mreg3
  5330. by
  5331. Imul const,%mreg1,%mreg23
  5332. }
  5333. if (taicpu(p).ops=3) and
  5334. GetNextInstruction(p,hp1) and
  5335. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) and
  5336. MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
  5337. (taicpu(hp1).oper[1]^.typ=top_reg) then
  5338. begin
  5339. TransferUsedRegs(TmpUsedRegs);
  5340. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  5341. if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)) then
  5342. begin
  5343. taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
  5344. DebugMsg(SPeepholeOptimization + 'ImulMov2Imul done',p);
  5345. RemoveInstruction(hp1);
  5346. result:=true;
  5347. end;
  5348. end;
  5349. end;
    function TX86AsmOptimizer.OptPass1SHXX(var p: tai): boolean;
      { Pass-1 handler for three-operand shift instructions (SHLX/SHRX-style):
        folds a register-to-register MOV of the result into the shift's
        destination operand.

        replace
          ShXX   %reg0,%reg1,%reg2
          Mov    %reg2,%reg3
          dealloc  %reg2
        by
          ShXX   %reg0,%reg1,%reg3
      }
      var
        hp1 : tai;
      begin
        result:=false;
        if GetNextInstruction(p,hp1) and
          MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) and
          MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
          (taicpu(hp1).oper[1]^.typ=top_reg) then
          begin
            TransferUsedRegs(TmpUsedRegs);
            UpdateUsedRegs(TmpUsedRegs, tai(p.next));
            { Only valid if the intermediate register dies at the MOV }
            if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)) then
              begin
                taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
                DebugMsg(SPeepholeOptimization + 'SHXXMov2SHXX done',p);
                RemoveInstruction(hp1);
                result:=true;
              end;
          end;
      end;
    function TX86AsmOptimizer.OptPass1_V_Cvtss2sd(var p: tai): boolean;
      { Pass-1 handler for (V)CVTSS2SD: eliminates a single-to-double
        conversion that is immediately undone by the matching (V)CVTSD2SS. }
      var
        hp1: tai;
      begin
        Result:=false;
        { get rid of

            (v)cvtss2sd reg0,<reg1,>reg2
            (v)cvtss2sd reg2,<reg2,>reg0
        }
        if GetNextInstruction(p,hp1) and
          { Non-VEX form: two operands that mirror each other exactly }
          (((taicpu(p).opcode=A_CVTSS2SD) and MatchInstruction(hp1,A_CVTSD2SS,[taicpu(p).opsize]) and
            MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^)) or
           { VEX form: three register operands, compared at super-register level }
           ((taicpu(p).opcode=A_VCVTSS2SD) and MatchInstruction(hp1,A_VCVTSD2SS,[taicpu(p).opsize]) and
            MatchOpType(taicpu(p),top_reg,top_reg,top_reg) and
            MatchOpType(taicpu(hp1),top_reg,top_reg,top_reg) and
            (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
            (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
            (getsupreg(taicpu(p).oper[2]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg))
           )
          ) then
          begin
            if getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[2]^.reg) then
              begin
                { Round trip ends in the source register: both instructions
                  can simply be dropped }
                DebugMsg(SPeepholeOptimization + '(V)Cvtss2CvtSd(V)Cvtsd2ss2Nop done',p);
                RemoveCurrentP(p);
                RemoveInstruction(hp1);
              end
            else
              begin
                { Round trip ends in a different register: replace the pair
                  with a plain register copy }
                DebugMsg(SPeepholeOptimization + '(V)Cvtss2CvtSd(V)Cvtsd2ss2Vmovaps done',p);
                taicpu(p).loadreg(1,taicpu(hp1).oper[2]^.reg);
                taicpu(p).ops:=2;
                taicpu(p).opcode:=A_VMOVAPS;
                RemoveInstruction(hp1);
              end;
            Result:=true;
            Exit;
          end;
      end;
    function TX86AsmOptimizer.OptPass1Jcc(var p : tai) : boolean;
      { Pass-1 handler for conditional jumps: converts the classic
        "branch / mov 1 / jmp / label / mov 0 / label" diamond into a
        branch-free SETcc sequence. }
      var
        hp1, hp2, hp3, hp4, hp5: tai;
        ThisReg: TRegister;
      begin
        Result := False;
        if not GetNextInstruction(p,hp1) or (hp1.typ <> ait_instruction) then
          Exit;

        {
            convert
            j<c>  .L1
            mov   1,reg
            jmp   .L2
        .L1
            mov   0,reg
        .L2

        into

            mov   0,reg
            set<not(c)> reg

        take care of alignment and that the mov 0,reg is not converted into a xor as this
        would destroy the flag contents

        Use MOVZX if size is preferred, since while mov 0,reg is bigger, it can be
        executed at the same time as a previous comparison.

            set<not(c)> reg
            movzx reg, reg
        }
        if MatchInstruction(hp1,A_MOV,[]) and
          (taicpu(hp1).oper[0]^.typ = top_const) and
          (
            (
              (taicpu(hp1).oper[1]^.typ = top_reg)
    {$ifdef i386}
              { Under i386, ESI, EDI, EBP and ESP
                don't have an 8-bit representation }
              and not (getsupreg(taicpu(hp1).oper[1]^.reg) in [RS_ESI, RS_EDI, RS_EBP, RS_ESP])
    {$endif i386}
            ) or (
    {$ifdef i386}
              (taicpu(hp1).oper[1]^.typ <> top_reg) and
    {$endif i386}
              (taicpu(hp1).opsize = S_B)
            )
          ) and
          GetNextInstruction(hp1,hp2) and
          MatchInstruction(hp2,A_JMP,[]) and (taicpu(hp2).oper[0]^.ref^.refaddr=addr_full) and
          GetNextInstruction(hp2,hp3) and
          SkipAligns(hp3, hp3) and
          (hp3.typ=ait_label) and
          (tasmlabel(taicpu(p).oper[0]^.ref^.symbol)=tai_label(hp3).labsym) and
          GetNextInstruction(hp3,hp4) and
          MatchInstruction(hp4,A_MOV,[taicpu(hp1).opsize]) and
          (taicpu(hp4).oper[0]^.typ = top_const) and
          (
            { The two MOVs must load complementary 0/1 constants }
            ((taicpu(hp1).oper[0]^.val = 0) and (taicpu(hp4).oper[0]^.val = 1)) or
            ((taicpu(hp1).oper[0]^.val = 1) and (taicpu(hp4).oper[0]^.val = 0))
          ) and
          MatchOperand(taicpu(hp1).oper[1]^,taicpu(hp4).oper[1]^) and
          GetNextInstruction(hp4,hp5) and
          SkipAligns(hp5, hp5) and
          (hp5.typ=ait_label) and
          (tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol)=tai_label(hp5).labsym) then
          begin
            { If the taken path loads 1, invert the condition so SETcc can
              produce the same values }
            if (taicpu(hp1).oper[0]^.val = 1) and (taicpu(hp4).oper[0]^.val = 0) then
              taicpu(p).condition := inverse_cond(taicpu(p).condition);

            tai_label(hp3).labsym.DecRefs;

            { If this isn't the only reference to the middle label, we can
              still make a saving - only that the first jump and everything
              that follows will remain. }
            if (tai_label(hp3).labsym.getrefs = 0) then
              begin
                if (taicpu(hp1).oper[0]^.val = 1) and (taicpu(hp4).oper[0]^.val = 0) then
                  DebugMsg(SPeepholeOptimization + 'J(c)Mov1JmpMov0 -> Set(~c)',p)
                else
                  DebugMsg(SPeepholeOptimization + 'J(c)Mov0JmpMov1 -> Set(c)',p);

                { remove jump, first label and second MOV (also catching any aligns) }
                repeat
                  if not GetNextInstruction(hp2, hp3) then
                    InternalError(2021040810);
                  RemoveInstruction(hp2);
                  hp2 := hp3;
                until hp2 = hp5;

                { Don't decrement reference count before the removal loop
                  above, otherwise GetNextInstruction won't stop on the
                  the label }
                tai_label(hp5).labsym.DecRefs;
              end
            else
              begin
                if (taicpu(hp1).oper[0]^.val = 1) and (taicpu(hp4).oper[0]^.val = 0) then
                  DebugMsg(SPeepholeOptimization + 'J(c)Mov1JmpMov0 -> Set(~c) (partial)',p)
                else
                  DebugMsg(SPeepholeOptimization + 'J(c)Mov0JmpMov1 -> Set(c) (partial)',p);
              end;

            { Turn the conditional jump itself into the SETcc }
            taicpu(p).opcode:=A_SETcc;
            taicpu(p).opsize:=S_B;
            taicpu(p).is_jmp:=False;

            if taicpu(hp1).opsize=S_B then
              begin
                { The MOV already has byte size; SETcc writes its operand directly }
                taicpu(p).loadoper(0, taicpu(hp1).oper[1]^);
                if taicpu(hp1).oper[1]^.typ = top_reg then
                  AllocRegBetween(taicpu(hp1).oper[1]^.reg, p, hp2, UsedRegs);
                RemoveInstruction(hp1);
              end
            else
              begin
                { Will be a register because the size can't be S_B otherwise }
                ThisReg := newreg(R_INTREGISTER,getsupreg(taicpu(hp1).oper[1]^.reg), R_SUBL);
                taicpu(p).loadreg(0, ThisReg);
                AllocRegBetween(ThisReg, p, hp2, UsedRegs);

                if (cs_opt_size in current_settings.optimizerswitches) and IsMOVZXAcceptable then
                  begin
                    { Reuse the MOV as a zero-extension of the SETcc result }
                    case taicpu(hp1).opsize of
                      S_W:
                        taicpu(hp1).opsize := S_BW;
                      S_L:
                        taicpu(hp1).opsize := S_BL;
    {$ifdef x86_64}
                      S_Q:
                        begin
                          taicpu(hp1).opsize := S_BL;
                          { Change the destination register to 32-bit }
                          taicpu(hp1).loadreg(1, newreg(R_INTREGISTER,getsupreg(ThisReg), R_SUBD));
                        end;
    {$endif x86_64}
                      else
                        InternalError(2021040820);
                    end;
                    taicpu(hp1).opcode := A_MOVZX;
                    taicpu(hp1).loadreg(0, ThisReg);
                  end
                else
                  begin
                    AllocRegBetween(NR_FLAGS,p,hp1,UsedRegs);
                    { hp1 is already a MOV instruction with the correct register }
                    taicpu(hp1).loadconst(0, 0);
                    { Inserting it right before p will guarantee that the flags are also tracked }
                    asml.Remove(hp1);
                    asml.InsertBefore(hp1, p);
                  end;
              end;
            Result:=true;
            exit;
          end
      end;
    function TX86AsmOptimizer.CheckJumpMovTransferOpt(var p: tai; hp1: tai; LoopCount: Integer; out Count: Integer): Boolean;
      { If the unconditional jump at p targets a short run of MOV-style
        instructions terminated by another JMP or RET, duplicates those
        assignments in front of p and redirects (or converts) the jump,
        eliminating a branch.  May recurse to follow jump chains; LoopCount
        guards against cycles and Count reports how many assignments were
        duplicated.  hp1 may be passed as nil, in which case the jump's
        target label is looked up.  Returns True if anything changed. }
      var
        hp2, hp3, first_assignment: tai;
        IncCount, OperIdx: Integer;
        OrigLabel: TAsmLabel;
      begin
        Count := 0;
        Result := False;
        first_assignment := nil;

        if (LoopCount >= 20) then
          begin
            { Guard against infinite loops }
            Exit;
          end;

        { The jump must be a direct jump to a plain label (no base/index registers) }
        if (taicpu(p).oper[0]^.typ <> top_ref) or
          (taicpu(p).oper[0]^.ref^.refaddr <> addr_full) or
          (taicpu(p).oper[0]^.ref^.base <> NR_NO) or
          (taicpu(p).oper[0]^.ref^.index <> NR_NO) or
          not (taicpu(p).oper[0]^.ref^.symbol is TAsmLabel) then
          Exit;

        OrigLabel := TAsmLabel(taicpu(p).oper[0]^.ref^.symbol);

        {
          change
                 jmp .L1
                 ...
             .L1:
                 mov ##, ## ( multiple movs possible )
                 jmp/ret
          into
                 mov ##, ##
                 jmp/ret
        }

        if not Assigned(hp1) then
          begin
            hp1 := GetLabelWithSym(OrigLabel);
            if not Assigned(hp1) or not SkipLabels(hp1, hp1) then
              Exit;
          end;

        { First scan: verify the target is a short MOV run ending in JMP/RET,
          counting the assignments to duplicate }
        hp2 := hp1;

        while Assigned(hp2) do
          begin
            if Assigned(hp2) and (hp2.typ in [ait_label, ait_align]) then
              SkipLabels(hp2,hp2);

            if not Assigned(hp2) or (hp2.typ <> ait_instruction) then
              Break;

            case taicpu(hp2).opcode of
              A_MOVSS:
                begin
                  { MOVSS with no operands is the string instruction, not the
                    SSE move - cannot be duplicated }
                  if taicpu(hp2).ops = 0 then
                    { Wrong MOVSS }
                    Break;

                  Inc(Count);
                  if Count >= 5 then
                    { Too many to be worthwhile }
                    Break;

                  GetNextInstruction(hp2, hp2);
                  Continue;
                end;
              A_MOV,
              A_MOVD,
              A_MOVQ,
              A_MOVSX,
    {$ifdef x86_64}
              A_MOVSXD,
    {$endif x86_64}
              A_MOVZX,
              A_MOVAPS,
              A_MOVUPS,
              A_MOVSD,
              A_MOVAPD,
              A_MOVUPD,
              A_MOVDQA,
              A_MOVDQU,
              A_VMOVSS,
              A_VMOVAPS,
              A_VMOVUPS,
              A_VMOVSD,
              A_VMOVAPD,
              A_VMOVUPD,
              A_VMOVDQA,
              A_VMOVDQU:
                begin
                  Inc(Count);
                  if Count >= 5 then
                    { Too many to be worthwhile }
                    Break;

                  GetNextInstruction(hp2, hp2);
                  Continue;
                end;
              A_JMP:
                begin
                  { Guard against infinite loops }
                  if taicpu(hp2).oper[0]^.ref^.symbol = OrigLabel then
                    Exit;

                  { Analyse this jump first in case it also duplicates assignments }
                  if CheckJumpMovTransferOpt(hp2, nil, LoopCount + 1, IncCount) then
                    begin
                      { Something did change! }
                      Result := True;

                      Inc(Count, IncCount);
                      if Count >= 5 then
                        begin
                          { Too many to be worthwhile }
                          Exit;
                        end;

                      if MatchInstruction(hp2, [A_JMP, A_RET], []) then
                        Break;
                    end;

                  Result := True;
                  Break;
                end;
              A_RET:
                begin
                  Result := True;
                  Break;
                end;
              else
                Break;
            end;
          end;

        if Result then
          begin
            { A count of zero can happen when CheckJumpMovTransferOpt is called recursively }
            if Count = 0 then
              begin
                Result := False;
                Exit;
              end;

            hp3 := p;
            DebugMsg(SPeepholeOptimization + 'Duplicated ' + debug_tostr(Count) + ' assignment(s) and redirected jump', p);

            { Second scan: walk the run again, copying each assignment in
              front of p, then rewrite p according to the terminator found }
            while True do
              begin
                if Assigned(hp1) and (hp1.typ in [ait_label, ait_align]) then
                  SkipLabels(hp1,hp1);

                if (hp1.typ <> ait_instruction) then
                  InternalError(2021040720);

                case taicpu(hp1).opcode of
                  A_JMP:
                    begin
                      { Change the original jump to the new destination }
                      OrigLabel.decrefs;
                      taicpu(hp1).oper[0]^.ref^.symbol.increfs;
                      taicpu(p).loadref(0, taicpu(hp1).oper[0]^.ref^);

                      { Set p to the first duplicated assignment so it can get optimised if needs be }
                      if not Assigned(first_assignment) then
                        InternalError(2021040810)
                      else
                        p := first_assignment;

                      Exit;
                    end;
                  A_RET:
                    begin
                      { Now change the jump into a RET instruction }
                      ConvertJumpToRET(p, hp1);

                      { Set p to the first duplicated assignment so it can get optimised if needs be }
                      if not Assigned(first_assignment) then
                        InternalError(2021040811)
                      else
                        p := first_assignment;

                      Exit;
                    end;
                  else
                    begin
                      { Duplicate the MOV instruction }
                      hp3:=tai(hp1.getcopy);
                      if first_assignment = nil then
                        first_assignment := hp3;
                      asml.InsertBefore(hp3, p);

                      { Make sure the compiler knows about any final registers written here }
                      for OperIdx := 0 to taicpu(hp3).ops - 1 do
                        with taicpu(hp3).oper[OperIdx]^ do
                          begin
                            case typ of
                              top_ref:
                                begin
                                  if (ref^.base <> NR_NO) and
                                    (getsupreg(ref^.base) <> RS_ESP) and
                                    (getsupreg(ref^.base) <> RS_EBP)
    {$ifdef x86_64} and (ref^.base <> NR_RIP) {$endif x86_64}
                                    then
                                    AllocRegBetween(ref^.base, hp3, tai(p.Next), UsedRegs);
                                  if (ref^.index <> NR_NO) and
                                    (getsupreg(ref^.index) <> RS_ESP) and
                                    (getsupreg(ref^.index) <> RS_EBP)
    {$ifdef x86_64} and (ref^.index <> NR_RIP) {$endif x86_64} and
                                    (ref^.index <> ref^.base) then
                                    AllocRegBetween(ref^.index, hp3, tai(p.Next), UsedRegs);
                                end;
                              top_reg:
                                AllocRegBetween(reg, hp3, tai(p.Next), UsedRegs);
                              else
                                ;
                            end;
                          end;
                    end;
                end;

                if not GetNextInstruction(hp1, hp1) then
                  { Should have dropped out earlier }
                  InternalError(2021040710);
              end;
          end;
      end;
  5763. procedure TX86AsmOptimizer.SwapMovCmp(var p, hp1: tai);
  5764. var
  5765. hp2: tai;
  5766. X: Integer;
  5767. begin
  5768. asml.Remove(hp1);
  5769. { Try to insert after the last instructions where the FLAGS register is not yet in use }
  5770. if not GetLastInstruction(p, hp2) then
  5771. asml.InsertBefore(hp1, p)
  5772. else
  5773. asml.InsertAfter(hp1, hp2);
  5774. DebugMsg(SPeepholeOptimization + 'Swapped ' + debug_op2str(taicpu(p).opcode) + ' and mov instructions to improve optimisation potential', hp1);
  5775. for X := 0 to 1 do
  5776. case taicpu(hp1).oper[X]^.typ of
  5777. top_reg:
  5778. AllocRegBetween(taicpu(hp1).oper[X]^.reg, hp1, p, UsedRegs);
  5779. top_ref:
  5780. begin
  5781. if taicpu(hp1).oper[X]^.ref^.base <> NR_NO then
  5782. AllocRegBetween(taicpu(hp1).oper[X]^.ref^.base, hp1, p, UsedRegs);
  5783. if taicpu(hp1).oper[X]^.ref^.index <> NR_NO then
  5784. AllocRegBetween(taicpu(hp1).oper[X]^.ref^.index, hp1, p, UsedRegs);
  5785. end;
  5786. else
  5787. ;
  5788. end;
  5789. end;
  5790. function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
  5791. function IsXCHGAcceptable: Boolean; inline;
  5792. begin
  5793. { Always accept if optimising for size }
  5794. Result := (cs_opt_size in current_settings.optimizerswitches) or
  5795. (
  5796. {$ifdef x86_64}
  5797. { XCHG takes 3 cycles on AMD Athlon64 }
  5798. (current_settings.optimizecputype >= cpu_core_i)
  5799. {$else x86_64}
  5800. { From the Pentium M onwards, XCHG only has a latency of 2 rather
  5801. than 3, so it becomes a saving compared to three MOVs with two of
  5802. them able to execute simultaneously. [Kit] }
  5803. (current_settings.optimizecputype >= cpu_PentiumM)
  5804. {$endif x86_64}
  5805. );
  5806. end;
  5807. var
  5808. NewRef: TReference;
  5809. hp1, hp2, hp3, hp4: Tai;
  5810. {$ifndef x86_64}
  5811. OperIdx: Integer;
  5812. {$endif x86_64}
  5813. NewInstr : Taicpu;
  5814. NewAligh : Tai_align;
  5815. DestLabel: TAsmLabel;
  5816. begin
  5817. Result:=false;
  5818. { This optimisation adds an instruction, so only do it for speed }
  5819. if not (cs_opt_size in current_settings.optimizerswitches) and
  5820. MatchOpType(taicpu(p), top_const, top_reg) and
  5821. (taicpu(p).oper[0]^.val = 0) then
  5822. begin
  5823. { To avoid compiler warning }
  5824. DestLabel := nil;
  5825. if (p.typ <> ait_instruction) or (taicpu(p).oper[1]^.typ <> top_reg) then
  5826. InternalError(2021040750);
  5827. if not GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[1]^.reg) then
  5828. Exit;
  5829. case hp1.typ of
  5830. ait_label:
  5831. begin
  5832. { Change:
  5833. mov $0,%reg mov $0,%reg
  5834. @Lbl1: @Lbl1:
  5835. test %reg,%reg / cmp $0,%reg test %reg,%reg / mov $0,%reg
  5836. je @Lbl2 jne @Lbl2
  5837. To: To:
  5838. mov $0,%reg mov $0,%reg
  5839. jmp @Lbl2 jmp @Lbl3
  5840. (align) (align)
  5841. @Lbl1: @Lbl1:
  5842. test %reg,%reg / cmp $0,%reg test %reg,%reg / cmp $0,%reg
  5843. je @Lbl2 je @Lbl2
  5844. @Lbl3: <-- Only if label exists
  5845. (Not if it's optimised for size)
  5846. }
  5847. if not GetNextInstruction(hp1, hp2) then
  5848. Exit;
  5849. if not (cs_opt_size in current_settings.optimizerswitches) and
  5850. (hp2.typ = ait_instruction) and
  5851. (
  5852. { Register sizes must exactly match }
  5853. (
  5854. (taicpu(hp2).opcode = A_CMP) and
  5855. MatchOperand(taicpu(hp2).oper[0]^, 0) and
  5856. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^.reg)
  5857. ) or (
  5858. (taicpu(hp2).opcode = A_TEST) and
  5859. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[1]^.reg) and
  5860. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^.reg)
  5861. )
  5862. ) and GetNextInstruction(hp2, hp3) and
  5863. (hp3.typ = ait_instruction) and
  5864. (taicpu(hp3).opcode = A_JCC) and
  5865. (taicpu(hp3).oper[0]^.typ=top_ref) and (taicpu(hp3).oper[0]^.ref^.refaddr=addr_full) and (taicpu(hp3).oper[0]^.ref^.base=NR_NO) and
  5866. (taicpu(hp3).oper[0]^.ref^.index=NR_NO) and (taicpu(hp3).oper[0]^.ref^.symbol is tasmlabel) then
  5867. begin
  5868. { Check condition of jump }
  5869. { Always true? }
  5870. if condition_in(C_E, taicpu(hp3).condition) then
  5871. begin
  5872. { Copy label symbol and obtain matching label entry for the
  5873. conditional jump, as this will be our destination}
  5874. DestLabel := tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol);
  5875. DebugMsg(SPeepholeOptimization + 'Mov0LblCmp0Je -> Mov0JmpLblCmp0Je', p);
  5876. Result := True;
  5877. end
  5878. { Always false? }
  5879. else if condition_in(C_NE, taicpu(hp3).condition) and GetNextInstruction(hp3, hp2) then
  5880. begin
  5881. { This is only worth it if there's a jump to take }
  5882. case hp2.typ of
  5883. ait_instruction:
  5884. begin
  5885. if taicpu(hp2).opcode = A_JMP then
  5886. begin
  5887. DestLabel := tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol);
  5888. { An unconditional jump follows the conditional jump which will always be false,
  5889. so use this jump's destination for the new jump }
  5890. DebugMsg(SPeepholeOptimization + 'Mov0LblCmp0Jne -> Mov0JmpLblCmp0Jne (with JMP)', p);
  5891. Result := True;
  5892. end
  5893. else if taicpu(hp2).opcode = A_JCC then
  5894. begin
  5895. DestLabel := tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol);
  5896. if condition_in(C_E, taicpu(hp2).condition) then
  5897. begin
  5898. { A second conditional jump follows the conditional jump which will always be false,
  5899. while the second jump is always True, so use this jump's destination for the new jump }
  5900. DebugMsg(SPeepholeOptimization + 'Mov0LblCmp0Jne -> Mov0JmpLblCmp0Jne (with second Jcc)', p);
  5901. Result := True;
  5902. end;
  5903. { Don't risk it if the jump isn't always true (Result remains False) }
  5904. end;
  5905. end;
  5906. else
  5907. { If anything else don't optimise };
  5908. end;
  5909. end;
  5910. if Result then
  5911. begin
  5912. { Just so we have something to insert as a paremeter}
  5913. reference_reset(NewRef, 1, []);
  5914. NewInstr := taicpu.op_ref(A_JMP, S_NO, NewRef);
  5915. { Now actually load the correct parameter }
  5916. NewInstr.loadsymbol(0, DestLabel, 0);
  5917. { Get instruction before original label (may not be p under -O3) }
  5918. if not GetLastInstruction(hp1, hp2) then
  5919. { Shouldn't fail here }
  5920. InternalError(2021040701);
  5921. DestLabel.increfs;
  5922. AsmL.InsertAfter(NewInstr, hp2);
  5923. { Add new alignment field }
  5924. (* AsmL.InsertAfter(
  5925. cai_align.create_max(
  5926. current_settings.alignment.jumpalign,
  5927. current_settings.alignment.jumpalignskipmax
  5928. ),
  5929. NewInstr
  5930. ); *)
  5931. end;
  5932. Exit;
  5933. end;
  5934. end;
  5935. else
  5936. ;
  5937. end;
  5938. end;
  5939. if not GetNextInstruction(p, hp1) then
  5940. Exit;
  5941. if MatchInstruction(hp1, A_JMP, [S_NO]) then
  5942. begin
  5943. { Sometimes the MOVs that OptPass2JMP produces can be improved
  5944. further, but we can't just put this jump optimisation in pass 1
  5945. because it tends to perform worse when conditional jumps are
  5946. nearby (e.g. when converting CMOV instructions). [Kit] }
  5947. if OptPass2JMP(hp1) then
  5948. { call OptPass1MOV once to potentially merge any MOVs that were created }
  5949. Result := OptPass1MOV(p)
  5950. { OptPass2MOV will now exit but will be called again if OptPass1MOV
  5951. returned True and the instruction is still a MOV, thus checking
  5952. the optimisations below }
  5953. { If OptPass2JMP returned False, no optimisations were done to
  5954. the jump and there are no further optimisations that can be done
  5955. to the MOV instruction on this pass }
  5956. end
  5957. else if MatchOpType(taicpu(p),top_reg,top_reg) and
  5958. (taicpu(p).opsize in [S_L{$ifdef x86_64}, S_Q{$endif x86_64}]) and
  5959. MatchInstruction(hp1,A_ADD,A_SUB,[taicpu(p).opsize]) and
  5960. MatchOpType(taicpu(hp1),top_const,top_reg) and
  5961. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) and
  5962. { be lazy, checking separately for sub would be slightly better }
  5963. (abs(taicpu(hp1).oper[0]^.val)<=$7fffffff) then
  5964. begin
  5965. { Change:
  5966. movl/q %reg1,%reg2 movl/q %reg1,%reg2
  5967. addl/q $x,%reg2 subl/q $x,%reg2
  5968. To:
  5969. leal/q x(%reg1),%reg2 leal/q -x(%reg1),%reg2
  5970. }
  5971. TransferUsedRegs(TmpUsedRegs);
  5972. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  5973. UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
  5974. if not GetNextInstruction(hp1, hp2) or
  5975. (
  5976. { The FLAGS register isn't always tracked properly, so do not
  5977. perform this optimisation if a conditional statement follows }
  5978. not RegReadByInstruction(NR_DEFAULTFLAGS, hp2) and
  5979. not RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp2, TmpUsedRegs)
  5980. ) then
  5981. begin
  5982. reference_reset(NewRef, 1, []);
  5983. NewRef.base := taicpu(p).oper[0]^.reg;
  5984. NewRef.scalefactor := 1;
  5985. if taicpu(hp1).opcode = A_ADD then
  5986. begin
  5987. DebugMsg(SPeepholeOptimization + 'MovAdd2Lea', p);
  5988. NewRef.offset := taicpu(hp1).oper[0]^.val;
  5989. end
  5990. else
  5991. begin
  5992. DebugMsg(SPeepholeOptimization + 'MovSub2Lea', p);
  5993. NewRef.offset := -taicpu(hp1).oper[0]^.val;
  5994. end;
  5995. taicpu(p).opcode := A_LEA;
  5996. taicpu(p).loadref(0, NewRef);
  5997. RemoveInstruction(hp1);
  5998. Result := True;
  5999. Exit;
  6000. end;
  6001. end
  6002. else if MatchOpType(taicpu(p),top_reg,top_reg) and
  6003. {$ifdef x86_64}
  6004. MatchInstruction(hp1,A_MOVZX,A_MOVSX,A_MOVSXD,[]) and
  6005. {$else x86_64}
  6006. MatchInstruction(hp1,A_MOVZX,A_MOVSX,[]) and
  6007. {$endif x86_64}
  6008. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  6009. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  6010. { mov reg1, reg2 mov reg1, reg2
  6011. movzx/sx reg2, reg3 to movzx/sx reg1, reg3}
  6012. begin
  6013. taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
  6014. DebugMsg(SPeepholeOptimization + 'mov %reg1,%reg2; movzx/sx %reg2,%reg3 -> mov %reg1,%reg2;movzx/sx %reg1,%reg3',p);
  6015. { Don't remove the MOV command without first checking that reg2 isn't used afterwards,
  6016. or unless supreg(reg3) = supreg(reg2)). [Kit] }
  6017. TransferUsedRegs(TmpUsedRegs);
  6018. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  6019. if (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) or
  6020. not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)
  6021. then
  6022. begin
  6023. RemoveCurrentP(p, hp1);
  6024. Result:=true;
  6025. end;
  6026. exit;
  6027. end
  6028. else if MatchOpType(taicpu(p),top_reg,top_reg) and
  6029. IsXCHGAcceptable and
  6030. { XCHG doesn't support 8-byte registers }
  6031. (taicpu(p).opsize <> S_B) and
  6032. MatchInstruction(hp1, A_MOV, []) and
  6033. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  6034. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
  6035. GetNextInstruction(hp1, hp2) and
  6036. MatchInstruction(hp2, A_MOV, []) and
  6037. { Don't need to call MatchOpType for hp2 because the operand matches below cover for it }
  6038. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[1]^.reg) and
  6039. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) then
  6040. begin
  6041. { mov %reg1,%reg2
  6042. mov %reg3,%reg1 -> xchg %reg3,%reg1
  6043. mov %reg2,%reg3
  6044. (%reg2 not used afterwards)
  6045. Note that xchg takes 3 cycles to execute, and generally mov's take
  6046. only one cycle apiece, but the first two mov's can be executed in
  6047. parallel, only taking 2 cycles overall. Older processors should
  6048. therefore only optimise for size. [Kit]
  6049. }
  6050. TransferUsedRegs(TmpUsedRegs);
  6051. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  6052. UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
  6053. if not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp2, TmpUsedRegs) then
  6054. begin
  6055. DebugMsg(SPeepholeOptimization + 'MovMovMov2XChg', p);
  6056. AllocRegBetween(taicpu(hp2).oper[1]^.reg, p, hp1, UsedRegs);
  6057. taicpu(hp1).opcode := A_XCHG;
  6058. RemoveCurrentP(p, hp1);
  6059. RemoveInstruction(hp2);
  6060. Result := True;
  6061. Exit;
  6062. end;
  6063. end
  6064. else if MatchOpType(taicpu(p),top_reg,top_reg) and
  6065. MatchInstruction(hp1, A_SAR, []) then
  6066. begin
  6067. if MatchOperand(taicpu(hp1).oper[0]^, 31) then
  6068. begin
  6069. { the use of %edx also covers the opsize being S_L }
  6070. if MatchOperand(taicpu(hp1).oper[1]^, NR_EDX) then
  6071. begin
  6072. { Note it has to be specifically "movl %eax,%edx", and those specific sub-registers }
  6073. if (taicpu(p).oper[0]^.reg = NR_EAX) and
  6074. (taicpu(p).oper[1]^.reg = NR_EDX) then
  6075. begin
  6076. { Change:
  6077. movl %eax,%edx
  6078. sarl $31,%edx
  6079. To:
  6080. cltd
  6081. }
  6082. DebugMsg(SPeepholeOptimization + 'MovSar2Cltd', p);
  6083. RemoveInstruction(hp1);
  6084. taicpu(p).opcode := A_CDQ;
  6085. taicpu(p).opsize := S_NO;
  6086. taicpu(p).clearop(1);
  6087. taicpu(p).clearop(0);
  6088. taicpu(p).ops:=0;
  6089. Result := True;
  6090. end
  6091. else if (cs_opt_size in current_settings.optimizerswitches) and
  6092. (taicpu(p).oper[0]^.reg = NR_EDX) and
  6093. (taicpu(p).oper[1]^.reg = NR_EAX) then
  6094. begin
  6095. { Change:
  6096. movl %edx,%eax
  6097. sarl $31,%edx
  6098. To:
  6099. movl %edx,%eax
  6100. cltd
  6101. Note that this creates a dependency between the two instructions,
  6102. so only perform if optimising for size.
  6103. }
  6104. DebugMsg(SPeepholeOptimization + 'MovSar2MovCltd', p);
  6105. taicpu(hp1).opcode := A_CDQ;
  6106. taicpu(hp1).opsize := S_NO;
  6107. taicpu(hp1).clearop(1);
  6108. taicpu(hp1).clearop(0);
  6109. taicpu(hp1).ops:=0;
  6110. end;
  6111. {$ifndef x86_64}
  6112. end
  6113. { Don't bother if CMOV is supported, because a more optimal
  6114. sequence would have been generated for the Abs() intrinsic }
  6115. else if not(CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) and
  6116. { the use of %eax also covers the opsize being S_L }
  6117. MatchOperand(taicpu(hp1).oper[1]^, NR_EAX) and
  6118. (taicpu(p).oper[0]^.reg = NR_EAX) and
  6119. (taicpu(p).oper[1]^.reg = NR_EDX) and
  6120. GetNextInstruction(hp1, hp2) and
  6121. MatchInstruction(hp2, A_XOR, [S_L]) and
  6122. MatchOperand(taicpu(hp2).oper[0]^, NR_EAX) and
  6123. MatchOperand(taicpu(hp2).oper[1]^, NR_EDX) and
  6124. GetNextInstruction(hp2, hp3) and
  6125. MatchInstruction(hp3, A_SUB, [S_L]) and
  6126. MatchOperand(taicpu(hp3).oper[0]^, NR_EAX) and
  6127. MatchOperand(taicpu(hp3).oper[1]^, NR_EDX) then
  6128. begin
  6129. { Change:
  6130. movl %eax,%edx
  6131. sarl $31,%eax
  6132. xorl %eax,%edx
  6133. subl %eax,%edx
  6134. (Instruction that uses %edx)
  6135. (%eax deallocated)
  6136. (%edx deallocated)
  6137. To:
  6138. cltd
  6139. xorl %edx,%eax <-- Note the registers have swapped
  6140. subl %edx,%eax
  6141. (Instruction that uses %eax) <-- %eax rather than %edx
  6142. }
  6143. TransferUsedRegs(TmpUsedRegs);
  6144. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  6145. UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
  6146. UpdateUsedRegs(TmpUsedRegs, tai(hp2.Next));
  6147. if not RegUsedAfterInstruction(NR_EAX, hp3, TmpUsedRegs) then
  6148. begin
  6149. if GetNextInstruction(hp3, hp4) and
  6150. not RegModifiedByInstruction(NR_EDX, hp4) and
  6151. not RegUsedAfterInstruction(NR_EDX, hp4, TmpUsedRegs) then
  6152. begin
  6153. DebugMsg(SPeepholeOptimization + 'abs() intrinsic optimisation', p);
  6154. taicpu(p).opcode := A_CDQ;
  6155. taicpu(p).clearop(1);
  6156. taicpu(p).clearop(0);
  6157. taicpu(p).ops:=0;
  6158. RemoveInstruction(hp1);
  6159. taicpu(hp2).loadreg(0, NR_EDX);
  6160. taicpu(hp2).loadreg(1, NR_EAX);
  6161. taicpu(hp3).loadreg(0, NR_EDX);
  6162. taicpu(hp3).loadreg(1, NR_EAX);
  6163. AllocRegBetween(NR_EAX, hp3, hp4, TmpUsedRegs);
  6164. { Convert references in the following instruction (hp4) from %edx to %eax }
  6165. for OperIdx := 0 to taicpu(hp4).ops - 1 do
  6166. with taicpu(hp4).oper[OperIdx]^ do
  6167. case typ of
  6168. top_reg:
  6169. if getsupreg(reg) = RS_EDX then
  6170. reg := newreg(R_INTREGISTER,RS_EAX,getsubreg(reg));
  6171. top_ref:
  6172. begin
  6173. if getsupreg(reg) = RS_EDX then
  6174. ref^.base := newreg(R_INTREGISTER,RS_EAX,getsubreg(reg));
  6175. if getsupreg(reg) = RS_EDX then
  6176. ref^.index := newreg(R_INTREGISTER,RS_EAX,getsubreg(reg));
  6177. end;
  6178. else
  6179. ;
  6180. end;
  6181. end;
  6182. end;
  6183. {$else x86_64}
  6184. end;
  6185. end
  6186. else if MatchOperand(taicpu(hp1).oper[0]^, 63) and
  6187. { the use of %rdx also covers the opsize being S_Q }
  6188. MatchOperand(taicpu(hp1).oper[1]^, NR_RDX) then
  6189. begin
  6190. { Note it has to be specifically "movq %rax,%rdx", and those specific sub-registers }
  6191. if (taicpu(p).oper[0]^.reg = NR_RAX) and
  6192. (taicpu(p).oper[1]^.reg = NR_RDX) then
  6193. begin
  6194. { Change:
  6195. movq %rax,%rdx
  6196. sarq $63,%rdx
  6197. To:
  6198. cqto
  6199. }
  6200. DebugMsg(SPeepholeOptimization + 'MovSar2Cqto', p);
  6201. RemoveInstruction(hp1);
  6202. taicpu(p).opcode := A_CQO;
  6203. taicpu(p).opsize := S_NO;
  6204. taicpu(p).clearop(1);
  6205. taicpu(p).clearop(0);
  6206. taicpu(p).ops:=0;
  6207. Result := True;
  6208. end
  6209. else if (cs_opt_size in current_settings.optimizerswitches) and
  6210. (taicpu(p).oper[0]^.reg = NR_RDX) and
  6211. (taicpu(p).oper[1]^.reg = NR_RAX) then
  6212. begin
  6213. { Change:
  6214. movq %rdx,%rax
  6215. sarq $63,%rdx
  6216. To:
  6217. movq %rdx,%rax
  6218. cqto
  6219. Note that this creates a dependency between the two instructions,
  6220. so only perform if optimising for size.
  6221. }
  6222. DebugMsg(SPeepholeOptimization + 'MovSar2MovCqto', p);
  6223. taicpu(hp1).opcode := A_CQO;
  6224. taicpu(hp1).opsize := S_NO;
  6225. taicpu(hp1).clearop(1);
  6226. taicpu(hp1).clearop(0);
  6227. taicpu(hp1).ops:=0;
  6228. {$endif x86_64}
  6229. end;
  6230. end;
  6231. end
  6232. else if MatchInstruction(hp1, A_MOV, []) and
  6233. (taicpu(hp1).oper[1]^.typ = top_reg) then
  6234. { Though "GetNextInstruction" could be factored out, along with
  6235. the instructions that depend on hp2, it is an expensive call that
  6236. should be delayed for as long as possible, hence we do cheaper
  6237. checks first that are likely to be False. [Kit] }
  6238. begin
  6239. if (
  6240. (
  6241. MatchOperand(taicpu(p).oper[1]^, NR_EDX) and
  6242. (taicpu(hp1).oper[1]^.reg = NR_EAX) and
  6243. (
  6244. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^) or
  6245. MatchOperand(taicpu(hp1).oper[0]^, NR_EDX)
  6246. )
  6247. ) or
  6248. (
  6249. MatchOperand(taicpu(p).oper[1]^, NR_EAX) and
  6250. (taicpu(hp1).oper[1]^.reg = NR_EDX) and
  6251. (
  6252. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^) or
  6253. MatchOperand(taicpu(hp1).oper[0]^, NR_EAX)
  6254. )
  6255. )
  6256. ) and
  6257. GetNextInstruction(hp1, hp2) and
  6258. MatchInstruction(hp2, A_SAR, []) and
  6259. MatchOperand(taicpu(hp2).oper[0]^, 31) then
  6260. begin
  6261. if MatchOperand(taicpu(hp2).oper[1]^, NR_EDX) then
  6262. begin
  6263. { Change:
  6264. movl r/m,%edx movl r/m,%eax movl r/m,%edx movl r/m,%eax
  6265. movl %edx,%eax or movl %eax,%edx or movl r/m,%eax or movl r/m,%edx
  6266. sarl $31,%edx sarl $31,%edx sarl $31,%edx sarl $31,%edx
  6267. To:
  6268. movl r/m,%eax <- Note the change in register
  6269. cltd
  6270. }
  6271. DebugMsg(SPeepholeOptimization + 'MovMovSar2MovCltd', p);
  6272. AllocRegBetween(NR_EAX, p, hp1, UsedRegs);
  6273. taicpu(p).loadreg(1, NR_EAX);
  6274. taicpu(hp1).opcode := A_CDQ;
  6275. taicpu(hp1).clearop(1);
  6276. taicpu(hp1).clearop(0);
  6277. taicpu(hp1).ops:=0;
  6278. RemoveInstruction(hp2);
  6279. (*
  6280. {$ifdef x86_64}
  6281. end
  6282. else if MatchOperand(taicpu(hp2).oper[1]^, NR_RDX) and
  6283. { This code sequence does not get generated - however it might become useful
  6284. if and when 128-bit signed integer types make an appearance, so the code
  6285. is kept here for when it is eventually needed. [Kit] }
  6286. (
  6287. (
  6288. (taicpu(hp1).oper[1]^.reg = NR_RAX) and
  6289. (
  6290. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^) or
  6291. MatchOperand(taicpu(hp1).oper[0]^, NR_RDX)
  6292. )
  6293. ) or
  6294. (
  6295. (taicpu(hp1).oper[1]^.reg = NR_RDX) and
  6296. (
  6297. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^) or
  6298. MatchOperand(taicpu(hp1).oper[0]^, NR_RAX)
  6299. )
  6300. )
  6301. ) and
  6302. GetNextInstruction(hp1, hp2) and
  6303. MatchInstruction(hp2, A_SAR, [S_Q]) and
  6304. MatchOperand(taicpu(hp2).oper[0]^, 63) and
  6305. MatchOperand(taicpu(hp2).oper[1]^, NR_RDX) then
  6306. begin
  6307. { Change:
  6308. movq r/m,%rdx movq r/m,%rax movq r/m,%rdx movq r/m,%rax
  6309. movq %rdx,%rax or movq %rax,%rdx or movq r/m,%rax or movq r/m,%rdx
  6310. sarq $63,%rdx sarq $63,%rdx sarq $63,%rdx sarq $63,%rdx
  6311. To:
  6312. movq r/m,%rax <- Note the change in register
  6313. cqto
  6314. }
  6315. DebugMsg(SPeepholeOptimization + 'MovMovSar2MovCqto', p);
  6316. AllocRegBetween(NR_RAX, p, hp1, UsedRegs);
  6317. taicpu(p).loadreg(1, NR_RAX);
  6318. taicpu(hp1).opcode := A_CQO;
  6319. taicpu(hp1).clearop(1);
  6320. taicpu(hp1).clearop(0);
  6321. taicpu(hp1).ops:=0;
  6322. RemoveInstruction(hp2);
  6323. {$endif x86_64}
  6324. *)
  6325. end;
  6326. end;
  6327. {$ifdef x86_64}
  6328. end
  6329. else if (taicpu(p).opsize = S_L) and
  6330. (taicpu(p).oper[1]^.typ = top_reg) and
  6331. (
  6332. MatchInstruction(hp1, A_MOV,[]) and
  6333. (taicpu(hp1).opsize = S_L) and
  6334. (taicpu(hp1).oper[1]^.typ = top_reg)
  6335. ) and (
  6336. GetNextInstruction(hp1, hp2) and
  6337. (tai(hp2).typ=ait_instruction) and
  6338. (taicpu(hp2).opsize = S_Q) and
  6339. (
  6340. (
  6341. MatchInstruction(hp2, A_ADD,[]) and
  6342. (taicpu(hp2).opsize = S_Q) and
  6343. (taicpu(hp2).oper[0]^.typ = top_reg) and (taicpu(hp2).oper[1]^.typ = top_reg) and
  6344. (
  6345. (
  6346. (getsupreg(taicpu(hp2).oper[0]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) and
  6347. (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))
  6348. ) or (
  6349. (getsupreg(taicpu(hp2).oper[0]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
  6350. (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg))
  6351. )
  6352. )
  6353. ) or (
  6354. MatchInstruction(hp2, A_LEA,[]) and
  6355. (taicpu(hp2).oper[0]^.ref^.offset = 0) and
  6356. (taicpu(hp2).oper[0]^.ref^.scalefactor <= 1) and
  6357. (
  6358. (
  6359. (getsupreg(taicpu(hp2).oper[0]^.ref^.base) = getsupreg(taicpu(p).oper[1]^.reg)) and
  6360. (getsupreg(taicpu(hp2).oper[0]^.ref^.index) = getsupreg(taicpu(hp1).oper[1]^.reg))
  6361. ) or (
  6362. (getsupreg(taicpu(hp2).oper[0]^.ref^.base) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
  6363. (getsupreg(taicpu(hp2).oper[0]^.ref^.index) = getsupreg(taicpu(p).oper[1]^.reg))
  6364. )
  6365. ) and (
  6366. (
  6367. (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))
  6368. ) or (
  6369. (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg))
  6370. )
  6371. )
  6372. )
  6373. )
  6374. ) and (
  6375. GetNextInstruction(hp2, hp3) and
  6376. MatchInstruction(hp3, A_SHR,[]) and
  6377. (taicpu(hp3).opsize = S_Q) and
  6378. (taicpu(hp3).oper[0]^.typ = top_const) and (taicpu(hp2).oper[1]^.typ = top_reg) and
  6379. (taicpu(hp3).oper[0]^.val = 1) and
  6380. (taicpu(hp3).oper[1]^.reg = taicpu(hp2).oper[1]^.reg)
  6381. ) then
  6382. begin
  6383. { Change movl x, reg1d movl x, reg1d
  6384. movl y, reg2d movl y, reg2d
  6385. addq reg2q,reg1q or leaq (reg1q,reg2q),reg1q
  6386. shrq $1, reg1q shrq $1, reg1q
  6387. ( reg1d and reg2d can be switched around in the first two instructions )
  6388. To movl x, reg1d
  6389. addl y, reg1d
  6390. rcrl $1, reg1d
  6391. This corresponds to the common expression (x + y) shr 1, where
  6392. x and y are Cardinals (replacing "shr 1" with "div 2" produces
  6393. smaller code, but won't account for x + y causing an overflow). [Kit]
  6394. }
  6395. if (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) then
  6396. { Change first MOV command to have the same register as the final output }
  6397. taicpu(p).oper[1]^.reg := taicpu(hp1).oper[1]^.reg
  6398. else
  6399. taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  6400. { Change second MOV command to an ADD command. This is easier than
  6401. converting the existing command because it means we don't have to
  6402. touch 'y', which might be a complicated reference, and also the
  6403. fact that the third command might either be ADD or LEA. [Kit] }
  6404. taicpu(hp1).opcode := A_ADD;
  6405. { Delete old ADD/LEA instruction }
  6406. RemoveInstruction(hp2);
  6407. { Convert "shrq $1, reg1q" to "rcr $1, reg1d" }
  6408. taicpu(hp3).opcode := A_RCR;
  6409. taicpu(hp3).changeopsize(S_L);
  6410. setsubreg(taicpu(hp3).oper[1]^.reg, R_SUBD);
  6411. {$endif x86_64}
  6412. end;
  6413. end;
  6414. function TX86AsmOptimizer.OptPass2Movx(var p : tai) : boolean;
  6415. var
  6416. ThisReg: TRegister;
  6417. MinSize, MaxSize, TrySmaller, TargetSize: TOpSize;
  6418. TargetSubReg: TSubRegister;
  6419. hp1, hp2: tai;
  6420. RegInUse, RegChanged, p_removed: Boolean;
  6421. { Store list of found instructions so we don't have to call
  6422. GetNextInstructionUsingReg multiple times }
  6423. InstrList: array of taicpu;
  6424. InstrMax, Index: Integer;
  6425. UpperLimit, TrySmallerLimit: TCgInt;
  6426. PreMessage: string;
  6427. { Data flow analysis }
  6428. TestValMin, TestValMax: TCgInt;
  6429. SmallerOverflow: Boolean;
  6430. begin
  6431. Result := False;
  6432. p_removed := False;
  6433. { This is anything but quick! }
  6434. if not(cs_opt_level2 in current_settings.optimizerswitches) then
  6435. Exit;
  6436. SetLength(InstrList, 0);
  6437. InstrMax := -1;
  6438. ThisReg := taicpu(p).oper[1]^.reg;
  6439. case taicpu(p).opsize of
  6440. S_BW, S_BL:
  6441. begin
  6442. {$if defined(i386) or defined(i8086)}
  6443. { If the target size is 8-bit, make sure we can actually encode it }
  6444. if not (GetSupReg(ThisReg) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX]) then
  6445. Exit;
  6446. {$endif i386 or i8086}
  6447. UpperLimit := $FF;
  6448. MinSize := S_B;
  6449. if taicpu(p).opsize = S_BW then
  6450. MaxSize := S_W
  6451. else
  6452. MaxSize := S_L;
  6453. end;
  6454. S_WL:
  6455. begin
  6456. UpperLimit := $FFFF;
  6457. MinSize := S_W;
  6458. MaxSize := S_L;
  6459. end
  6460. else
  6461. InternalError(2020112301);
  6462. end;
  6463. TestValMin := 0;
  6464. TestValMax := UpperLimit;
  6465. TrySmallerLimit := UpperLimit;
  6466. TrySmaller := S_NO;
  6467. SmallerOverflow := False;
  6468. RegChanged := False;
  6469. hp1 := p;
  6470. while GetNextInstructionUsingReg(hp1, hp1, ThisReg) and
  6471. (hp1.typ = ait_instruction) and
  6472. (
  6473. { Under -O1 and -O2, GetNextInstructionUsingReg may return an
  6474. instruction that doesn't actually contain ThisReg }
  6475. (cs_opt_level3 in current_settings.optimizerswitches) or
  6476. RegInInstruction(ThisReg, hp1)
  6477. ) do
  6478. begin
  6479. case taicpu(hp1).opcode of
  6480. A_INC,A_DEC:
  6481. begin
  6482. { Has to be an exact match on the register }
  6483. if not MatchOperand(taicpu(hp1).oper[0]^, ThisReg) then
  6484. Break;
  6485. if taicpu(hp1).opcode = A_INC then
  6486. begin
  6487. Inc(TestValMin);
  6488. Inc(TestValMax);
  6489. end
  6490. else
  6491. begin
  6492. Dec(TestValMin);
  6493. Dec(TestValMax);
  6494. end;
  6495. end;
  6496. A_CMP:
  6497. begin
  6498. if (taicpu(hp1).oper[1]^.typ <> top_reg) or
  6499. { Has to be an exact match on the register }
  6500. (taicpu(hp1).oper[1]^.reg <> ThisReg) or
  6501. (taicpu(hp1).oper[0]^.typ <> top_const) or
  6502. { Make sure the comparison value is not smaller than the
  6503. smallest allowed signed value for the minimum size (e.g.
  6504. -128 for 8-bit) }
  6505. not (
  6506. ((taicpu(hp1).oper[0]^.val and UpperLimit) = taicpu(hp1).oper[0]^.val) or
  6507. { Is it in the negative range? }
  6508. (((not taicpu(hp1).oper[0]^.val) and (UpperLimit shr 1)) = (not taicpu(hp1).oper[0]^.val))
  6509. ) then
  6510. Break;
  6511. TestValMin := TestValMin - taicpu(hp1).oper[0]^.val;
  6512. TestValMax := TestValMax - taicpu(hp1).oper[0]^.val;
  6513. if (TestValMin < TrySmallerLimit) or (TestValMax < TrySmallerLimit) or
  6514. (TestValMin > UpperLimit) or (TestValMax > UpperLimit) then
  6515. { Overflow }
  6516. Break;
  6517. { Check to see if the active register is used afterwards }
  6518. TransferUsedRegs(TmpUsedRegs);
  6519. IncludeRegInUsedRegs(ThisReg, TmpUsedRegs);
  6520. if not RegUsedAfterInstruction(ThisReg, hp1, TmpUsedRegs) then
  6521. begin
  6522. case MinSize of
  6523. S_B:
  6524. TargetSubReg := R_SUBL;
  6525. S_W:
  6526. TargetSubReg := R_SUBW;
  6527. else
  6528. InternalError(2021051002);
  6529. end;
  6530. { Update the register to its new size }
  6531. setsubreg(ThisReg, TargetSubReg);
  6532. taicpu(hp1).oper[1]^.reg := ThisReg;
  6533. taicpu(hp1).opsize := MinSize;
  6534. { Convert the input MOVZX to a MOV }
  6535. if (taicpu(p).oper[0]^.typ = top_reg) and
  6536. SuperRegistersEqual(taicpu(p).oper[0]^.reg, ThisReg) then
  6537. begin
  6538. { Or remove it completely! }
  6539. DebugMsg(SPeepholeOptimization + 'Movzx2Nop 1a', p);
  6540. RemoveCurrentP(p);
  6541. p_removed := True;
  6542. end
  6543. else
  6544. begin
  6545. DebugMsg(SPeepholeOptimization + 'Movzx2Mov 1a', p);
  6546. taicpu(p).opcode := A_MOV;
  6547. taicpu(p).oper[1]^.reg := ThisReg;
  6548. taicpu(p).opsize := MinSize;
  6549. end;
  6550. if (InstrMax >= 0) then
  6551. begin
  6552. for Index := 0 to InstrMax do
  6553. begin
  6554. { If p_removed is true, then the original MOV/Z was removed
  6555. and removing the AND instruction may not be safe if it
  6556. appears first }
  6557. if (InstrList[Index].oper[InstrList[Index].ops - 1]^.typ <> top_reg) then
  6558. InternalError(2020112311);
  6559. if InstrList[Index].oper[0]^.typ = top_reg then
  6560. InstrList[Index].oper[0]^.reg := ThisReg;
  6561. InstrList[Index].oper[InstrList[Index].ops - 1]^.reg := ThisReg;
  6562. InstrList[Index].opsize := MinSize;
  6563. end;
  6564. end;
  6565. Result := True;
  6566. Exit;
  6567. end;
  6568. end;
  6569. { OR and XOR are not included because they can too easily fool
  6570. the data flow analysis (they can cause non-linear behaviour) }
  6571. A_ADD,A_SUB,A_AND,A_SHL,A_SHR:
  6572. begin
  6573. if
  6574. (taicpu(hp1).oper[1]^.typ <> top_reg) or
  6575. { Has to be an exact match on the register }
  6576. (taicpu(hp1).oper[1]^.reg <> ThisReg) or not
  6577. (
  6578. (
  6579. (taicpu(hp1).oper[0]^.typ = top_const) and
  6580. (
  6581. (
  6582. (taicpu(hp1).opcode = A_SHL) and
  6583. (
  6584. ((MinSize = S_B) and (taicpu(hp1).oper[0]^.val < 8)) or
  6585. ((MinSize = S_W) and (taicpu(hp1).oper[0]^.val < 16)) or
  6586. ((MinSize = S_L) and (taicpu(hp1).oper[0]^.val < 32))
  6587. )
  6588. ) or (
  6589. (taicpu(hp1).opcode <> A_SHL) and
  6590. (
  6591. ((taicpu(hp1).oper[0]^.val and UpperLimit) = taicpu(hp1).oper[0]^.val) or
  6592. { Is it in the negative range? }
  6593. (((not taicpu(hp1).oper[0]^.val) and (UpperLimit shr 1)) = (not taicpu(hp1).oper[0]^.val))
  6594. )
  6595. )
  6596. )
  6597. ) or (
  6598. MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^.reg) and
  6599. ((taicpu(hp1).opcode = A_ADD) or (taicpu(hp1).opcode = A_AND) or (taicpu(hp1).opcode = A_SUB))
  6600. )
  6601. ) then
  6602. Break;
  6603. case taicpu(hp1).opcode of
  6604. A_ADD:
  6605. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  6606. begin
  6607. TestValMin := TestValMin * 2;
  6608. TestValMax := TestValMax * 2;
  6609. end
  6610. else
  6611. begin
  6612. TestValMin := TestValMin + taicpu(hp1).oper[0]^.val;
  6613. TestValMax := TestValMax + taicpu(hp1).oper[0]^.val;
  6614. end;
  6615. A_SUB:
  6616. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  6617. begin
  6618. TestValMin := 0;
  6619. TestValMax := 0;
  6620. end
  6621. else
  6622. begin
  6623. TestValMin := TestValMin - taicpu(hp1).oper[0]^.val;
  6624. TestValMax := TestValMax - taicpu(hp1).oper[0]^.val;
  6625. end;
  6626. A_AND:
  6627. if (taicpu(hp1).oper[0]^.typ = top_const) then
  6628. begin
  6629. { we might be able to go smaller if AND appears first }
  6630. if InstrMax = -1 then
  6631. case MinSize of
  6632. S_B:
  6633. ;
  6634. S_W:
  6635. if ((taicpu(hp1).oper[0]^.val and $FF) = taicpu(hp1).oper[0]^.val) or
  6636. ((not(taicpu(hp1).oper[0]^.val) and $7F) = (not taicpu(hp1).oper[0]^.val)) then
  6637. begin
  6638. TrySmaller := S_B;
  6639. TrySmallerLimit := $FF;
  6640. end;
  6641. S_L:
  6642. if ((taicpu(hp1).oper[0]^.val and $FF) = taicpu(hp1).oper[0]^.val) or
  6643. ((not(taicpu(hp1).oper[0]^.val) and $7F) = (not taicpu(hp1).oper[0]^.val)) then
  6644. begin
  6645. TrySmaller := S_B;
  6646. TrySmallerLimit := $FF;
  6647. end
  6648. else if ((taicpu(hp1).oper[0]^.val and $FFFF) = taicpu(hp1).oper[0]^.val) or
  6649. ((not(taicpu(hp1).oper[0]^.val) and $7FFF) = (not taicpu(hp1).oper[0]^.val)) then
  6650. begin
  6651. TrySmaller := S_W;
  6652. TrySmallerLimit := $FFFF;
  6653. end;
  6654. else
  6655. InternalError(2020112320);
  6656. end;
  6657. TestValMin := TestValMin and taicpu(hp1).oper[0]^.val;
  6658. TestValMax := TestValMax and taicpu(hp1).oper[0]^.val;
  6659. end;
  6660. A_SHL:
  6661. begin
  6662. TestValMin := TestValMin shl taicpu(hp1).oper[0]^.val;
  6663. TestValMax := TestValMax shl taicpu(hp1).oper[0]^.val;
  6664. end;
  6665. A_SHR:
  6666. begin
  6667. { we might be able to go smaller if SHR appears first }
  6668. if InstrMax = -1 then
  6669. case MinSize of
  6670. S_B:
  6671. ;
  6672. S_W:
  6673. if (taicpu(hp1).oper[0]^.val >= 8) then
  6674. begin
  6675. TrySmaller := S_B;
  6676. TrySmallerLimit := $FF;
  6677. end;
  6678. S_L:
  6679. if (taicpu(hp1).oper[0]^.val >= 24) then
  6680. begin
  6681. TrySmaller := S_B;
  6682. TrySmallerLimit := $FF;
  6683. end
  6684. else if (taicpu(hp1).oper[0]^.val >= 16) then
  6685. begin
  6686. TrySmaller := S_W;
  6687. TrySmallerLimit := $FFFF;
  6688. end;
  6689. else
  6690. InternalError(2020112321);
  6691. end;
  6692. TestValMin := TestValMin shr taicpu(hp1).oper[0]^.val;
  6693. TestValMax := TestValMax shr taicpu(hp1).oper[0]^.val;
  6694. end;
  6695. else
  6696. InternalError(2020112303);
  6697. end;
  6698. end;
  6699. (*
  6700. A_IMUL:
  6701. case taicpu(hp1).ops of
  6702. 2:
  6703. begin
  6704. if not MatchOpType(hp1, top_reg, top_reg) or
  6705. { Has to be an exact match on the register }
  6706. (taicpu(hp1).oper[0]^.reg <> ThisReg) or
  6707. (taicpu(hp1).oper[1]^.reg <> ThisReg) then
  6708. Break;
  6709. TestValMin := TestValMin * TestValMin;
  6710. TestValMax := TestValMax * TestValMax;
  6711. end;
  6712. 3:
  6713. begin
  6714. if not MatchOpType(hp1, top_const, top_reg, top_reg) or
  6715. { Has to be an exact match on the register }
  6716. (taicpu(hp1).oper[1]^.reg <> ThisReg) or
  6717. (taicpu(hp1).oper[2]^.reg <> ThisReg) or
  6718. ((taicpu(hp1).oper[0]^.val and UpperLimit) = taicpu(hp1).oper[0]^.val) or
  6719. { Is it in the negative range? }
  6720. (((not taicpu(hp1).oper[0]^.val) and (UpperLimit shr 1)) = (not taicpu(hp1).oper[0]^.val)) then
  6721. Break;
  6722. TestValMin := TestValMin * taicpu(hp1).oper[0]^.val;
  6723. TestValMax := TestValMax * taicpu(hp1).oper[0]^.val;
  6724. end;
  6725. else
  6726. Break;
  6727. end;
  6728. A_IDIV:
  6729. case taicpu(hp1).ops of
  6730. 3:
  6731. begin
  6732. if not MatchOpType(hp1, top_const, top_reg, top_reg) or
  6733. { Has to be an exact match on the register }
  6734. (taicpu(hp1).oper[1]^.reg <> ThisReg) or
  6735. (taicpu(hp1).oper[2]^.reg <> ThisReg) or
  6736. ((taicpu(hp1).oper[0]^.val and UpperLimit) = taicpu(hp1).oper[0]^.val) or
  6737. { Is it in the negative range? }
  6738. (((not taicpu(hp1).oper[0]^.val) and (UpperLimit shr 1)) = (not taicpu(hp1).oper[0]^.val)) then
  6739. Break;
  6740. TestValMin := TestValMin div taicpu(hp1).oper[0]^.val;
  6741. TestValMax := TestValMax div taicpu(hp1).oper[0]^.val;
  6742. end;
  6743. else
  6744. Break;
  6745. end;
  6746. *)
  6747. A_MOVZX:
  6748. begin
  6749. if not MatchOpType(taicpu(hp1), top_reg, top_reg) then
  6750. Break;
  6751. if not SuperRegistersEqual(taicpu(hp1).oper[0]^.reg, ThisReg) then
  6752. begin
  6753. { Because hp1 was obtained via GetNextInstructionUsingReg
  6754. and ThisReg doesn't appear in the first operand, it
  6755. must appear in the second operand and hence gets
  6756. overwritten }
  6757. if (InstrMax = -1) and
  6758. Reg1WriteOverwritesReg2Entirely(taicpu(hp1).oper[1]^.reg, ThisReg) then
  6759. begin
  6760. { The two MOVZX instructions are adjacent, so remove the first one }
  6761. DebugMsg(SPeepholeOptimization + 'Movzx2Nop 5', p);
  6762. RemoveCurrentP(p);
  6763. Result := True;
  6764. Exit;
  6765. end;
  6766. Break;
  6767. end;
  6768. { The objective here is to try to find a combination that
  6769. removes one of the MOV/Z instructions. }
  6770. case taicpu(hp1).opsize of
  6771. S_WL:
  6772. if (MinSize in [S_B, S_W]) then
  6773. begin
  6774. TargetSize := S_L;
  6775. TargetSubReg := R_SUBD;
  6776. end
  6777. else if ((TrySmaller in [S_B, S_W]) and not SmallerOverflow) then
  6778. begin
  6779. TargetSize := TrySmaller;
  6780. if TrySmaller = S_B then
  6781. TargetSubReg := R_SUBL
  6782. else
  6783. TargetSubReg := R_SUBW;
  6784. end
  6785. else
  6786. Break;
  6787. S_BW:
  6788. if (MinSize in [S_B, S_W]) then
  6789. begin
  6790. TargetSize := S_W;
  6791. TargetSubReg := R_SUBW;
  6792. end
  6793. else if ((TrySmaller = S_B) and not SmallerOverflow) then
  6794. begin
  6795. TargetSize := S_B;
  6796. TargetSubReg := R_SUBL;
  6797. end
  6798. else
  6799. Break;
  6800. S_BL:
  6801. if (MinSize in [S_B, S_W]) then
  6802. begin
  6803. TargetSize := S_L;
  6804. TargetSubReg := R_SUBD;
  6805. end
  6806. else if ((TrySmaller = S_B) and not SmallerOverflow) then
  6807. begin
  6808. TargetSize := S_B;
  6809. TargetSubReg := R_SUBL;
  6810. end
  6811. else
  6812. Break;
  6813. else
  6814. InternalError(2020112302);
  6815. end;
  6816. { Update the register to its new size }
  6817. setsubreg(ThisReg, TargetSubReg);
  6818. if TargetSize = MinSize then
  6819. begin
  6820. { Convert the input MOVZX to a MOV }
  6821. if (taicpu(p).oper[0]^.typ = top_reg) and
  6822. SuperRegistersEqual(taicpu(p).oper[0]^.reg, ThisReg) then
  6823. begin
  6824. { Or remove it completely! }
  6825. DebugMsg(SPeepholeOptimization + 'Movzx2Nop 1', p);
  6826. RemoveCurrentP(p);
  6827. p_removed := True;
  6828. end
  6829. else
  6830. begin
  6831. DebugMsg(SPeepholeOptimization + 'Movzx2Mov 1', p);
  6832. taicpu(p).opcode := A_MOV;
  6833. taicpu(p).oper[1]^.reg := ThisReg;
  6834. taicpu(p).opsize := TargetSize;
  6835. end;
  6836. Result := True;
  6837. end
  6838. else if TargetSize <> MaxSize then
  6839. begin
  6840. case MaxSize of
  6841. S_L:
  6842. if TargetSize = S_W then
  6843. begin
  6844. DebugMsg(SPeepholeOptimization + 'movzbl2movzbw', p);
  6845. taicpu(p).opsize := S_BW;
  6846. taicpu(p).oper[1]^.reg := ThisReg;
  6847. Result := True;
  6848. end
  6849. else
  6850. InternalError(2020112341);
  6851. S_W:
  6852. if TargetSize = S_L then
  6853. begin
  6854. DebugMsg(SPeepholeOptimization + 'movzbw2movzbl', p);
  6855. taicpu(p).opsize := S_BL;
  6856. taicpu(p).oper[1]^.reg := ThisReg;
  6857. Result := True;
  6858. end
  6859. else
  6860. InternalError(2020112342);
  6861. else
  6862. ;
  6863. end;
  6864. end;
  6865. if (MaxSize = TargetSize) or
  6866. ((TargetSize = S_L) and (taicpu(hp1).opsize in [S_L, S_BL, S_WL])) or
  6867. ((TargetSize = S_W) and (taicpu(hp1).opsize in [S_W, S_BW])) then
  6868. begin
  6869. { Convert the output MOVZX to a MOV }
  6870. if SuperRegistersEqual(taicpu(hp1).oper[1]^.reg, ThisReg) then
  6871. begin
  6872. { Or remove it completely! }
  6873. DebugMsg(SPeepholeOptimization + 'Movzx2Nop 2', hp1);
  6874. { Be careful; if p = hp1 and p was also removed, p
  6875. will become a dangling pointer }
  6876. if p = hp1 then
  6877. RemoveCurrentp(p) { p = hp1 and will then become the next instruction }
  6878. else
  6879. RemoveInstruction(hp1);
  6880. end
  6881. else
  6882. begin
  6883. taicpu(hp1).opcode := A_MOV;
  6884. taicpu(hp1).oper[0]^.reg := ThisReg;
  6885. taicpu(hp1).opsize := TargetSize;
  6886. { Check to see if the active register is used afterwards;
  6887. if not, we can change it and make a saving. }
  6888. RegInUse := False;
  6889. TransferUsedRegs(TmpUsedRegs);
  6890. { The target register may be marked as in use to cross
  6891. a jump to a distant label, so exclude it }
  6892. ExcludeRegFromUsedRegs(taicpu(hp1).oper[1]^.reg, TmpUsedRegs);
  6893. hp2 := p;
  6894. repeat
  6895. UpdateUsedRegs(TmpUsedRegs, tai(hp2.next));
  6896. { Explicitly check for the excluded register (don't include the first
  6897. instruction as it may be reading from here }
  6898. if ((p <> hp2) and (RegInInstruction(taicpu(hp1).oper[1]^.reg, hp2))) or
  6899. RegInUsedRegs(taicpu(hp1).oper[1]^.reg, TmpUsedRegs) then
  6900. begin
  6901. RegInUse := True;
  6902. Break;
  6903. end;
  6904. if not GetNextInstruction(hp2, hp2) then
  6905. InternalError(2020112340);
  6906. until (hp2 = hp1);
  6907. if not RegInUse and not RegUsedAfterInstruction(ThisReg, hp1, TmpUsedRegs) then
  6908. begin
  6909. DebugMsg(SPeepholeOptimization + 'Simplified register usage so ' + debug_regname(taicpu(hp1).oper[1]^.reg) + ' = ' + debug_regname(taicpu(p).oper[1]^.reg), p);
  6910. ThisReg := taicpu(hp1).oper[1]^.reg;
  6911. RegChanged := True;
  6912. TransferUsedRegs(TmpUsedRegs);
  6913. AllocRegBetween(ThisReg, p, hp1, TmpUsedRegs);
  6914. DebugMsg(SPeepholeOptimization + 'Movzx2Nop 3', hp1);
  6915. if p = hp1 then
  6916. RemoveCurrentp(p) { p = hp1 and will then become the next instruction }
  6917. else
  6918. RemoveInstruction(hp1);
  6919. { Instruction will become "mov %reg,%reg" }
  6920. if not p_removed and (taicpu(p).opcode = A_MOV) and
  6921. MatchOperand(taicpu(p).oper[0]^, ThisReg) then
  6922. begin
  6923. DebugMsg(SPeepholeOptimization + 'Movzx2Nop 6', p);
  6924. RemoveCurrentP(p);
  6925. p_removed := True;
  6926. end
  6927. else
  6928. taicpu(p).oper[1]^.reg := ThisReg;
  6929. Result := True;
  6930. end
  6931. else
  6932. DebugMsg(SPeepholeOptimization + 'Movzx2Mov 2', hp1);
  6933. end;
  6934. end
  6935. else
  6936. InternalError(2020112330);
  6937. { Now go through every instruction we found and change the
  6938. size. If TargetSize = MaxSize, then almost no changes are
  6939. needed and Result can remain False if it hasn't been set
  6940. yet.
  6941. If RegChanged is True, then the register requires changing
  6942. and so the point about TargetSize = MaxSize doesn't apply. }
  6943. if ((TargetSize <> MaxSize) or RegChanged) and (InstrMax >= 0) then
  6944. begin
  6945. for Index := 0 to InstrMax do
  6946. begin
  6947. { If p_removed is true, then the original MOV/Z was removed
  6948. and removing the AND instruction may not be safe if it
  6949. appears first }
  6950. if (InstrList[Index].oper[InstrList[Index].ops - 1]^.typ <> top_reg) then
  6951. InternalError(2020112310);
  6952. if InstrList[Index].oper[0]^.typ = top_reg then
  6953. InstrList[Index].oper[0]^.reg := ThisReg;
  6954. InstrList[Index].oper[InstrList[Index].ops - 1]^.reg := ThisReg;
  6955. InstrList[Index].opsize := TargetSize;
  6956. end;
  6957. Result := True;
  6958. end;
  6959. Exit;
  6960. end;
  6961. else
  6962. { This includes ADC, SBB, IDIV and SAR }
  6963. Break;
  6964. end;
  6965. if (TestValMin < 0) or (TestValMax < 0) or
  6966. (TestValMin > UpperLimit) or (TestValMax > UpperLimit) then
  6967. { Overflow }
  6968. Break
  6969. else if not SmallerOverflow and (TrySmaller <> S_NO) and
  6970. ((TestValMin > TrySmallerLimit) or (TestValMax > TrySmallerLimit)) then
  6971. SmallerOverflow := True;
  6972. { Contains highest index (so instruction count - 1) }
  6973. Inc(InstrMax);
  6974. if InstrMax > High(InstrList) then
  6975. SetLength(InstrList, InstrMax + LIST_STEP_SIZE);
  6976. InstrList[InstrMax] := taicpu(hp1);
  6977. end;
  6978. end;
    function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
      { Tries to fold a preceding register-to-register MOV into a following
        IMUL by switching to the 3-operand IMUL form:

            mov  %reg1,%reg2            imul y,%reg1,%reg2
            imul y,%reg2         -->

        so the MOV can be removed.  p is the IMUL; returns True if the
        transformation was performed (the MOV is deleted, p is rewritten
        in place). }
      var
        hp1 : tai;                      { the candidate MOV preceding p }
      begin
        Result:=false;
        { Candidate shape: "imul const/full-ref,%reg" (2 ops) or the
          degenerate 3-op form "imul y,%reg,%reg" where source and
          destination registers coincide, immediately preceded by
          "mov %reg1,%reg2" whose destination is the IMUL's register. }
        if (taicpu(p).ops >= 2) and
           ((taicpu(p).oper[0]^.typ = top_const) or
            ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
           (taicpu(p).oper[1]^.typ = top_reg) and
           ((taicpu(p).ops = 2) or
            ((taicpu(p).oper[2]^.typ = top_reg) and
             (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
           GetLastInstruction(p,hp1) and
           MatchInstruction(hp1,A_MOV,[]) and
           MatchOpType(taicpu(hp1),top_reg,top_reg) and
           (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
          begin
            TransferUsedRegs(TmpUsedRegs);
            { Only safe if the register written by the IMUL is dead
              afterwards, or the 3-op form already targets that same
              register anyway. }
            if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,p,TmpUsedRegs)) or
               ((taicpu(p).ops = 3) and (taicpu(p).oper[1]^.reg=taicpu(p).oper[2]^.reg)) then
              { change
                  mov reg1,reg2
                  imul y,reg2 to imul y,reg1,reg2 }
              begin
                taicpu(p).ops := 3;
                { NOTE: order matters - loadreg(2,...) reads oper[1]^.reg
                  (the old destination) before loadreg(1,...) overwrites
                  oper[1] with the MOV's source register. }
                taicpu(p).loadreg(2,taicpu(p).oper[1]^.reg);
                taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
                DebugMsg(SPeepholeOptimization + 'MovImul2Imul done',p);
                RemoveInstruction(hp1);
                result:=true;
              end;
          end;
      end;
  7012. procedure TX86AsmOptimizer.ConvertJumpToRET(const p: tai; const ret_p: tai);
  7013. var
  7014. ThisLabel: TAsmLabel;
  7015. begin
  7016. ThisLabel := tasmlabel(taicpu(p).oper[0]^.ref^.symbol);
  7017. ThisLabel.decrefs;
  7018. taicpu(p).opcode := A_RET;
  7019. taicpu(p).is_jmp := false;
  7020. taicpu(p).ops := taicpu(ret_p).ops;
  7021. case taicpu(ret_p).ops of
  7022. 0:
  7023. taicpu(p).clearop(0);
  7024. 1:
  7025. taicpu(p).loadconst(0,taicpu(ret_p).oper[0]^.val);
  7026. else
  7027. internalerror(2016041301);
  7028. end;
  7029. { If the original label is now dead, it might turn out that the label
  7030. immediately follows p. As a result, everything beyond it, which will
  7031. be just some final register configuration and a RET instruction, is
  7032. now dead code. [Kit] }
  7033. { NOTE: This is much faster than introducing a OptPass2RET routine and
  7034. running RemoveDeadCodeAfterJump for each RET instruction, because
  7035. this optimisation rarely happens and most RETs appear at the end of
  7036. routines where there is nothing that can be stripped. [Kit] }
  7037. if not ThisLabel.is_used then
  7038. RemoveDeadCodeAfterJump(p);
  7039. end;
  7040. function TX86AsmOptimizer.OptPass2SETcc(var p: tai): boolean;
  7041. var
  7042. hp1,hp2,next: tai; SetC, JumpC: TAsmCond;
  7043. Unconditional, PotentialModified: Boolean;
  7044. OperPtr: POper;
  7045. NewRef: TReference;
  7046. InstrList: array of taicpu;
  7047. InstrMax, Index: Integer;
  7048. const
  7049. {$ifdef DEBUG_AOPTCPU}
  7050. SNoFlags: shortstring = ' so the flags aren''t modified';
  7051. {$else DEBUG_AOPTCPU}
  7052. SNoFlags = '';
  7053. {$endif DEBUG_AOPTCPU}
  7054. begin
  7055. Result:=false;
  7056. if MatchOpType(taicpu(p),top_reg) and GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  7057. begin
  7058. if MatchInstruction(hp1, A_TEST, [S_B]) and
  7059. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  7060. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and
  7061. (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and
  7062. GetNextInstruction(hp1, hp2) and
  7063. MatchInstruction(hp2, A_Jcc, A_SETcc, []) then
  7064. { Change from: To:
  7065. set(C) %reg j(~C) label
  7066. test %reg,%reg/cmp $0,%reg
  7067. je label
  7068. set(C) %reg j(C) label
  7069. test %reg,%reg/cmp $0,%reg
  7070. jne label
  7071. (Also do something similar with sete/setne instead of je/jne)
  7072. }
  7073. begin
  7074. { Before we do anything else, we need to check the instructions
  7075. in between SETcc and TEST to make sure they don't modify the
  7076. FLAGS register - if -O2 or under, there won't be any
  7077. instructions between SET and TEST }
  7078. TransferUsedRegs(TmpUsedRegs);
  7079. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  7080. if (cs_opt_level3 in current_settings.optimizerswitches) then
  7081. begin
  7082. next := p;
  7083. SetLength(InstrList, 0);
  7084. InstrMax := -1;
  7085. PotentialModified := False;
  7086. { Make a note of every instruction that modifies the FLAGS
  7087. register }
  7088. while GetNextInstruction(next, next) and (next <> hp1) do
  7089. begin
  7090. if next.typ <> ait_instruction then
  7091. { GetNextInstructionUsingReg should have returned False }
  7092. InternalError(2021051701);
  7093. if RegModifiedByInstruction(NR_DEFAULTFLAGS, next) then
  7094. begin
  7095. case taicpu(next).opcode of
  7096. A_SETcc,
  7097. A_CMOVcc,
  7098. A_Jcc:
  7099. begin
  7100. if PotentialModified then
  7101. { Not safe because the flags were modified earlier }
  7102. Exit
  7103. else
  7104. { Condition is the same as the initial SETcc, so this is safe
  7105. (don't add to instruction list though) }
  7106. Continue;
  7107. end;
  7108. A_ADD:
  7109. begin
  7110. if (taicpu(next).opsize = S_B) or
  7111. { LEA doesn't support 8-bit operands }
  7112. (taicpu(next).oper[1]^.typ <> top_reg) or
  7113. { Must write to a register }
  7114. (taicpu(next).oper[0]^.typ = top_ref) then
  7115. { Require a constant or a register }
  7116. Exit;
  7117. PotentialModified := True;
  7118. end;
  7119. A_SUB:
  7120. begin
  7121. if (taicpu(next).opsize = S_B) or
  7122. { LEA doesn't support 8-bit operands }
  7123. (taicpu(next).oper[1]^.typ <> top_reg) or
  7124. { Must write to a register }
  7125. (taicpu(next).oper[0]^.typ <> top_const) or
  7126. (taicpu(next).oper[0]^.val = $80000000) then
  7127. { Can't subtract a register with LEA - also
  7128. check that the value isn't -2^31, as this
  7129. can't be negated }
  7130. Exit;
  7131. PotentialModified := True;
  7132. end;
  7133. A_SAL,
  7134. A_SHL:
  7135. begin
  7136. if (taicpu(next).opsize = S_B) or
  7137. { LEA doesn't support 8-bit operands }
  7138. (taicpu(next).oper[1]^.typ <> top_reg) or
  7139. { Must write to a register }
  7140. (taicpu(next).oper[0]^.typ <> top_const) or
  7141. (taicpu(next).oper[0]^.val < 0) or
  7142. (taicpu(next).oper[0]^.val > 3) then
  7143. Exit;
  7144. PotentialModified := True;
  7145. end;
  7146. A_IMUL:
  7147. begin
  7148. if (taicpu(next).ops <> 3) or
  7149. (taicpu(next).oper[1]^.typ <> top_reg) or
  7150. { Must write to a register }
  7151. (taicpu(next).oper[2]^.val in [2,3,4,5,8,9]) then
  7152. { We can convert "imul x,%reg1,%reg2" (where x = 2, 4 or 8)
  7153. to "lea (%reg1,x),%reg2". If x = 3, 5 or 9, we can
  7154. change this to "lea (%reg1,%reg1,(x-1)),%reg2" }
  7155. Exit
  7156. else
  7157. PotentialModified := True;
  7158. end;
  7159. else
  7160. { Don't know how to change this, so abort }
  7161. Exit;
  7162. end;
  7163. { Contains highest index (so instruction count - 1) }
  7164. Inc(InstrMax);
  7165. if InstrMax > High(InstrList) then
  7166. SetLength(InstrList, InstrMax + LIST_STEP_SIZE);
  7167. InstrList[InstrMax] := taicpu(next);
  7168. end;
  7169. UpdateUsedRegs(TmpUsedRegs, tai(next.next));
  7170. end;
  7171. if not Assigned(next) or (next <> hp1) then
  7172. { It should be equal to hp1 }
  7173. InternalError(2021051702);
  7174. { Cycle through each instruction and check to see if we can
  7175. change them to versions that don't modify the flags }
  7176. if (InstrMax >= 0) then
  7177. begin
  7178. for Index := 0 to InstrMax do
  7179. case InstrList[Index].opcode of
  7180. A_ADD:
  7181. begin
  7182. DebugMsg(SPeepholeOptimization + 'ADD -> LEA' + SNoFlags, InstrList[Index]);
  7183. InstrList[Index].opcode := A_LEA;
  7184. reference_reset(NewRef, 1, []);
  7185. NewRef.base := InstrList[Index].oper[1]^.reg;
  7186. if InstrList[Index].oper[0]^.typ = top_reg then
  7187. begin
  7188. NewRef.index := InstrList[Index].oper[0]^.reg;
  7189. NewRef.scalefactor := 1;
  7190. end
  7191. else
  7192. NewRef.offset := InstrList[Index].oper[0]^.val;
  7193. InstrList[Index].loadref(0, NewRef);
  7194. end;
  7195. A_SUB:
  7196. begin
  7197. DebugMsg(SPeepholeOptimization + 'SUB -> LEA' + SNoFlags, InstrList[Index]);
  7198. InstrList[Index].opcode := A_LEA;
  7199. reference_reset(NewRef, 1, []);
  7200. NewRef.base := InstrList[Index].oper[1]^.reg;
  7201. NewRef.offset := -InstrList[Index].oper[0]^.val;
  7202. InstrList[Index].loadref(0, NewRef);
  7203. end;
  7204. A_SHL,
  7205. A_SAL:
  7206. begin
  7207. DebugMsg(SPeepholeOptimization + 'SHL -> LEA' + SNoFlags, InstrList[Index]);
  7208. InstrList[Index].opcode := A_LEA;
  7209. reference_reset(NewRef, 1, []);
  7210. NewRef.index := InstrList[Index].oper[1]^.reg;
  7211. NewRef.scalefactor := 1 shl (InstrList[Index].oper[0]^.val);
  7212. InstrList[Index].loadref(0, NewRef);
  7213. end;
  7214. A_IMUL:
  7215. begin
  7216. DebugMsg(SPeepholeOptimization + 'IMUL -> LEA' + SNoFlags, InstrList[Index]);
  7217. InstrList[Index].opcode := A_LEA;
  7218. reference_reset(NewRef, 1, []);
  7219. NewRef.index := InstrList[Index].oper[1]^.reg;
  7220. case InstrList[Index].oper[0]^.val of
  7221. 2, 4, 8:
  7222. NewRef.scalefactor := InstrList[Index].oper[0]^.val;
  7223. else {3, 5 and 9}
  7224. begin
  7225. NewRef.scalefactor := InstrList[Index].oper[0]^.val - 1;
  7226. NewRef.base := InstrList[Index].oper[1]^.reg;
  7227. end;
  7228. end;
  7229. InstrList[Index].loadref(0, NewRef);
  7230. end;
  7231. else
  7232. InternalError(2021051710);
  7233. end;
  7234. end;
  7235. { Mark the FLAGS register as used across this whole block }
  7236. AllocRegBetween(NR_DEFAULTFLAGS, p, hp1, UsedRegs);
  7237. end;
  7238. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  7239. JumpC := taicpu(hp2).condition;
  7240. Unconditional := False;
  7241. if conditions_equal(JumpC, C_E) then
  7242. SetC := inverse_cond(taicpu(p).condition)
  7243. else if conditions_equal(JumpC, C_NE) then
  7244. SetC := taicpu(p).condition
  7245. else
  7246. { We've got something weird here (and inefficent) }
  7247. begin
  7248. DebugMsg('DEBUG: Inefficient jump - check code generation', p);
  7249. SetC := C_NONE;
  7250. { JAE/JNB will always branch (use 'condition_in', since C_AE <> C_NB normally) }
  7251. if condition_in(C_AE, JumpC) then
  7252. Unconditional := True
  7253. else
  7254. { Not sure what to do with this jump - drop out }
  7255. Exit;
  7256. end;
  7257. RemoveInstruction(hp1);
  7258. if Unconditional then
  7259. MakeUnconditional(taicpu(hp2))
  7260. else
  7261. begin
  7262. if SetC = C_NONE then
  7263. InternalError(2018061402);
  7264. taicpu(hp2).SetCondition(SetC);
  7265. end;
  7266. { as hp2 is a jump, we cannot use RegUsedAfterInstruction but we have to check if it is included in
  7267. TmpUsedRegs }
  7268. if not TmpUsedRegs[getregtype(taicpu(p).oper[0]^.reg)].IsUsed(taicpu(p).oper[0]^.reg) then
  7269. begin
  7270. RemoveCurrentp(p, hp2);
  7271. if taicpu(hp2).opcode = A_SETcc then
  7272. DebugMsg(SPeepholeOptimization + 'SETcc/TEST/SETcc -> SETcc',p)
  7273. else
  7274. DebugMsg(SPeepholeOptimization + 'SETcc/TEST/Jcc -> Jcc',p);
  7275. end
  7276. else
  7277. if taicpu(hp2).opcode = A_SETcc then
  7278. DebugMsg(SPeepholeOptimization + 'SETcc/TEST/SETcc -> SETcc/SETcc',p)
  7279. else
  7280. DebugMsg(SPeepholeOptimization + 'SETcc/TEST/Jcc -> SETcc/Jcc',p);
  7281. Result := True;
  7282. end
  7283. else if
  7284. { Make sure the instructions are adjacent }
  7285. (
  7286. not (cs_opt_level3 in current_settings.optimizerswitches) or
  7287. GetNextInstruction(p, hp1)
  7288. ) and
  7289. MatchInstruction(hp1, A_MOV, [S_B]) and
  7290. { Writing to memory is allowed }
  7291. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) then
  7292. begin
  7293. {
  7294. Watch out for sequences such as:
  7295. set(c)b %regb
  7296. movb %regb,(ref)
  7297. movb $0,1(ref)
  7298. movb $0,2(ref)
  7299. movb $0,3(ref)
  7300. Much more efficient to turn it into:
  7301. movl $0,%regl
  7302. set(c)b %regb
  7303. movl %regl,(ref)
  7304. Or:
  7305. set(c)b %regb
  7306. movzbl %regb,%regl
  7307. movl %regl,(ref)
  7308. }
  7309. if (taicpu(hp1).oper[1]^.typ = top_ref) and
  7310. GetNextInstruction(hp1, hp2) and
  7311. MatchInstruction(hp2, A_MOV, [S_B]) and
  7312. (taicpu(hp2).oper[1]^.typ = top_ref) and
  7313. CheckMemoryWrite(taicpu(hp1), taicpu(hp2)) then
  7314. begin
  7315. { Don't do anything else except set Result to True }
  7316. end
  7317. else
  7318. begin
  7319. if taicpu(p).oper[0]^.typ = top_reg then
  7320. begin
  7321. TransferUsedRegs(TmpUsedRegs);
  7322. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  7323. end;
  7324. { If it's not a register, it's a memory address }
  7325. if (taicpu(p).oper[0]^.typ <> top_reg) or RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp1, TmpUsedRegs) then
  7326. begin
  7327. { Even if the register is still in use, we can minimise the
  7328. pipeline stall by changing the MOV into another SETcc. }
  7329. taicpu(hp1).opcode := A_SETcc;
  7330. taicpu(hp1).condition := taicpu(p).condition;
  7331. if taicpu(hp1).oper[1]^.typ = top_ref then
  7332. begin
  7333. { Swapping the operand pointers like this is probably a
  7334. bit naughty, but it is far faster than using loadoper
  7335. to transfer the reference from oper[1] to oper[0] if
  7336. you take into account the extra procedure calls and
  7337. the memory allocation and deallocation required }
  7338. OperPtr := taicpu(hp1).oper[1];
  7339. taicpu(hp1).oper[1] := taicpu(hp1).oper[0];
  7340. taicpu(hp1).oper[0] := OperPtr;
  7341. end
  7342. else
  7343. taicpu(hp1).oper[0]^.reg := taicpu(hp1).oper[1]^.reg;
  7344. taicpu(hp1).clearop(1);
  7345. taicpu(hp1).ops := 1;
  7346. DebugMsg(SPeepholeOptimization + 'SETcc/Mov -> SETcc/SETcc',p);
  7347. end
  7348. else
  7349. begin
  7350. if taicpu(hp1).oper[1]^.typ = top_reg then
  7351. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,UsedRegs);
  7352. taicpu(p).loadoper(0, taicpu(hp1).oper[1]^);
  7353. RemoveInstruction(hp1);
  7354. DebugMsg(SPeepholeOptimization + 'SETcc/Mov -> SETcc',p);
  7355. end
  7356. end;
  7357. Result := True;
  7358. end;
  7359. end;
  7360. end;
    function TX86AsmOptimizer.OptPass2Jmp(var p : tai) : boolean;
      { Pass-2 optimisation of a JMP instruction (p).  Handles:
          - stripping unreachable code after the jump;
          - collapsing jumps whose target label immediately follows;
          - "jmp .L1 ... .L1: ret"          -> "ret";
          - unconditional jumps straight to a block of MOV-style
            instructions, which may be duplicated at the jump site
            (CheckJumpMovTransferOpt) when not optimising purely for size.
        Returns True if p was changed. }
      var
        hp1: tai;             { first instruction at the jump's target label }
        Count: Integer;       { out-parameter for CheckJumpMovTransferOpt }
        OrigLabel: TAsmLabel; { the label the jump originally targeted }
      begin
        result := False;
        { Sometimes, the optimisations below can permit this }
        RemoveDeadCodeAfterJump(p);
        { Only handle plain direct jumps to a local label (no base/index
          register, full symbolic address) }
        if (taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full) and (taicpu(p).oper[0]^.ref^.base=NR_NO) and
          (taicpu(p).oper[0]^.ref^.index=NR_NO) and (taicpu(p).oper[0]^.ref^.symbol is tasmlabel) then
          begin
            OrigLabel := TAsmLabel(taicpu(p).oper[0]^.ref^.symbol);
            { Also a side-effect of optimisations }
            if CollapseZeroDistJump(p, OrigLabel) then
              begin
                Result := True;
                Exit;
              end;
            hp1 := GetLabelWithSym(OrigLabel);
            { Only unconditional jumps qualify for the rewrites below }
            if (taicpu(p).condition=C_None) and assigned(hp1) and SkipLabels(hp1,hp1) and (hp1.typ = ait_instruction) then
              begin
                case taicpu(hp1).opcode of
                  A_RET:
                    {
                      change
                             jmp .L1
                             ...
                         .L1:
                             ret
                      into
                             ret
                    }
                    begin
                      ConvertJumpToRET(p, hp1);
                      result:=true;
                    end;
                  { Check any kind of direct assignment instruction }
                  A_MOV,
                  A_MOVD,
                  A_MOVQ,
                  A_MOVSX,
{$ifdef x86_64}
                  A_MOVSXD,
{$endif x86_64}
                  A_MOVZX,
                  A_MOVAPS,
                  A_MOVUPS,
                  A_MOVSD,
                  A_MOVAPD,
                  A_MOVUPD,
                  A_MOVDQA,
                  A_MOVDQU,
                  A_VMOVSS,
                  A_VMOVAPS,
                  A_VMOVUPS,
                  A_VMOVSD,
                  A_VMOVAPD,
                  A_VMOVUPD,
                  A_VMOVDQA,
                  A_VMOVDQU:
                    { Skip when optimising exclusively for size, since the
                      transfer optimisation duplicates instructions }
                    if ((current_settings.optimizerswitches * [cs_opt_level3, cs_opt_size]) <> [cs_opt_size]) and
                      CheckJumpMovTransferOpt(p, hp1, 0, Count) then
                      begin
                        Result := True;
                        Exit;
                      end;
                  else
                    ;
                end;
              end;
          end;
      end;
  7434. class function TX86AsmOptimizer.CanBeCMOV(p : tai) : boolean;
  7435. begin
  7436. CanBeCMOV:=assigned(p) and
  7437. MatchInstruction(p,A_MOV,[S_W,S_L,S_Q]) and
  7438. { we can't use cmov ref,reg because
  7439. ref could be nil and cmov still throws an exception
  7440. if ref=nil but the mov isn't done (FK)
  7441. or ((taicpu(p).oper[0]^.typ = top_ref) and
  7442. (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
  7443. }
  7444. (taicpu(p).oper[1]^.typ = top_reg) and
  7445. (
  7446. (taicpu(p).oper[0]^.typ = top_reg) or
  7447. { allow references, but only pure symbols or got rel. addressing with RIP as based,
  7448. it is not expected that this can cause a seg. violation }
  7449. (
  7450. (taicpu(p).oper[0]^.typ = top_ref) and
  7451. IsRefSafe(taicpu(p).oper[0]^.ref)
  7452. )
  7453. );
  7454. end;
  7455. function TX86AsmOptimizer.OptPass2Jcc(var p : tai) : boolean;
  7456. var
  7457. hp1,hp2: tai;
  7458. {$ifndef i8086}
  7459. hp3,hp4,hpmov2, hp5: tai;
  7460. l : Longint;
  7461. condition : TAsmCond;
  7462. {$endif i8086}
  7463. carryadd_opcode : TAsmOp;
  7464. symbol: TAsmSymbol;
  7465. reg: tsuperregister;
  7466. increg, tmpreg: TRegister;
  7467. begin
  7468. result:=false;
  7469. if GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) then
  7470. begin
  7471. symbol := TAsmLabel(taicpu(p).oper[0]^.ref^.symbol);
  7472. if (
  7473. (
  7474. ((Taicpu(hp1).opcode=A_ADD) or (Taicpu(hp1).opcode=A_SUB)) and
  7475. MatchOptype(Taicpu(hp1),top_const,top_reg) and
  7476. (Taicpu(hp1).oper[0]^.val=1)
  7477. ) or
  7478. ((Taicpu(hp1).opcode=A_INC) or (Taicpu(hp1).opcode=A_DEC))
  7479. ) and
  7480. GetNextInstruction(hp1,hp2) and
  7481. SkipAligns(hp2, hp2) and
  7482. (hp2.typ = ait_label) and
  7483. (Tasmlabel(symbol) = Tai_label(hp2).labsym) then
  7484. { jb @@1 cmc
  7485. inc/dec operand --> adc/sbb operand,0
  7486. @@1:
  7487. ... and ...
  7488. jnb @@1
  7489. inc/dec operand --> adc/sbb operand,0
  7490. @@1: }
  7491. begin
  7492. if Taicpu(p).condition in [C_NAE,C_B,C_C] then
  7493. begin
  7494. case taicpu(hp1).opcode of
  7495. A_INC,
  7496. A_ADD:
  7497. carryadd_opcode:=A_ADC;
  7498. A_DEC,
  7499. A_SUB:
  7500. carryadd_opcode:=A_SBB;
  7501. else
  7502. InternalError(2021011001);
  7503. end;
  7504. Taicpu(p).clearop(0);
  7505. Taicpu(p).ops:=0;
  7506. Taicpu(p).is_jmp:=false;
  7507. Taicpu(p).opcode:=A_CMC;
  7508. Taicpu(p).condition:=C_NONE;
  7509. DebugMsg(SPeepholeOptimization+'JccAdd/Inc/Dec2CmcAdc/Sbb',p);
  7510. Taicpu(hp1).ops:=2;
  7511. if (Taicpu(hp1).opcode=A_ADD) or (Taicpu(hp1).opcode=A_SUB) then
  7512. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[1]^)
  7513. else
  7514. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  7515. Taicpu(hp1).loadconst(0,0);
  7516. Taicpu(hp1).opcode:=carryadd_opcode;
  7517. result:=true;
  7518. exit;
  7519. end
  7520. else if Taicpu(p).condition in [C_AE,C_NB,C_NC] then
  7521. begin
  7522. case taicpu(hp1).opcode of
  7523. A_INC,
  7524. A_ADD:
  7525. carryadd_opcode:=A_ADC;
  7526. A_DEC,
  7527. A_SUB:
  7528. carryadd_opcode:=A_SBB;
  7529. else
  7530. InternalError(2021011002);
  7531. end;
  7532. Taicpu(hp1).ops:=2;
  7533. DebugMsg(SPeepholeOptimization+'JccAdd/Inc/Dec2Adc/Sbb',p);
  7534. if (Taicpu(hp1).opcode=A_ADD) or (Taicpu(hp1).opcode=A_SUB) then
  7535. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[1]^)
  7536. else
  7537. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  7538. Taicpu(hp1).loadconst(0,0);
  7539. Taicpu(hp1).opcode:=carryadd_opcode;
  7540. RemoveCurrentP(p, hp1);
  7541. result:=true;
  7542. exit;
  7543. end
  7544. {
  7545. jcc @@1 setcc tmpreg
  7546. inc/dec/add/sub operand -> (movzx tmpreg)
  7547. @@1: add/sub tmpreg,operand
  7548. While this increases code size slightly, it makes the code much faster if the
  7549. jump is unpredictable
  7550. }
  7551. else if not(cs_opt_size in current_settings.optimizerswitches) then
  7552. begin
  7553. { search for an available register which is volatile }
  7554. for reg in tcpuregisterset do
  7555. begin
  7556. if
  7557. {$if defined(i386) or defined(i8086)}
  7558. { Only use registers whose lowest 8-bits can Be accessed }
  7559. (reg in [RS_EAX,RS_EBX,RS_ECX,RS_EDX]) and
  7560. {$endif i386 or i8086}
  7561. (reg in paramanager.get_volatile_registers_int(current_procinfo.procdef.proccalloption)) and
  7562. not(reg in UsedRegs[R_INTREGISTER].GetUsedRegs)
  7563. { We don't need to check if tmpreg is in hp1 or not, because
  7564. it will be marked as in use at p (if not, this is
  7565. indictive of a compiler bug). }
  7566. then
  7567. begin
  7568. TAsmLabel(symbol).decrefs;
  7569. increg := newreg(R_INTREGISTER,reg,R_SUBL);
  7570. Taicpu(p).clearop(0);
  7571. Taicpu(p).ops:=1;
  7572. Taicpu(p).is_jmp:=false;
  7573. Taicpu(p).opcode:=A_SETcc;
  7574. DebugMsg(SPeepholeOptimization+'JccAdd2SetccAdd',p);
  7575. Taicpu(p).condition:=inverse_cond(Taicpu(p).condition);
  7576. Taicpu(p).loadreg(0,increg);
  7577. if getsubreg(Taicpu(hp1).oper[1]^.reg)<>R_SUBL then
  7578. begin
  7579. case getsubreg(Taicpu(hp1).oper[1]^.reg) of
  7580. R_SUBW:
  7581. begin
  7582. tmpreg := newreg(R_INTREGISTER,reg,R_SUBW);
  7583. hp2:=Taicpu.op_reg_reg(A_MOVZX,S_BW,increg,tmpreg);
  7584. end;
  7585. R_SUBD:
  7586. begin
  7587. tmpreg := newreg(R_INTREGISTER,reg,R_SUBD);
  7588. hp2:=Taicpu.op_reg_reg(A_MOVZX,S_BL,increg,tmpreg);
  7589. end;
  7590. {$ifdef x86_64}
  7591. R_SUBQ:
  7592. begin
  7593. { MOVZX doesn't have a 64-bit variant, because
  7594. the 32-bit version implicitly zeroes the
  7595. upper 32-bits of the destination register }
  7596. hp2:=Taicpu.op_reg_reg(A_MOVZX,S_BL,increg,
  7597. newreg(R_INTREGISTER,reg,R_SUBD));
  7598. tmpreg := newreg(R_INTREGISTER,reg,R_SUBQ);
  7599. end;
  7600. {$endif x86_64}
  7601. else
  7602. Internalerror(2020030601);
  7603. end;
  7604. taicpu(hp2).fileinfo:=taicpu(hp1).fileinfo;
  7605. asml.InsertAfter(hp2,p);
  7606. end
  7607. else
  7608. tmpreg := increg;
  7609. if (Taicpu(hp1).opcode=A_INC) or (Taicpu(hp1).opcode=A_DEC) then
  7610. begin
  7611. Taicpu(hp1).ops:=2;
  7612. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^)
  7613. end;
  7614. Taicpu(hp1).loadreg(0,tmpreg);
  7615. AllocRegBetween(tmpreg,p,hp1,UsedRegs);
  7616. Result := True;
  7617. { p is no longer a Jcc instruction, so exit }
  7618. Exit;
  7619. end;
  7620. end;
  7621. end;
  7622. end;
  7623. { Detect the following:
  7624. jmp<cond> @Lbl1
  7625. jmp @Lbl2
  7626. ...
  7627. @Lbl1:
  7628. ret
  7629. Change to:
  7630. jmp<inv_cond> @Lbl2
  7631. ret
  7632. }
  7633. if MatchInstruction(hp1,A_JMP,[]) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full) then
  7634. begin
  7635. hp2:=getlabelwithsym(TAsmLabel(symbol));
  7636. if Assigned(hp2) and SkipLabels(hp2,hp2) and
  7637. MatchInstruction(hp2,A_RET,[S_NO]) then
  7638. begin
  7639. taicpu(p).condition := inverse_cond(taicpu(p).condition);
  7640. { Change label address to that of the unconditional jump }
  7641. taicpu(p).loadoper(0, taicpu(hp1).oper[0]^);
  7642. TAsmLabel(symbol).DecRefs;
  7643. taicpu(hp1).opcode := A_RET;
  7644. taicpu(hp1).is_jmp := false;
  7645. taicpu(hp1).ops := taicpu(hp2).ops;
  7646. DebugMsg(SPeepholeOptimization+'JccJmpRet2J!ccRet',p);
  7647. case taicpu(hp2).ops of
  7648. 0:
  7649. taicpu(hp1).clearop(0);
  7650. 1:
  7651. taicpu(hp1).loadconst(0,taicpu(hp2).oper[0]^.val);
  7652. else
  7653. internalerror(2016041302);
  7654. end;
  7655. end;
  7656. {$ifndef i8086}
  7657. end
  7658. {
  7659. convert
  7660. j<c> .L1
  7661. mov 1,reg
  7662. jmp .L2
  7663. .L1
  7664. mov 0,reg
  7665. .L2
  7666. into
  7667. mov 0,reg
  7668. set<not(c)> reg
  7669. take care of alignment and that the mov 0,reg is not converted into a xor as this
  7670. would destroy the flag contents
  7671. }
  7672. else if MatchInstruction(hp1,A_MOV,[]) and
  7673. MatchOpType(taicpu(hp1),top_const,top_reg) and
  7674. {$ifdef i386}
  7675. (
  7676. { Under i386, ESI, EDI, EBP and ESP
  7677. don't have an 8-bit representation }
  7678. not (getsupreg(taicpu(hp1).oper[1]^.reg) in [RS_ESI, RS_EDI, RS_EBP, RS_ESP])
  7679. ) and
  7680. {$endif i386}
  7681. (taicpu(hp1).oper[0]^.val=1) and
  7682. GetNextInstruction(hp1,hp2) and
  7683. MatchInstruction(hp2,A_JMP,[]) and (taicpu(hp2).oper[0]^.ref^.refaddr=addr_full) and
  7684. GetNextInstruction(hp2,hp3) and
  7685. { skip align }
  7686. ((hp3.typ<>ait_align) or GetNextInstruction(hp3,hp3)) and
  7687. (hp3.typ=ait_label) and
  7688. (tasmlabel(taicpu(p).oper[0]^.ref^.symbol)=tai_label(hp3).labsym) and
  7689. (tai_label(hp3).labsym.getrefs=1) and
  7690. GetNextInstruction(hp3,hp4) and
  7691. MatchInstruction(hp4,A_MOV,[]) and
  7692. MatchOpType(taicpu(hp4),top_const,top_reg) and
  7693. (taicpu(hp4).oper[0]^.val=0) and
  7694. MatchOperand(taicpu(hp1).oper[1]^,taicpu(hp4).oper[1]^) and
  7695. GetNextInstruction(hp4,hp5) and
  7696. (hp5.typ=ait_label) and
  7697. (tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol)=tai_label(hp5).labsym) and
  7698. (tai_label(hp5).labsym.getrefs=1) then
  7699. begin
  7700. AllocRegBetween(NR_FLAGS,p,hp4,UsedRegs);
  7701. DebugMsg(SPeepholeOptimization+'JccMovJmpMov2MovSetcc',p);
  7702. { remove last label }
  7703. RemoveInstruction(hp5);
  7704. { remove second label }
  7705. RemoveInstruction(hp3);
  7706. { if align is present remove it }
  7707. if GetNextInstruction(hp2,hp3) and (hp3.typ=ait_align) then
  7708. RemoveInstruction(hp3);
  7709. { remove jmp }
  7710. RemoveInstruction(hp2);
  7711. if taicpu(hp1).opsize=S_B then
  7712. RemoveInstruction(hp1)
  7713. else
  7714. taicpu(hp1).loadconst(0,0);
  7715. taicpu(hp4).opcode:=A_SETcc;
  7716. taicpu(hp4).opsize:=S_B;
  7717. taicpu(hp4).condition:=inverse_cond(taicpu(p).condition);
  7718. taicpu(hp4).loadreg(0,newreg(R_INTREGISTER,getsupreg(taicpu(hp4).oper[1]^.reg),R_SUBL));
  7719. taicpu(hp4).opercnt:=1;
  7720. taicpu(hp4).ops:=1;
  7721. taicpu(hp4).freeop(1);
  7722. RemoveCurrentP(p);
  7723. Result:=true;
  7724. exit;
  7725. end
  7726. else if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
  7727. begin
  7728. { check for
  7729. jCC xxx
  7730. <several movs>
  7731. xxx:
  7732. }
  7733. l:=0;
  7734. while assigned(hp1) and
  7735. CanBeCMOV(hp1) and
  7736. { stop on labels }
  7737. not(hp1.typ=ait_label) do
  7738. begin
  7739. inc(l);
  7740. GetNextInstruction(hp1,hp1);
  7741. end;
  7742. if assigned(hp1) then
  7743. begin
  7744. if FindLabel(tasmlabel(symbol),hp1) then
  7745. begin
  7746. if (l<=4) and (l>0) then
  7747. begin
  7748. condition:=inverse_cond(taicpu(p).condition);
  7749. GetNextInstruction(p,hp1);
  7750. repeat
  7751. if not Assigned(hp1) then
  7752. InternalError(2018062900);
  7753. taicpu(hp1).opcode:=A_CMOVcc;
  7754. taicpu(hp1).condition:=condition;
  7755. UpdateUsedRegs(hp1);
  7756. GetNextInstruction(hp1,hp1);
  7757. until not(CanBeCMOV(hp1));
  7758. { Remember what hp1 is in case there's multiple aligns to get rid of }
  7759. hp2 := hp1;
  7760. repeat
  7761. if not Assigned(hp2) then
  7762. InternalError(2018062910);
  7763. case hp2.typ of
  7764. ait_label:
  7765. { What we expected - break out of the loop (it won't be a dead label at the top of
  7766. a cluster because that was optimised at an earlier stage) }
  7767. Break;
  7768. ait_align:
  7769. { Go to the next entry until a label is found (may be multiple aligns before it) }
  7770. begin
  7771. hp2 := tai(hp2.Next);
  7772. Continue;
  7773. end;
  7774. else
  7775. begin
  7776. { Might be a comment or temporary allocation entry }
  7777. if not (hp2.typ in SkipInstr) then
  7778. InternalError(2018062911);
  7779. hp2 := tai(hp2.Next);
  7780. Continue;
  7781. end;
  7782. end;
  7783. until False;
  7784. { Now we can safely decrement the reference count }
  7785. tasmlabel(symbol).decrefs;
  7786. DebugMsg(SPeepholeOptimization+'JccMov2CMov',p);
  7787. { Remove the original jump }
  7788. RemoveInstruction(p); { Note, the choice to not use RemoveCurrentp is deliberate }
  7789. GetNextInstruction(hp2, p); { Instruction after the label }
  7790. { Remove the label if this is its final reference }
  7791. if (tasmlabel(symbol).getrefs=0) then
  7792. StripLabelFast(hp1);
  7793. if Assigned(p) then
  7794. begin
  7795. UpdateUsedRegs(p);
  7796. result:=true;
  7797. end;
  7798. exit;
  7799. end;
  7800. end
  7801. else
  7802. begin
  7803. { check further for
  7804. jCC xxx
  7805. <several movs 1>
  7806. jmp yyy
  7807. xxx:
  7808. <several movs 2>
  7809. yyy:
  7810. }
  7811. { hp2 points to jmp yyy }
  7812. hp2:=hp1;
  7813. { skip hp1 to xxx (or an align right before it) }
  7814. GetNextInstruction(hp1, hp1);
  7815. if assigned(hp2) and
  7816. assigned(hp1) and
  7817. (l<=3) and
  7818. (hp2.typ=ait_instruction) and
  7819. (taicpu(hp2).is_jmp) and
  7820. (taicpu(hp2).condition=C_None) and
  7821. { real label and jump, no further references to the
  7822. label are allowed }
  7823. (tasmlabel(symbol).getrefs=1) and
  7824. FindLabel(tasmlabel(symbol),hp1) then
  7825. begin
  7826. l:=0;
  7827. { skip hp1 to <several moves 2> }
  7828. if (hp1.typ = ait_align) then
  7829. GetNextInstruction(hp1, hp1);
  7830. GetNextInstruction(hp1, hpmov2);
  7831. hp1 := hpmov2;
  7832. while assigned(hp1) and
  7833. CanBeCMOV(hp1) do
  7834. begin
  7835. inc(l);
  7836. GetNextInstruction(hp1, hp1);
  7837. end;
  7838. { hp1 points to yyy (or an align right before it) }
  7839. hp3 := hp1;
  7840. if assigned(hp1) and
  7841. FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
  7842. begin
  7843. condition:=inverse_cond(taicpu(p).condition);
  7844. GetNextInstruction(p,hp1);
  7845. repeat
  7846. taicpu(hp1).opcode:=A_CMOVcc;
  7847. taicpu(hp1).condition:=condition;
  7848. UpdateUsedRegs(hp1);
  7849. GetNextInstruction(hp1,hp1);
  7850. until not(assigned(hp1)) or
  7851. not(CanBeCMOV(hp1));
  7852. condition:=inverse_cond(condition);
  7853. hp1 := hpmov2;
  7854. { hp1 is now at <several movs 2> }
  7855. while Assigned(hp1) and CanBeCMOV(hp1) do
  7856. begin
  7857. taicpu(hp1).opcode:=A_CMOVcc;
  7858. taicpu(hp1).condition:=condition;
  7859. UpdateUsedRegs(hp1);
  7860. GetNextInstruction(hp1,hp1);
  7861. end;
  7862. hp1 := p;
  7863. { Get first instruction after label }
  7864. GetNextInstruction(hp3, p);
  7865. if assigned(p) and (hp3.typ = ait_align) then
  7866. GetNextInstruction(p, p);
  7867. { Don't dereference yet, as doing so will cause
  7868. GetNextInstruction to skip the label and
  7869. optional align marker. [Kit] }
  7870. GetNextInstruction(hp2, hp4);
  7871. DebugMsg(SPeepholeOptimization+'JccMovJmpMov2CMovCMov',hp1);
  7872. { remove jCC }
  7873. RemoveInstruction(hp1);
  7874. { Now we can safely decrement it }
  7875. tasmlabel(symbol).decrefs;
  7876. { Remove label xxx (it will have a ref of zero due to the initial check }
  7877. StripLabelFast(hp4);
  7878. { remove jmp }
  7879. symbol := taicpu(hp2).oper[0]^.ref^.symbol;
  7880. RemoveInstruction(hp2);
  7881. { As before, now we can safely decrement it }
  7882. tasmlabel(symbol).decrefs;
  7883. { Remove label yyy (and the optional alignment) if its reference falls to zero }
  7884. if tasmlabel(symbol).getrefs = 0 then
  7885. StripLabelFast(hp3);
  7886. if Assigned(p) then
  7887. begin
  7888. UpdateUsedRegs(p);
  7889. result:=true;
  7890. end;
  7891. exit;
  7892. end;
  7893. end;
  7894. end;
  7895. end;
  7896. {$endif i8086}
  7897. end;
  7898. end;
  7899. end;
    { Pass-1 optimisation of "movx" (MOVSX/MOVZX/MOVSXD) instructions.
      Tries, in order:
        * folding a following foldable arithmetic op + store-back MOV so the
          arithmetic happens directly on the original operand;
        * collapsing movx+mov chains (MovxMov2Movx / MovxMov2Mov);
        * removing a movx made redundant by a following SHL/SAL that shifts
          out the extended bits (MovxShl2Shl);
        * hoisting a following SHR/SAR in front of the movx when the shift
          count stays inside the source size (MovzShr2ShrMovz / MovsSar2SarMovs);
        * MOVZX-specific cleanups: deleting superfluous ANDs, or replacing
          MOVZX by cheaper AND/MOV forms on CPUs where IsMOVZXAcceptable
          is False.
      Returns True when p was removed or replaced and the optimiser should
      re-examine from p.  NOTE: some branches modify hp1 in place without
      setting Result - this mirrors the original control flow. }
    function TX86AsmOptimizer.OptPass1Movx(var p : tai) : boolean;
      var
        hp1,hp2: tai;
        { True when p writes a register and is immediately followed by
          another real instruction (common precondition of most branches) }
        reg_and_hp1_is_instr: Boolean;
      begin
        result:=false;
        reg_and_hp1_is_instr:=(taicpu(p).oper[1]^.typ = top_reg) and
          GetNextInstruction(p,hp1) and
          (hp1.typ = ait_instruction);
        if reg_and_hp1_is_instr and
          (
            (taicpu(hp1).opcode <> A_LEA) or
            { If the LEA instruction can be converted into an arithmetic instruction,
              it may be possible to then fold it. }
            (
              { If the flags register is in use, don't change the instruction
                to an ADD otherwise this will scramble the flags. [Kit] }
              not RegInUsedRegs(NR_DEFAULTFLAGS, UsedRegs) and
              ConvertLEA(taicpu(hp1))
            )
          ) and
          IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
          GetNextInstruction(hp1,hp2) and
          MatchInstruction(hp2,A_MOV,[]) and
          (taicpu(hp2).oper[0]^.typ = top_reg) and
          { the store-back MOV must write to the movx source operand }
          OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
          { the store-back size must match the movx source size }
          ((taicpu(p).opsize in [S_BW,S_BL]) and (taicpu(hp2).opsize=S_B) or
           (taicpu(p).opsize in [S_WL]) and (taicpu(hp2).opsize=S_W)) and
{$ifdef i386}
          { not all registers have byte size sub registers on i386 }
          ((taicpu(hp2).opsize<>S_B) or (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX])) and
{$endif i386}
          { the arithmetic op's destination must be the register stored back }
          (((taicpu(hp1).ops=2) and
            (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
           ((taicpu(hp1).ops=1) and
            (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
          not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then
          begin
            { change movsX/movzX reg/ref, reg2
                     add/sub/or/... reg3/$const, reg2
                     mov reg2 reg/ref
              to add/sub/or/... reg3/$const, reg/ref }
            { by example:
                movswl %si,%eax        movswl %si,%eax      p
                decl %eax              addl %edx,%eax       hp1
                movw %ax,%si           movw %ax,%si         hp2
              ->
                movswl %si,%eax        movswl %si,%eax      p
                decw %eax              addw %edx,%eax       hp1
                movw %ax,%si           movw %ax,%si         hp2
            }
            { narrow the arithmetic op to the store-back size }
            taicpu(hp1).changeopsize(taicpu(hp2).opsize);
            {
              ->
                movswl %si,%eax        movswl %si,%eax      p
                decw %si               addw %dx,%si         hp1
                movw %ax,%si           movw %ax,%si         hp2
            }
            { redirect the arithmetic op's destination to the original operand }
            case taicpu(hp1).ops of
              1:
                taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
              2:
                begin
                  taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
                  if (taicpu(hp1).oper[0]^.typ = top_reg) then
                    { shrink the source register to the new operation size }
                    setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
                end;
              else
                internalerror(2008042702);
            end;
            {
              ->
                decw %si               addw %dx,%si         p
            }
            DebugMsg(SPeepholeOptimization + 'var3',p);
            { drop the now-redundant movx and store-back mov }
            RemoveCurrentP(p, hp1);
            RemoveInstruction(hp2);
          end
        else if reg_and_hp1_is_instr and
          (taicpu(hp1).opcode = A_MOV) and
          MatchOpType(taicpu(hp1),top_reg,top_reg) and
          (MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^)
{$ifdef x86_64}
          { check for implicit extension to 64 bit }
          or
          ((taicpu(p).opsize in [S_BL,S_WL]) and
           (taicpu(hp1).opsize=S_Q) and
           SuperRegistersEqual(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[0]^.reg)
          )
{$endif x86_64}
          )
          then
          begin
            { change
                movx %reg1,%reg2
                mov %reg2,%reg3
                dealloc %reg2
              into
                movx %reg,%reg3
            }
            TransferUsedRegs(TmpUsedRegs);
            UpdateUsedRegs(TmpUsedRegs, tai(p.next));
            { only valid if the intermediate register dies after the mov }
            if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
              begin
                DebugMsg(SPeepholeOptimization + 'MovxMov2Movx',p);
{$ifdef x86_64}
                if (taicpu(p).opsize in [S_BL,S_WL]) and
                  (taicpu(hp1).opsize=S_Q) then
                  { 32-bit writes implicitly zero the upper half, so target
                    the 32-bit subregister of the 64-bit destination }
                  taicpu(p).loadreg(1,newreg(R_INTREGISTER,getsupreg(taicpu(hp1).oper[1]^.reg),R_SUBD))
                else
{$endif x86_64}
                  taicpu(p).loadreg(1,taicpu(hp1).oper[1]^.reg);
                RemoveInstruction(hp1);
              end;
          end
        else if reg_and_hp1_is_instr and
          (taicpu(hp1).opcode = A_MOV) and
          MatchOpType(taicpu(hp1),top_reg,top_reg) and
          { the following mov must read no more bits than the movx source
            provides, so the extension itself is irrelevant }
          (((taicpu(p).opsize in [S_BW,S_BL,S_WL{$ifdef x86_64},S_BQ,S_WQ,S_LQ{$endif x86_64}]) and
            (taicpu(hp1).opsize=S_B)) or
           ((taicpu(p).opsize in [S_WL{$ifdef x86_64},S_WQ,S_LQ{$endif x86_64}]) and
            (taicpu(hp1).opsize=S_W))
{$ifdef x86_64}
           or ((taicpu(p).opsize=S_LQ) and
            (taicpu(hp1).opsize=S_L))
{$endif x86_64}
          ) and
          SuperRegistersEqual(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[0]^.reg) then
          begin
            { change
                movx %reg1,%reg2
                mov %reg2,%reg3
                dealloc %reg2
              into
                mov %reg1,%reg3
              if the second mov accesses only the bits stored in reg1
            }
            TransferUsedRegs(TmpUsedRegs);
            UpdateUsedRegs(TmpUsedRegs, tai(p.next));
            if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
              begin
                DebugMsg(SPeepholeOptimization + 'MovxMov2Mov',p);
                if taicpu(p).oper[0]^.typ=top_reg then
                  begin
                    { make the mov read directly from the movx source,
                      using the subregister matching the mov's size }
                    case taicpu(hp1).opsize of
                      S_B:
                        taicpu(hp1).loadreg(0,newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[0]^.reg),R_SUBL));
                      S_W:
                        taicpu(hp1).loadreg(0,newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[0]^.reg),R_SUBW));
                      S_L:
                        taicpu(hp1).loadreg(0,newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[0]^.reg),R_SUBD));
                      else
                        Internalerror(2020102301);
                    end;
                    AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
                  end
                else
                  { memory source: let the mov load from it directly }
                  taicpu(hp1).loadref(0,taicpu(p).oper[0]^.ref^);
                RemoveCurrentP(p);
                result:=true;
                exit;
              end;
          end
        else if reg_and_hp1_is_instr and
          (taicpu(p).oper[0]^.typ = top_reg) and
          (
            (taicpu(hp1).opcode = A_SHL) or (taicpu(hp1).opcode = A_SAL)
          ) and
          (taicpu(hp1).oper[0]^.typ = top_const) and
          SuperRegistersEqual(taicpu(p).oper[0]^.reg, taicpu(p).oper[1]^.reg) and
          MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^.reg) and
          { Minimum shift value allowed is the bit difference between the sizes }
          (taicpu(hp1).oper[0]^.val >=
            { Multiply by 8 because tcgsize2size returns bytes, not bits }
            8 * (
              tcgsize2size[reg_cgsize(taicpu(p).oper[1]^.reg)] -
              tcgsize2size[reg_cgsize(taicpu(p).oper[0]^.reg)]
            )
          ) then
          begin
            { For:
                movsx/movzx %reg1,%reg1 (same register, just different sizes)
                shl/sal     ##,   %reg1
              Remove the movsx/movzx instruction if the shift overwrites the
              extended bits of the register (e.g. movslq %eax,%rax; shlq $32,%rax
            }
            DebugMsg(SPeepholeOptimization + 'MovxShl2Shl',p);
            RemoveCurrentP(p, hp1);
            Result := True;
            Exit;
          end
        else if reg_and_hp1_is_instr and
          (taicpu(p).oper[0]^.typ = top_reg) and
          (
            { SHR pairs with zero-extension, SAR with sign-extension - the
              shifted-in bits must match what the extension would produce }
            ((taicpu(hp1).opcode = A_SHR) and (taicpu(p).opcode = A_MOVZX)) or
            ((taicpu(hp1).opcode = A_SAR) and (taicpu(p).opcode <> A_MOVZX))
          ) and
          (taicpu(hp1).oper[0]^.typ = top_const) and
          SuperRegistersEqual(taicpu(p).oper[0]^.reg, taicpu(p).oper[1]^.reg) and
          MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^.reg) and
          { Minimum shift value allowed is the bit size of the smallest register - 1 }
          (taicpu(hp1).oper[0]^.val <
            { Multiply by 8 because tcgsize2size returns bytes, not bits }
            8 * (
              tcgsize2size[reg_cgsize(taicpu(p).oper[0]^.reg)]
            )
          ) then
          begin
            { For:
                movsx %reg1,%reg1    movzx %reg1,%reg1 (same register, just different sizes)
                sar   ##,   %reg1    shr   ##,   %reg1
              Move the shift to before the movx instruction if the shift value
              is not too large.
            }
            { relocate hp1 in front of p and retarget it at the narrow register }
            asml.Remove(hp1);
            asml.InsertBefore(hp1, p);
            taicpu(hp1).oper[1]^.reg := taicpu(p).oper[0]^.reg;
            case taicpu(p).opsize of
              s_BW, S_BL{$ifdef x86_64}, S_BQ{$endif}:
                taicpu(hp1).opsize := S_B;
              S_WL{$ifdef x86_64}, S_WQ{$endif}:
                taicpu(hp1).opsize := S_W;
{$ifdef x86_64}
              S_LQ:
                taicpu(hp1).opsize := S_L;
{$endif}
              else
                InternalError(2020112401);
            end;
            if (taicpu(hp1).opcode = A_SHR) then
              DebugMsg(SPeepholeOptimization + 'MovzShr2ShrMovz', hp1)
            else
              DebugMsg(SPeepholeOptimization + 'MovsSar2SarMovs', hp1);
            Result := True;
          end
        else if taicpu(p).opcode=A_MOVZX then
          begin
            { removes superfluous And's after movzx's }
            if reg_and_hp1_is_instr and
              (taicpu(hp1).opcode = A_AND) and
              MatchOpType(taicpu(hp1),top_const,top_reg) and
              ((taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)
{$ifdef x86_64}
               { check for implicit extension to 64 bit }
               or
               ((taicpu(p).opsize in [S_BL,S_WL]) and
                (taicpu(hp1).opsize=S_Q) and
                SuperRegistersEqual(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^.reg)
               )
{$endif x86_64}
              )
              then
              begin
                { an AND whose mask covers all bits the movzx could set is a no-op }
                case taicpu(p).opsize Of
                  S_BL, S_BW{$ifdef x86_64}, S_BQ{$endif x86_64}:
                    if (taicpu(hp1).oper[0]^.val = $ff) then
                      begin
                        DebugMsg(SPeepholeOptimization + 'MovzAnd2Movz1',p);
                        RemoveInstruction(hp1);
                        Result:=true;
                        exit;
                      end;
                  S_WL{$ifdef x86_64}, S_WQ{$endif x86_64}:
                    if (taicpu(hp1).oper[0]^.val = $ffff) then
                      begin
                        DebugMsg(SPeepholeOptimization + 'MovzAnd2Movz2',p);
                        RemoveInstruction(hp1);
                        Result:=true;
                        exit;
                      end;
{$ifdef x86_64}
                  S_LQ:
                    if (taicpu(hp1).oper[0]^.val = $ffffffff) then
                      begin
                        DebugMsg(SPeepholeOptimization + 'MovzAnd2Movz3',p);
                        RemoveInstruction(hp1);
                        Result:=true;
                        exit;
                      end;
{$endif x86_64}
                  else
                    ;
                end;
                { we cannot get rid of the and, but can we get rid of the movz ?}
                { same source and destination register: if the AND mask fits
                  within the source size, it performs the zero-extension itself }
                if SuperRegistersEqual(taicpu(p).oper[0]^.reg,taicpu(p).oper[1]^.reg) then
                  begin
                    case taicpu(p).opsize Of
                      S_BL, S_BW{$ifdef x86_64}, S_BQ{$endif x86_64}:
                        if (taicpu(hp1).oper[0]^.val and $ff)=taicpu(hp1).oper[0]^.val then
                          begin
                            DebugMsg(SPeepholeOptimization + 'MovzAnd2And1',p);
                            RemoveCurrentP(p,hp1);
                            Result:=true;
                            exit;
                          end;
                      S_WL{$ifdef x86_64}, S_WQ{$endif x86_64}:
                        if (taicpu(hp1).oper[0]^.val and $ffff)=taicpu(hp1).oper[0]^.val then
                          begin
                            DebugMsg(SPeepholeOptimization + 'MovzAnd2And2',p);
                            RemoveCurrentP(p,hp1);
                            Result:=true;
                            exit;
                          end;
{$ifdef x86_64}
                      S_LQ:
                        if (taicpu(hp1).oper[0]^.val and $ffffffff)=taicpu(hp1).oper[0]^.val then
                          begin
                            DebugMsg(SPeepholeOptimization + 'MovzAnd2And3',p);
                            RemoveCurrentP(p,hp1);
                            Result:=true;
                            exit;
                          end;
{$endif x86_64}
                      else
                        ;
                    end;
                  end;
              end;
            { changes some movzx constructs to faster synonyms (all examples
              are given with eax/ax, but are also valid for other registers)}
            if MatchOpType(taicpu(p),top_reg,top_reg) then
              begin
                case taicpu(p).opsize of
                  { Technically, movzbw %al,%ax cannot be encoded in 32/64-bit mode
                    (the machine code is equivalent to movzbl %al,%eax), but the
                    code generator still generates that assembler instruction and
                    it is silently converted. This should probably be checked.
                    [Kit] }
                  S_BW:
                    begin
                      if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
                        (
                          not IsMOVZXAcceptable
                          { and $0xff,%ax has a smaller encoding but risks a partial write penalty }
                          or (
                            (cs_opt_size in current_settings.optimizerswitches) and
                            (taicpu(p).oper[1]^.reg = NR_AX)
                          )
                        ) then
                        {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
                        begin
                          DebugMsg(SPeepholeOptimization + 'var7',p);
                          taicpu(p).opcode := A_AND;
                          taicpu(p).changeopsize(S_W);
                          taicpu(p).loadConst(0,$ff);
                          Result := True;
                        end
                      else if not IsMOVZXAcceptable and
                        GetNextInstruction(p, hp1) and
                        (tai(hp1).typ = ait_instruction) and
                        (taicpu(hp1).opcode = A_AND) and
                        MatchOpType(taicpu(hp1),top_const,top_reg) and
                        (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                        { Change "movzbw %reg1, %reg2; andw $const, %reg2"
                          to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
                        begin
                          DebugMsg(SPeepholeOptimization + 'var8',p);
                          taicpu(p).opcode := A_MOV;
                          taicpu(p).changeopsize(S_W);
                          setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
                          taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                          Result := True;
                        end;
                    end;
{$ifndef i8086} { movzbl %al,%eax cannot be encoded in 16-bit mode (the machine code is equivalent to movzbw %al,%ax }
                  S_BL:
                    begin
                      if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
                        (
                          not IsMOVZXAcceptable
                          { and $0xff,%eax has a smaller encoding but risks a partial write penalty }
                          or (
                            (cs_opt_size in current_settings.optimizerswitches) and
                            (taicpu(p).oper[1]^.reg = NR_EAX)
                          )
                        ) then
                        { Change "movzbl %al, %eax" to "andl $0x0ffh, %eax" }
                        begin
                          DebugMsg(SPeepholeOptimization + 'var9',p);
                          taicpu(p).opcode := A_AND;
                          taicpu(p).changeopsize(S_L);
                          taicpu(p).loadConst(0,$ff);
                          Result := True;
                        end
                      else if not IsMOVZXAcceptable and
                        GetNextInstruction(p, hp1) and
                        (tai(hp1).typ = ait_instruction) and
                        (taicpu(hp1).opcode = A_AND) and
                        MatchOpType(taicpu(hp1),top_const,top_reg) and
                        (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                        { Change "movzbl %reg1, %reg2; andl $const, %reg2"
                          to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
                        begin
                          DebugMsg(SPeepholeOptimization + 'var10',p);
                          taicpu(p).opcode := A_MOV;
                          taicpu(p).changeopsize(S_L);
                          { do not use R_SUBWHOLE
                            as movl %rdx,%eax
                            is invalid in assembler PM }
                          setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
                          taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                          Result := True;
                        end;
                    end;
{$endif i8086}
                  S_WL:
                    if not IsMOVZXAcceptable then
                      begin
                        if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) then
                          { Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax" }
                          begin
                            DebugMsg(SPeepholeOptimization + 'var11',p);
                            taicpu(p).opcode := A_AND;
                            taicpu(p).changeopsize(S_L);
                            taicpu(p).loadConst(0,$ffff);
                            Result := True;
                          end
                        else if GetNextInstruction(p, hp1) and
                          (tai(hp1).typ = ait_instruction) and
                          (taicpu(hp1).opcode = A_AND) and
                          (taicpu(hp1).oper[0]^.typ = top_const) and
                          (taicpu(hp1).oper[1]^.typ = top_reg) and
                          (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                          { Change "movzwl %reg1, %reg2; andl $const, %reg2"
                            to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
                          begin
                            DebugMsg(SPeepholeOptimization + 'var12',p);
                            taicpu(p).opcode := A_MOV;
                            taicpu(p).changeopsize(S_L);
                            { do not use R_SUBWHOLE
                              as movl %rdx,%eax
                              is invalid in assembler PM }
                            setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
                            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
                            Result := True;
                          end;
                      end;
                  else
                    InternalError(2017050705);
                end;
              end
            else if not IsMOVZXAcceptable and (taicpu(p).oper[0]^.typ = top_ref) then
              begin
                { memory-source movzx followed by AND: narrow the AND mask to
                  the loaded width so the movzx may later be turned into a mov }
                if GetNextInstruction(p, hp1) and
                  (tai(hp1).typ = ait_instruction) and
                  (taicpu(hp1).opcode = A_AND) and
                  MatchOpType(taicpu(hp1),top_const,top_reg) and
                  (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                  begin
                    //taicpu(p).opcode := A_MOV;
                    case taicpu(p).opsize Of
                      S_BL:
                        begin
                          DebugMsg(SPeepholeOptimization + 'var13',p);
                          taicpu(hp1).changeopsize(S_L);
                          taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                        end;
                      S_WL:
                        begin
                          DebugMsg(SPeepholeOptimization + 'var14',p);
                          taicpu(hp1).changeopsize(S_L);
                          taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
                        end;
                      S_BW:
                        begin
                          DebugMsg(SPeepholeOptimization + 'var15',p);
                          taicpu(hp1).changeopsize(S_W);
                          taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                        end;
                      else
                        Internalerror(2017050704)
                    end;
                    Result := True;
                  end;
              end;
          end;
      end;
  8377. function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
  8378. var
  8379. hp1, hp2 : tai;
  8380. MaskLength : Cardinal;
  8381. MaskedBits : TCgInt;
  8382. begin
  8383. Result:=false;
  8384. { There are no optimisations for reference targets }
  8385. if (taicpu(p).oper[1]^.typ <> top_reg) then
  8386. Exit;
  8387. while GetNextInstruction(p, hp1) and
  8388. (hp1.typ = ait_instruction) do
  8389. begin
  8390. if (taicpu(p).oper[0]^.typ = top_const) then
  8391. begin
  8392. case taicpu(hp1).opcode of
  8393. A_AND:
  8394. if MatchOpType(taicpu(hp1),top_const,top_reg) and
  8395. (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
  8396. { the second register must contain the first one, so compare their subreg types }
  8397. (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
  8398. (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
  8399. { change
  8400. and const1, reg
  8401. and const2, reg
  8402. to
  8403. and (const1 and const2), reg
  8404. }
  8405. begin
  8406. taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
  8407. DebugMsg(SPeepholeOptimization + 'AndAnd2And done',hp1);
  8408. RemoveCurrentP(p, hp1);
  8409. Result:=true;
  8410. exit;
  8411. end;
  8412. A_CMP:
  8413. if (PopCnt(DWord(taicpu(p).oper[0]^.val)) = 1) and { Only 1 bit set }
  8414. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.val) and
  8415. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^.reg) and
  8416. { Just check that the condition on the next instruction is compatible }
  8417. GetNextInstruction(hp1, hp2) and
  8418. (hp2.typ = ait_instruction) and
  8419. (taicpu(hp2).condition in [C_Z, C_E, C_NZ, C_NE])
  8420. then
  8421. { change
  8422. and 2^n, reg
  8423. cmp 2^n, reg
  8424. j(c) / set(c) / cmov(c) (c is equal or not equal)
  8425. to
  8426. and 2^n, reg
  8427. test reg, reg
  8428. j(~c) / set(~c) / cmov(~c)
  8429. }
  8430. begin
  8431. { Keep TEST instruction in, rather than remove it, because
  8432. it may trigger other optimisations such as MovAndTest2Test }
  8433. taicpu(hp1).loadreg(0, taicpu(hp1).oper[1]^.reg);
  8434. taicpu(hp1).opcode := A_TEST;
  8435. DebugMsg(SPeepholeOptimization + 'AND/CMP/J(c) -> AND/J(~c) with power of 2 constant', p);
  8436. taicpu(hp2).condition := inverse_cond(taicpu(hp2).condition);
  8437. Result := True;
  8438. Exit;
  8439. end;
  8440. A_MOVZX:
  8441. if MatchOpType(taicpu(hp1),top_reg,top_reg) and
  8442. SuperRegistersEqual(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^.reg) and
  8443. (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
  8444. (
  8445. (
  8446. (taicpu(p).opsize=S_W) and
  8447. (taicpu(hp1).opsize=S_BW)
  8448. ) or
  8449. (
  8450. (taicpu(p).opsize=S_L) and
  8451. (taicpu(hp1).opsize in [S_WL,S_BL{$ifdef x86_64},S_BQ,S_WQ{$endif x86_64}])
  8452. )
  8453. {$ifdef x86_64}
  8454. or
  8455. (
  8456. (taicpu(p).opsize=S_Q) and
  8457. (taicpu(hp1).opsize in [S_BQ,S_WQ,S_BL,S_WL])
  8458. )
  8459. {$endif x86_64}
  8460. ) then
  8461. begin
  8462. if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
  8463. ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
  8464. ) or
  8465. (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
  8466. ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
  8467. then
  8468. begin
  8469. { Unlike MOVSX, MOVZX doesn't actually have a version that zero-extends a
  8470. 32-bit register to a 64-bit register, or even a version called MOVZXD, so
  8471. code that tests for the presence of AND 0xffffffff followed by MOVZX is
  8472. wasted, and is indictive of a compiler bug if it were triggered. [Kit]
  8473. NOTE: To zero-extend from 32 bits to 64 bits, simply use the standard MOV.
  8474. }
  8475. DebugMsg(SPeepholeOptimization + 'AndMovzToAnd done',p);
  8476. RemoveInstruction(hp1);
  8477. { See if there are other optimisations possible }
  8478. Continue;
  8479. end;
  8480. end;
  8481. A_SHL:
  8482. if MatchOpType(taicpu(hp1),top_const,top_reg) and
  8483. (getsupreg(taicpu(p).oper[1]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) then
  8484. begin
  8485. {$ifopt R+}
  8486. {$define RANGE_WAS_ON}
  8487. {$R-}
  8488. {$endif}
  8489. { get length of potential and mask }
  8490. MaskLength:=SizeOf(taicpu(p).oper[0]^.val)*8-BsrQWord(taicpu(p).oper[0]^.val)-1;
  8491. { really a mask? }
  8492. {$ifdef RANGE_WAS_ON}
  8493. {$R+}
  8494. {$endif}
  8495. if (((QWord(1) shl MaskLength)-1)=taicpu(p).oper[0]^.val) and
  8496. { unmasked part shifted out? }
  8497. ((MaskLength+taicpu(hp1).oper[0]^.val)>=topsize2memsize[taicpu(hp1).opsize]) then
  8498. begin
  8499. DebugMsg(SPeepholeOptimization + 'AndShlToShl done',p);
  8500. RemoveCurrentP(p, hp1);
  8501. Result:=true;
  8502. exit;
  8503. end;
  8504. end;
  8505. A_SHR:
  8506. if MatchOpType(taicpu(hp1),top_const,top_reg) and
  8507. (taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg) and
  8508. (taicpu(hp1).oper[0]^.val <= 63) then
  8509. begin
  8510. { Does SHR combined with the AND cover all the bits?
  8511. e.g. for "andb $252,%reg; shrb $2,%reg" - the "and" can be removed }
  8512. MaskedBits := taicpu(p).oper[0]^.val or ((TCgInt(1) shl taicpu(hp1).oper[0]^.val) - 1);
  8513. if ((taicpu(p).opsize = S_B) and ((MaskedBits and $FF) = $FF)) or
  8514. ((taicpu(p).opsize = S_W) and ((MaskedBits and $FFFF) = $FFFF)) or
  8515. ((taicpu(p).opsize = S_L) and ((MaskedBits and $FFFFFFFF) = $FFFFFFFF)) then
  8516. begin
  8517. DebugMsg(SPeepholeOptimization + 'AndShrToShr done', p);
  8518. RemoveCurrentP(p, hp1);
  8519. Result := True;
  8520. Exit;
  8521. end;
  8522. end;
  8523. A_MOVSX{$ifdef x86_64}, A_MOVSXD{$endif x86_64}:
  8524. if (taicpu(hp1).oper[0]^.typ = top_reg) and
  8525. SuperRegistersEqual(taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg) then
  8526. begin
  8527. if SuperRegistersEqual(taicpu(p).oper[1]^.reg, taicpu(hp1).oper[1]^.reg) and
  8528. (
  8529. (
  8530. (taicpu(hp1).opsize in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
  8531. ((taicpu(p).oper[0]^.val and $7F) = taicpu(p).oper[0]^.val)
  8532. ) or (
  8533. (taicpu(hp1).opsize in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
  8534. ((taicpu(p).oper[0]^.val and $7FFF) = taicpu(p).oper[0]^.val)
  8535. {$ifdef x86_64}
  8536. ) or (
  8537. (taicpu(hp1).opsize = S_LQ) and
  8538. ((taicpu(p).oper[0]^.val and $7fffffff) = taicpu(p).oper[0]^.val)
  8539. {$endif x86_64}
  8540. )
  8541. ) then
  8542. begin
  8543. if (taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg){$ifdef x86_64} or (taicpu(hp1).opsize = S_LQ){$endif x86_64} then
  8544. begin
  8545. DebugMsg(SPeepholeOptimization + 'AndMovsxToAnd',p);
  8546. RemoveInstruction(hp1);
  8547. { See if there are other optimisations possible }
  8548. Continue;
  8549. end;
  8550. { The super-registers are the same though.
  8551. Note that this change by itself doesn't improve
  8552. code speed, but it opens up other optimisations. }
  8553. {$ifdef x86_64}
  8554. { Convert 64-bit register to 32-bit }
  8555. case taicpu(hp1).opsize of
  8556. S_BQ:
  8557. begin
  8558. taicpu(hp1).opsize := S_BL;
  8559. taicpu(hp1).oper[1]^.reg := newreg(R_INTREGISTER, getsupreg(taicpu(hp1).oper[1]^.reg), R_SUBD);
  8560. end;
  8561. S_WQ:
  8562. begin
  8563. taicpu(hp1).opsize := S_WL;
  8564. taicpu(hp1).oper[1]^.reg := newreg(R_INTREGISTER, getsupreg(taicpu(hp1).oper[1]^.reg), R_SUBD);
  8565. end
  8566. else
  8567. ;
  8568. end;
  8569. {$endif x86_64}
  8570. DebugMsg(SPeepholeOptimization + 'AndMovsxToAndMovzx', hp1);
  8571. taicpu(hp1).opcode := A_MOVZX;
  8572. { See if there are other optimisations possible }
  8573. Continue;
  8574. end;
  8575. end;
  8576. else
  8577. ;
  8578. end;
  8579. end;
  8580. if (taicpu(hp1).is_jmp) and
  8581. (taicpu(hp1).opcode<>A_JMP) and
  8582. not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
  8583. begin
  8584. { change
  8585. and x, reg
  8586. jxx
  8587. to
  8588. test x, reg
  8589. jxx
  8590. if reg is deallocated before the
  8591. jump, but only if it's a conditional jump (PFV)
  8592. }
  8593. taicpu(p).opcode := A_TEST;
  8594. Exit;
  8595. end;
  8596. Break;
  8597. end;
  8598. { Lone AND tests }
  8599. if (taicpu(p).oper[0]^.typ = top_const) then
  8600. begin
  8601. {
  8602. - Convert and $0xFF,reg to and reg,reg if reg is 8-bit
  8603. - Convert and $0xFFFF,reg to and reg,reg if reg is 16-bit
  8604. - Convert and $0xFFFFFFFF,reg to and reg,reg if reg is 32-bit
  8605. }
  8606. if ((taicpu(p).oper[0]^.val = $FF) and (taicpu(p).opsize = S_B)) or
  8607. ((taicpu(p).oper[0]^.val = $FFFF) and (taicpu(p).opsize = S_W)) or
  8608. ((taicpu(p).oper[0]^.val = $FFFFFFFF) and (taicpu(p).opsize = S_L)) then
  8609. begin
  8610. taicpu(p).loadreg(0, taicpu(p).oper[1]^.reg);
  8611. if taicpu(p).opsize = S_L then
  8612. begin
  8613. Include(OptsToCheck,aoc_MovAnd2Mov_3);
  8614. Result := True;
  8615. end;
  8616. end;
  8617. end;
  8618. { Backward check to determine necessity of and %reg,%reg }
  8619. if (taicpu(p).oper[0]^.typ = top_reg) and
  8620. (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
  8621. not RegInUsedRegs(NR_DEFAULTFLAGS, UsedRegs) and
  8622. GetLastInstruction(p, hp2) and
  8623. RegModifiedByInstruction(taicpu(p).oper[1]^.reg, hp2) and
  8624. { Check size of adjacent instruction to determine if the AND is
  8625. effectively a null operation }
  8626. (
  8627. (taicpu(p).opsize = taicpu(hp2).opsize) or
  8628. { Note: Don't include S_Q }
  8629. ((taicpu(p).opsize = S_L) and (taicpu(hp2).opsize in [S_BL, S_WL])) or
  8630. ((taicpu(p).opsize = S_W) and (taicpu(hp2).opsize in [S_BW, S_BL, S_WL, S_L])) or
  8631. ((taicpu(p).opsize = S_B) and (taicpu(hp2).opsize in [S_BW, S_BL, S_WL, S_W, S_L]))
  8632. ) then
  8633. begin
  8634. DebugMsg(SPeepholeOptimization + 'And2Nop', p);
  8635. { If GetNextInstruction returned False, hp1 will be nil }
  8636. RemoveCurrentP(p, hp1);
  8637. Result := True;
  8638. Exit;
  8639. end;
  8640. end;
function TX86AsmOptimizer.OptPass2ADD(var p : tai) : boolean;
  var
    hp1: tai; NewRef: TReference;

    { This entire nested function is used in an if-statement below, but we
      want to avoid all the used reg transfers and GetNextInstruction calls
      until we really have to check }
    function MemRegisterNotUsedLater: Boolean; inline;
      var
        hp2: tai;
      begin
        { Bring the tracked register state forward to hp1 so we can ask
          whether the ADD's destination register is still live afterwards }
        TransferUsedRegs(TmpUsedRegs);
        hp2 := p;
        repeat
          UpdateUsedRegs(TmpUsedRegs, tai(hp2.Next));
        until not GetNextInstruction(hp2, hp2) or (hp2 = hp1);

        Result := not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs);
      end;

  begin
    Result := False;

    if not GetNextInstruction(p, hp1) or (hp1.typ <> ait_instruction) then
      Exit;

    if (taicpu(p).opsize in [S_L{$ifdef x86_64}, S_Q{$endif}]) then
      begin
        { Change:
            add     %reg2,%reg1
            mov/s/z #(%reg1),%reg1  (%reg1 superregisters must be the same)

          To:
            mov/s/z #(%reg1,%reg2),%reg1
        }
        if MatchOpType(taicpu(p), top_reg, top_reg) and
          MatchInstruction(hp1, [A_MOV, A_MOVZX, A_MOVSX{$ifdef x86_64}, A_MOVSXD{$endif}], []) and
          MatchOpType(taicpu(hp1), top_ref, top_reg) and
          { a scale factor would clash with folding the ADD into the address }
          (taicpu(hp1).oper[0]^.ref^.scalefactor <= 1) and
          (
            (
              (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) and
              (taicpu(hp1).oper[0]^.ref^.index = NR_NO) and
              { r/esp cannot be an index }
              (taicpu(p).oper[0]^.reg<>NR_STACK_POINTER_REG)
            ) or (
              (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) and
              (taicpu(hp1).oper[0]^.ref^.base = NR_NO)
            )
          ) and (
            Reg1WriteOverwritesReg2Entirely(taicpu(p).oper[1]^.reg, taicpu(hp1).oper[1]^.reg) or
            (
              { If the super registers ARE equal, then this MOV/S/Z does a partial write }
              not SuperRegistersEqual(taicpu(p).oper[1]^.reg, taicpu(hp1).oper[1]^.reg) and
              MemRegisterNotUsedLater
            )
          ) then
          begin
            { Fold the ADD into the addressing mode: base := reg1, index := reg2 }
            taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[1]^.reg;
            taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;

            DebugMsg(SPeepholeOptimization + 'AddMov2Mov done', p);
            RemoveCurrentp(p, hp1);
            Result := True;
            Exit;
          end;

        { Change:
            addl/q $x,%reg1
            movl/q %reg1,%reg2

          To:
            leal/q $x(%reg1),%reg2
            addl/q $x,%reg1 (can be removed if %reg1 or the flags are not used afterwards)

          Breaks the dependency chain.
        }
        if MatchOpType(taicpu(p),top_const,top_reg) and
          MatchInstruction(hp1, A_MOV, [taicpu(p).opsize]) and
          (taicpu(hp1).oper[1]^.typ = top_reg) and
          MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^.reg) and
          (
            { Don't do AddMov2LeaAdd under -Os, but do allow AddMov2Lea }
            not (cs_opt_size in current_settings.optimizerswitches) or
            (
              { NOTE(review): TmpUsedRegs has not been refreshed via
                TransferUsedRegs at this point, and the positive flags test
                looks inconsistent with the "do allow AddMov2Lea" intent
                (one would expect "not RegUsedAfterInstruction(NR_DEFAULTFLAGS,...)"
                since LEA does not set the flags) - confirm against upstream }
              not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs) and
              RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp1, TmpUsedRegs)
            )
          ) then
          begin
            { Change the MOV instruction to a LEA instruction, and update the
              first operand }
            reference_reset(NewRef, 1, []);
            NewRef.base := taicpu(p).oper[1]^.reg;
            NewRef.scalefactor := 1;
            NewRef.offset := taicpu(p).oper[0]^.val;

            taicpu(hp1).opcode := A_LEA;
            taicpu(hp1).loadref(0, NewRef);

            TransferUsedRegs(TmpUsedRegs);
            UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
            if RegUsedAfterInstruction(NewRef.base, hp1, TmpUsedRegs) or
              RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp1, TmpUsedRegs) then
              begin
                { Move what is now the LEA instruction to before the ADD
                  instruction (the original comment said "SUB": copy-paste
                  from OptPass2SUB); the ADD must stay because its result
                  register or its flags are consumed later }
                Asml.Remove(hp1);
                Asml.InsertBefore(hp1, p);
                AllocRegBetween(taicpu(hp1).oper[1]^.reg, hp1, p, UsedRegs);

                DebugMsg(SPeepholeOptimization + 'AddMov2LeaAdd', p);
                p := hp1;
              end
            else
              begin
                { Since %reg1 or the flags aren't used afterwards, we can delete p completely }
                RemoveCurrentP(p, hp1);
                DebugMsg(SPeepholeOptimization + 'AddMov2Lea', p);
              end;

            Result := True;
          end;
      end;
  end;
  8751. function TX86AsmOptimizer.OptPass2Lea(var p : tai) : Boolean;
  8752. begin
  8753. Result:=false;
  8754. if not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
  8755. begin
  8756. if MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
  8757. (taicpu(p).oper[0]^.ref^.index<>NR_NO) then
  8758. begin
  8759. taicpu(p).loadreg(1,taicpu(p).oper[0]^.ref^.base);
  8760. taicpu(p).loadreg(0,taicpu(p).oper[0]^.ref^.index);
  8761. taicpu(p).opcode:=A_ADD;
  8762. DebugMsg(SPeepholeOptimization + 'Lea2AddBase done',p);
  8763. result:=true;
  8764. end
  8765. else if MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) and
  8766. (taicpu(p).oper[0]^.ref^.base<>NR_NO) then
  8767. begin
  8768. taicpu(p).loadreg(1,taicpu(p).oper[0]^.ref^.index);
  8769. taicpu(p).loadreg(0,taicpu(p).oper[0]^.ref^.base);
  8770. taicpu(p).opcode:=A_ADD;
  8771. DebugMsg(SPeepholeOptimization + 'Lea2AddIndex done',p);
  8772. result:=true;
  8773. end;
  8774. end;
  8775. end;
function TX86AsmOptimizer.OptPass2SUB(var p: tai): Boolean;
  var
    hp1: tai; NewRef: TReference;
  begin
    { Change:
        subl/q $x,%reg1
        movl/q %reg1,%reg2

      To:
        leal/q $-x(%reg1),%reg2
        subl/q $x,%reg1 (can be removed if %reg1 or the flags are not used afterwards)

      Breaks the dependency chain and potentially permits the removal of
      a CMP instruction if one follows.
    }
    Result := False;
    if (taicpu(p).opsize in [S_L{$ifdef x86_64}, S_Q{$endif x86_64}]) and
      MatchOpType(taicpu(p),top_const,top_reg) and
      GetNextInstruction(p, hp1) and
      MatchInstruction(hp1, A_MOV, [taicpu(p).opsize]) and
      (taicpu(hp1).oper[1]^.typ = top_reg) and
      { the MOV must copy exactly the register the SUB just wrote }
      MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^.reg) and
      (
        { Don't do SubMov2LeaSub under -Os, but do allow SubMov2Lea }
        not (cs_opt_size in current_settings.optimizerswitches) or
        (
          { NOTE(review): TmpUsedRegs has not been refreshed via
            TransferUsedRegs at this point, and the positive flags test looks
            inconsistent with the "do allow SubMov2Lea" intent (one would
            expect "not RegUsedAfterInstruction(NR_DEFAULTFLAGS,...)" since
            LEA does not set the flags) - confirm against upstream }
          not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs) and
          RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp1, TmpUsedRegs)
        )
      ) then
      begin
        { Change the MOV instruction to a LEA instruction, and update the
          first operand }
        reference_reset(NewRef, 1, []);
        NewRef.base := taicpu(p).oper[1]^.reg;
        NewRef.scalefactor := 1;
        { LEA cannot subtract, so encode the negated constant as the offset }
        NewRef.offset := -taicpu(p).oper[0]^.val;

        taicpu(hp1).opcode := A_LEA;
        taicpu(hp1).loadref(0, NewRef);

        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
        if RegUsedAfterInstruction(NewRef.base, hp1, TmpUsedRegs) or
          RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp1, TmpUsedRegs) then
          begin
            { Move what is now the LEA instruction to before the SUB
              instruction; the SUB must stay because its result register or
              its flags are consumed later }
            Asml.Remove(hp1);
            Asml.InsertBefore(hp1, p);
            AllocRegBetween(taicpu(hp1).oper[1]^.reg, hp1, p, UsedRegs);

            DebugMsg(SPeepholeOptimization + 'SubMov2LeaSub', p);
            p := hp1;
          end
        else
          begin
            { Since %reg1 or the flags aren't used afterwards, we can delete p completely }
            RemoveCurrentP(p, hp1);
            DebugMsg(SPeepholeOptimization + 'SubMov2Lea', p);
          end;

        Result := True;
      end;
  end;
  8834. function TX86AsmOptimizer.SkipSimpleInstructions(var hp1 : tai) : Boolean;
  8835. begin
  8836. { we can skip all instructions not messing with the stack pointer }
  8837. while assigned(hp1) and {MatchInstruction(hp1,[A_LEA,A_MOV,A_MOVQ,A_MOVSQ,A_MOVSX,A_MOVSXD,A_MOVZX,
  8838. A_AND,A_OR,A_XOR,A_ADD,A_SHR,A_SHL,A_IMUL,A_SETcc,A_SAR,A_SUB,A_TEST,A_CMOVcc,
  8839. A_MOVSS,A_MOVSD,A_MOVAPS,A_MOVUPD,A_MOVAPD,A_MOVUPS,
  8840. A_VMOVSS,A_VMOVSD,A_VMOVAPS,A_VMOVUPD,A_VMOVAPD,A_VMOVUPS],[]) and}
  8841. ({(taicpu(hp1).ops=0) or }
  8842. ({(MatchOpType(taicpu(hp1),top_reg,top_reg) or MatchOpType(taicpu(hp1),top_const,top_reg) or
  8843. (MatchOpType(taicpu(hp1),top_ref,top_reg))
  8844. ) and }
  8845. not(RegInInstruction(NR_STACK_POINTER_REG,hp1)) { and not(RegInInstruction(NR_FRAME_POINTER_REG,hp1))}
  8846. )
  8847. ) do
  8848. GetNextInstruction(hp1,hp1);
  8849. Result:=assigned(hp1);
  8850. end;
function TX86AsmOptimizer.PostPeepholeOptLea(var p : tai) : Boolean;
  var
    hp1, hp2, hp3, hp4, hp5: tai;
  begin
    Result:=false;
    hp5:=nil;
    { replace
        leal(q) x(<stackpointer>),<stackpointer>
        call   procname
        leal(q) -x(<stackpointer>),<stackpointer>
        ret
      by
        jmp    procname

      but do it only on level 4 because it destroys stack back traces
    }
    if (cs_opt_level4 in current_settings.optimizerswitches) and
      { the first LEA must adjust only the stack pointer by a plain constant }
      MatchOpType(taicpu(p),top_ref,top_reg) and
      (taicpu(p).oper[0]^.ref^.base=NR_STACK_POINTER_REG) and
      (taicpu(p).oper[0]^.ref^.index=NR_NO) and
      { the -8 or -24 are not required, but bail out early if possible,
        higher values are unlikely }
      ((taicpu(p).oper[0]^.ref^.offset=-8) or
       (taicpu(p).oper[0]^.ref^.offset=-24)) and
      (taicpu(p).oper[0]^.ref^.symbol=nil) and
      (taicpu(p).oper[0]^.ref^.relsymbol=nil) and
      (taicpu(p).oper[0]^.ref^.segment=NR_NO) and
      (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG) and
      GetNextInstruction(p, hp1) and
      { Take a copy of hp1 }
      SetAndTest(hp1, hp4) and
      { trick to skip label }
      ((hp1.typ=ait_instruction) or GetNextInstruction(hp1, hp1)) and
      SkipSimpleInstructions(hp1) and
      MatchInstruction(hp1,A_CALL,[S_NO]) and
      GetNextInstruction(hp1, hp2) and
      { the second LEA must undo exactly the first adjustment }
      MatchInstruction(hp2,A_LEA,[taicpu(p).opsize]) and
      MatchOpType(taicpu(hp2),top_ref,top_reg) and
      (taicpu(hp2).oper[0]^.ref^.offset=-taicpu(p).oper[0]^.ref^.offset) and
      (taicpu(hp2).oper[0]^.ref^.base=NR_STACK_POINTER_REG) and
      (taicpu(hp2).oper[0]^.ref^.index=NR_NO) and
      (taicpu(hp2).oper[0]^.ref^.symbol=nil) and
      (taicpu(hp2).oper[0]^.ref^.relsymbol=nil) and
      (taicpu(hp2).oper[0]^.ref^.segment=NR_NO) and
      (taicpu(hp2).oper[1]^.reg=NR_STACK_POINTER_REG) and
      GetNextInstruction(hp2, hp3) and
      { trick to skip label }
      ((hp3.typ=ait_instruction) or GetNextInstruction(hp3, hp3)) and
      { allow an optional VZEROUPPER before the final RET; remember it in hp5 }
      (MatchInstruction(hp3,A_RET,[S_NO]) or
       (MatchInstruction(hp3,A_VZEROUPPER,[S_NO]) and
        SetAndTest(hp3,hp5) and
        GetNextInstruction(hp3,hp3) and
        MatchInstruction(hp3,A_RET,[S_NO])
       )
      ) and
      (taicpu(hp3).ops=0) then
      begin
        { turn the CALL into a tail jump; the callee's RET then returns
          directly to our caller }
        taicpu(hp1).opcode := A_JMP;
        taicpu(hp1).is_jmp := true;
        DebugMsg(SPeepholeOptimization + 'LeaCallLeaRet2Jmp done',p);
        { remove both stack adjustments and the now-unreachable RET }
        RemoveCurrentP(p, hp4);
        RemoveInstruction(hp2);
        RemoveInstruction(hp3);
        if Assigned(hp5) then
          begin
            { move the VZEROUPPER in front of the JMP so it still executes }
            AsmL.Remove(hp5);
            ASmL.InsertBefore(hp5,hp1)
          end;
        Result:=true;
      end;
  end;
function TX86AsmOptimizer.PostPeepholeOptPush(var p : tai) : Boolean;
{$ifdef x86_64}
  var
    hp1, hp2, hp3, hp4, hp5: tai;
{$endif x86_64}
  begin
    Result:=false;
{$ifdef x86_64}
    hp5:=nil;
    { replace
        push %rax
        call procname
        pop %rcx
        ret
      by
        jmp procname

      but do it only on level 4 because it destroys stack back traces

      It depends on the fact, that the sequence push rax/pop rcx is used for stack alignment as rcx is volatile
      for all supported calling conventions
    }
    if (cs_opt_level4 in current_settings.optimizerswitches) and
      MatchOpType(taicpu(p),top_reg) and
      (taicpu(p).oper[0]^.reg=NR_RAX) and
      GetNextInstruction(p, hp1) and
      { Take a copy of hp1 }
      SetAndTest(hp1, hp4) and
      { trick to skip label }
      ((hp1.typ=ait_instruction) or GetNextInstruction(hp1, hp1)) and
      SkipSimpleInstructions(hp1) and
      MatchInstruction(hp1,A_CALL,[S_NO]) and
      GetNextInstruction(hp1, hp2) and
      { the POP must undo the PUSH's stack adjustment }
      MatchInstruction(hp2,A_POP,[taicpu(p).opsize]) and
      MatchOpType(taicpu(hp2),top_reg) and
      (taicpu(hp2).oper[0]^.reg=NR_RCX) and
      GetNextInstruction(hp2, hp3) and
      { trick to skip label }
      ((hp3.typ=ait_instruction) or GetNextInstruction(hp3, hp3)) and
      { allow an optional VZEROUPPER before the final RET; remember it in hp5 }
      (MatchInstruction(hp3,A_RET,[S_NO]) or
       (MatchInstruction(hp3,A_VZEROUPPER,[S_NO]) and
        SetAndTest(hp3,hp5) and
        GetNextInstruction(hp3,hp3) and
        MatchInstruction(hp3,A_RET,[S_NO])
       )
      ) and
      (taicpu(hp3).ops=0) then
      begin
        { turn the CALL into a tail jump; the callee's RET then returns
          directly to our caller }
        taicpu(hp1).opcode := A_JMP;
        taicpu(hp1).is_jmp := true;
        DebugMsg(SPeepholeOptimization + 'PushCallPushRet2Jmp done',p);
        { remove the PUSH, the POP and the now-unreachable RET }
        RemoveCurrentP(p, hp4);
        RemoveInstruction(hp2);
        RemoveInstruction(hp3);
        if Assigned(hp5) then
          begin
            { move the VZEROUPPER in front of the JMP so it still executes }
            AsmL.Remove(hp5);
            ASmL.InsertBefore(hp5,hp1)
          end;
        Result:=true;
      end;
{$endif x86_64}
  end;
function TX86AsmOptimizer.PostPeepholeOptMov(var p : tai) : Boolean;
  var
    Value, RegName: string;
  begin
    Result:=false;

    { Shrink "mov $const,%reg" encodings.  Note the unusual conditional
      compilation below: the "end;" after {$endif x86_64} closes the
      0-branch "begin" on 32-bit targets but the $1..$FFFFFFFF "begin" on
      x86_64 - take care when editing. }
    if (taicpu(p).oper[1]^.typ = top_reg) and (taicpu(p).oper[0]^.typ = top_const) then
      begin
        case taicpu(p).oper[0]^.val of
          0:
            { Don't make this optimisation if the CPU flags are required, since XOR scrambles them }
            if not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
              begin
                { change "mov $0,%reg" into "xor %reg,%reg" }
                taicpu(p).opcode := A_XOR;
                taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
                Result := True;
{$ifdef x86_64}
              end
            else if (taicpu(p).opsize = S_Q) then
              begin
                { Flags are live, so XOR is out - but a 32-bit MOV
                  zero-extends into the full 64-bit register and leaves the
                  flags alone, with a shorter encoding }
                RegName := debug_regname(taicpu(p).oper[1]^.reg); { 64-bit register name }

                { The actual optimization }
                setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
                taicpu(p).changeopsize(S_L);

                DebugMsg(SPeepholeOptimization + 'movq $0,' + RegName + ' -> movl $0,' + debug_regname(taicpu(p).oper[1]^.reg) + ' (immediate can be represented with just 32 bits)', p);
                Result := True;
              end;
          $1..$FFFFFFFF:
            begin
              { Code size reduction by J. Gareth "Kit" Moreton }
              { change 64-bit register to 32-bit register to reduce code size (upper 32 bits will be set to zero) }
              case taicpu(p).opsize of
                S_Q:
                  begin
                    RegName := debug_regname(taicpu(p).oper[1]^.reg); { 64-bit register name }
                    Value := debug_tostr(taicpu(p).oper[0]^.val);

                    { The actual optimization }
                    setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
                    taicpu(p).changeopsize(S_L);

                    DebugMsg(SPeepholeOptimization + 'movq $' + Value + ',' + RegName + ' -> movl $' + Value + ',' + debug_regname(taicpu(p).oper[1]^.reg) + ' (immediate can be represented with just 32 bits)', p);
                    Result := True;
                  end;
                else
                  { Do nothing };
              end;
{$endif x86_64}
            end;
          -1:
            { Don't make this optimisation if the CPU flags are required, since OR scrambles them }
            if (cs_opt_size in current_settings.optimizerswitches) and
              (taicpu(p).opsize <> S_B) and
              not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
              begin
                { change "mov $-1,%reg" into "or $-1,%reg" }
                { NOTES:
                  - No size saving is made when changing a Word-sized assignment unless the register is AX (smaller encoding)
                  - This operation creates a false dependency on the register, so only do it when optimising for size
                  - It is possible to set memory operands using this method, but this creates an even greater false dependency, so don't do this at all
                }
                taicpu(p).opcode := A_OR;
                Result := True;
              end;
          else
            { Do nothing };
        end;
      end;
  end;
  9049. function TX86AsmOptimizer.PostPeepholeOptAnd(var p : tai) : boolean;
  9050. var
  9051. hp1: tai;
  9052. begin
  9053. { Detect:
  9054. andw x, %ax (0 <= x < $8000)
  9055. ...
  9056. movzwl %ax,%eax
  9057. Change movzwl %ax,%eax to cwtl (shorter encoding for movswl %ax,%eax)
  9058. }
  9059. Result := False; if MatchOpType(taicpu(p), top_const, top_reg) and
  9060. (taicpu(p).oper[1]^.reg = NR_AX) and { This is also enough to determine that opsize = S_W }
  9061. ((taicpu(p).oper[0]^.val and $7FFF) = taicpu(p).oper[0]^.val) and
  9062. GetNextInstructionUsingReg(p, hp1, NR_EAX) and
  9063. MatchInstruction(hp1, A_MOVZX, [S_WL]) and
  9064. MatchOperand(taicpu(hp1).oper[0]^, NR_AX) and
  9065. MatchOperand(taicpu(hp1).oper[1]^, NR_EAX) then
  9066. begin
  9067. DebugMsg(SPeepholeOptimization + 'Converted movzwl %ax,%eax to cwtl (via AndMovz2AndCwtl)', hp1);
  9068. taicpu(hp1).opcode := A_CWDE;
  9069. taicpu(hp1).clearop(0);
  9070. taicpu(hp1).clearop(1);
  9071. taicpu(hp1).ops := 0;
  9072. { A change was made, but not with p, so move forward 1 }
  9073. p := tai(p.Next);
  9074. Result := True;
  9075. end;
  9076. end;
  9077. function TX86AsmOptimizer.PostPeepholeOptMOVSX(var p : tai) : boolean;
  9078. begin
  9079. Result := False;
  9080. if not MatchOpType(taicpu(p), top_reg, top_reg) then
  9081. Exit;
  9082. { Convert:
  9083. movswl %ax,%eax -> cwtl
  9084. movslq %eax,%rax -> cdqe
  9085. NOTE: Don't convert movswl %al,%ax to cbw, because cbw and cwde
  9086. refer to the same opcode and depends only on the assembler's
  9087. current operand-size attribute. [Kit]
  9088. }
  9089. with taicpu(p) do
  9090. case opsize of
  9091. S_WL:
  9092. if (oper[0]^.reg = NR_AX) and (oper[1]^.reg = NR_EAX) then
  9093. begin
  9094. DebugMsg(SPeepholeOptimization + 'Converted movswl %ax,%eax to cwtl', p);
  9095. opcode := A_CWDE;
  9096. clearop(0);
  9097. clearop(1);
  9098. ops := 0;
  9099. Result := True;
  9100. end;
  9101. {$ifdef x86_64}
  9102. S_LQ:
  9103. if (oper[0]^.reg = NR_EAX) and (oper[1]^.reg = NR_RAX) then
  9104. begin
  9105. DebugMsg(SPeepholeOptimization + 'Converted movslq %eax,%rax to cltq', p);
  9106. opcode := A_CDQE;
  9107. clearop(0);
  9108. clearop(1);
  9109. ops := 0;
  9110. Result := True;
  9111. end;
  9112. {$endif x86_64}
  9113. else
  9114. ;
  9115. end;
  9116. end;
  9117. function TX86AsmOptimizer.PostPeepholeOptShr(var p : tai) : boolean;
  9118. var
  9119. hp1: tai;
  9120. begin
  9121. { Detect:
  9122. shr x, %ax (x > 0)
  9123. ...
  9124. movzwl %ax,%eax
  9125. Change movzwl %ax,%eax to cwtl (shorter encoding for movswl %ax,%eax)
  9126. }
  9127. Result := False;
  9128. if MatchOpType(taicpu(p), top_const, top_reg) and
  9129. (taicpu(p).oper[1]^.reg = NR_AX) and { This is also enough to determine that opsize = S_W }
  9130. (taicpu(p).oper[0]^.val > 0) and
  9131. GetNextInstructionUsingReg(p, hp1, NR_EAX) and
  9132. MatchInstruction(hp1, A_MOVZX, [S_WL]) and
  9133. MatchOperand(taicpu(hp1).oper[0]^, NR_AX) and
  9134. MatchOperand(taicpu(hp1).oper[1]^, NR_EAX) then
  9135. begin
  9136. DebugMsg(SPeepholeOptimization + 'Converted movzwl %ax,%eax to cwtl (via ShrMovz2ShrCwtl)', hp1);
  9137. taicpu(hp1).opcode := A_CWDE;
  9138. taicpu(hp1).clearop(0);
  9139. taicpu(hp1).clearop(1);
  9140. taicpu(hp1).ops := 0;
  9141. { A change was made, but not with p, so move forward 1 }
  9142. p := tai(p.Next);
  9143. Result := True;
  9144. end;
  9145. end;
  9146. function TX86AsmOptimizer.PostPeepholeOptCmp(var p : tai) : Boolean;
  9147. begin
  9148. Result:=false;
  9149. { change "cmp $0, %reg" to "test %reg, %reg" }
  9150. if MatchOpType(taicpu(p),top_const,top_reg) and
  9151. (taicpu(p).oper[0]^.val = 0) then
  9152. begin
  9153. taicpu(p).opcode := A_TEST;
  9154. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  9155. Result:=true;
  9156. end;
  9157. end;
function TX86AsmOptimizer.PostPeepholeOptTestOr(var p : tai) : Boolean;
  var
    IsTestConstX : Boolean;
    hp1,hp2 : tai;
  begin
    Result:=false;
    { removes the line marked with (x) from the sequence
      and/or/xor/add/sub/... $x, %y
      test/or %y, %y | test $-1, %y    (x)
      j(n)z _Label
      as the first instruction already adjusts the ZF
      %y operand may also be a reference }
    IsTestConstX:=(taicpu(p).opcode=A_TEST) and
      MatchOperand(taicpu(p).oper[0]^,-1);
    { p must be a self-test ("test/or %y,%y" or "test $-1,%y"), preceded by
      an instruction and followed by a flag consumer (SETcc/Jcc/CMOVcc) }
    if (OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) or IsTestConstX) and
       GetLastInstruction(p, hp1) and
       (tai(hp1).typ = ait_instruction) and
       GetNextInstruction(p,hp2) and
       MatchInstruction(hp2,A_SETcc,A_Jcc,A_CMOVcc,[]) then
      case taicpu(hp1).opcode Of
        A_ADD, A_SUB, A_OR, A_XOR, A_AND:
          begin
            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
              { and in case of carry for A(E)/B(E)/C/NC                  }
              ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
               ((taicpu(hp1).opcode <> A_ADD) and
                (taicpu(hp1).opcode <> A_SUB))) then
              begin
                { the previous instruction already set ZF/SF for %y }
                RemoveCurrentP(p, hp2);
                Result:=true;
                Exit;
              end;
          end;
        A_SHL, A_SAL, A_SHR, A_SAR:
          begin
            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
              { SHL/SAL/SHR/SAR with a value of 0 do not change the flags }
              { therefore, it's only safe to do this optimization for     }
              { shifts by a (nonzero) constant                            }
              (taicpu(hp1).oper[0]^.typ = top_const) and
              (taicpu(hp1).oper[0]^.val <> 0) and
              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
              { and in case of carry for A(E)/B(E)/C/NC                  }
              (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
              begin
                RemoveCurrentP(p, hp2);
                Result:=true;
                Exit;
              end;
          end;
        A_DEC, A_INC, A_NEG:
          begin
            { single-operand instructions: compare against oper[0] here }
            if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) and
              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
              { and in case of carry for A(E)/B(E)/C/NC                  }
              (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
              begin
                RemoveCurrentP(p, hp2);
                Result:=true;
                Exit;
              end;
          end
        else
          ;
      end; { case }

    { change "test $-1,%reg" into "test %reg,%reg" }
    if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
      taicpu(p).loadoper(0,taicpu(p).oper[1]^);

    { Change "or %reg,%reg" to "test %reg,%reg" as OR generates a false dependency }
    if MatchInstruction(p, A_OR, []) and
      { Can only match if they're both registers }
      MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) then
      begin
        DebugMsg(SPeepholeOptimization + 'or %reg,%reg -> test %reg,%reg to remove false dependency (Or2Test)', p);
        taicpu(p).opcode := A_TEST;
        { No need to set Result to True, as we've done all the optimisations we can }
      end;
  end;
function TX86AsmOptimizer.PostPeepholeOptCall(var p : tai) : Boolean;
  var
    hp1,hp3 : tai;
{$ifndef x86_64}
    hp2 : taicpu;
{$endif x86_64}
  begin
    Result:=false;
    hp3:=nil;
{$ifndef x86_64}
    { don't do this on modern CPUs, this really hurts them due to
      broken call/ret pairing }
    if (current_settings.optimizecputype < cpu_Pentium2) and
       not(cs_create_pic in current_settings.moduleswitches) and
       GetNextInstruction(p, hp1) and
       MatchInstruction(hp1,A_JMP,[S_NO]) and
       MatchOpType(taicpu(hp1),top_ref) and
       (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full) then
      begin
        { "call x; jmp y" -> "push y; jmp x": the called routine's RET
          then transfers control straight to y }
        hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
        InsertLLItem(p.previous, p, hp2);
        taicpu(p).opcode := A_JMP;
        taicpu(p).is_jmp := true;
        RemoveInstruction(hp1);
        Result:=true;
      end
    else
{$endif x86_64}
    { replace
        call procname
        ret
      by
        jmp procname

      but do it only on level 4 because it destroys stack back traces
      else if the subroutine is marked as no return, remove the ret
    }
    if ((cs_opt_level4 in current_settings.optimizerswitches) or
        (po_noreturn in current_procinfo.procdef.procoptions)) and
       GetNextInstruction(p, hp1) and
       { allow an optional VZEROUPPER before the RET; remember it in hp3 }
       (MatchInstruction(hp1,A_RET,[S_NO]) or
        (MatchInstruction(hp1,A_VZEROUPPER,[S_NO]) and
         SetAndTest(hp1,hp3) and
         GetNextInstruction(hp1,hp1) and
         MatchInstruction(hp1,A_RET,[S_NO])
        )
       ) and
       (taicpu(hp1).ops=0) then
      begin
        if (cs_opt_level4 in current_settings.optimizerswitches) and
          { we might destroy stack alignment here if we do not do a call }
          (target_info.stackalign<=sizeof(SizeUInt)) then
          begin
            taicpu(p).opcode := A_JMP;
            taicpu(p).is_jmp := true;
            DebugMsg(SPeepholeOptimization + 'CallRet2Jmp done',p);
          end
        else
          { keep the CALL (for stack alignment, or because the callee is
            noreturn) and only drop the RET }
          DebugMsg(SPeepholeOptimization + 'CallRet2Call done',p);
        RemoveInstruction(hp1);
        if Assigned(hp3) then
          begin
            { move the VZEROUPPER in front of the CALL/JMP so it still executes }
            AsmL.Remove(hp3);
            AsmL.InsertBefore(hp3,p)
          end;
        Result:=true;
      end;
  end;
    { Post-peephole optimisations for MOVZX where source and destination share
      the same super-register (e.g. movzbl %al,%eax):
        - a following CMP/TEST or MOV on the smaller sub-register is moved in
          front of the movzx to break the false dependency, and the movzx is
          dropped entirely when its destination is not used afterwards;
        - (x86_64) movzbq/movzwq with a low destination register is shrunk to
          movzbl/movzwl, removing the REX prefix.
      Returns True if the current instruction p was removed. }
    function TX86AsmOptimizer.PostPeepholeOptMovzx(var p : tai) : Boolean;

      { True if Val can be encoded in an operand of the source size implied
        by OpSize (byte/word/long); sizes not listed are accepted as-is }
      function ConstInRange(const Val: TCGInt; const OpSize: TOpSize): Boolean;
        begin
          case OpSize of
            S_B, S_BW, S_BL{$ifdef x86_64}, S_BQ{$endif x86_64}:
              Result := (Val <= $FF) and (Val >= -128);
            S_W, S_WL{$ifdef x86_64}, S_WQ{$endif x86_64}:
              Result := (Val <= $FFFF) and (Val >= -32768);
            S_L{$ifdef x86_64}, S_LQ{$endif x86_64}:
              Result := (Val <= $FFFFFFFF) and (Val >= -2147483648);
            else
              Result := True;
          end;
        end;

      var
        hp1, hp2 : tai;
        SizeChange: Boolean;
        PreMessage: string;
      begin
        Result := False;
        if (taicpu(p).oper[0]^.typ = top_reg) and
          SuperRegistersEqual(taicpu(p).oper[0]^.reg, taicpu(p).oper[1]^.reg) and
          GetNextInstruction(p, hp1) and (hp1.typ = ait_instruction) then
          begin
            { Change (using movzbl %al,%eax as an example):

                movzbl %al, %eax     movzbl %al, %eax
                cmpl   x,   %eax     testl  %eax,%eax

              To:

                cmpb   x,  %al       testb  %al, %al  (Move one back to avoid a false dependency)
                movzbl %al, %eax     movzbl %al, %eax

              Smaller instruction and minimises pipeline stall as the CPU
              doesn't have to wait for the register to get zero-extended. [Kit]

              Also allow if the smaller of the two registers is being checked,
              as this still removes the false dependency.
            }
            if
              (
                (
                  { cmp const,%reg - the constant must fit the smaller size }
                  (taicpu(hp1).opcode = A_CMP) and MatchOpType(taicpu(hp1), top_const, top_reg) and
                  ConstInRange(taicpu(hp1).oper[0]^.val, taicpu(p).opsize)
                ) or (
                  { If MatchOperand returns True, they must both be registers }
                  (taicpu(hp1).opcode = A_TEST) and MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^)
                )
              ) and
              { the checked register may not be wider than the movzx destination }
              (reg2opsize(taicpu(hp1).oper[1]^.reg) <= reg2opsize(taicpu(p).oper[1]^.reg)) and
              SuperRegistersEqual(taicpu(p).oper[1]^.reg, taicpu(hp1).oper[1]^.reg) then
              begin
                PreMessage := debug_op2str(taicpu(hp1).opcode) + debug_opsize2str(taicpu(hp1).opsize) + ' ' + debug_operstr(taicpu(hp1).oper[0]^) + ',' + debug_regname(taicpu(hp1).oper[1]^.reg) + ' -> ' + debug_op2str(taicpu(hp1).opcode);

                { move the comparison in front of the movzx }
                asml.Remove(hp1);
                asml.InsertBefore(hp1, p);

                { Swap instructions in the case of cmp 0,%reg or test %reg,%reg }
                if (taicpu(hp1).opcode = A_TEST) or (taicpu(hp1).oper[0]^.val = 0) then
                  begin
                    taicpu(hp1).opcode := A_TEST;
                    taicpu(hp1).loadreg(0, taicpu(p).oper[0]^.reg);
                  end;

                { retarget the check at the smaller source register }
                taicpu(hp1).oper[1]^.reg := taicpu(p).oper[0]^.reg;

                { shrink the comparison to the movzx source size }
                case taicpu(p).opsize of
                  S_BW, S_BL:
                    begin
                      SizeChange := taicpu(hp1).opsize <> S_B;
                      taicpu(hp1).changeopsize(S_B);
                    end;
                  S_WL:
                    begin
                      SizeChange := taicpu(hp1).opsize <> S_W;
                      taicpu(hp1).changeopsize(S_W);
                    end
                  else
                    InternalError(2020112701);
                end;

                UpdateUsedRegs(tai(p.Next));

                { Check if the register is used afterwards - if not, we can
                  remove the movzx instruction completely }
                if not RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg, p, UsedRegs) then
                  begin
                    { Hp1 is a better position than p for debugging purposes }
                    DebugMsg(SPeepholeOptimization + 'Movzx2Nop 4a', hp1);
                    RemoveCurrentp(p, hp1);
                    Result := True;
                  end;

                if SizeChange then
                  DebugMsg(SPeepholeOptimization + PreMessage +
                    debug_opsize2str(taicpu(hp1).opsize) + ' ' + debug_operstr(taicpu(hp1).oper[0]^) + ',' + debug_regname(taicpu(hp1).oper[1]^.reg) + ' (smaller and minimises pipeline stall - MovzxCmp2CmpMovzx)', hp1)
                else
                  DebugMsg(SPeepholeOptimization + 'MovzxCmp2CmpMovzx', hp1);

                Exit;
              end;

            { Change (using movzwl %ax,%eax as an example):

                movzwl %ax, %eax
                movb   %al, (dest)  (Register is smaller than read register in movz)

              To:

                movb   %al, (dest)  (Move one back to avoid a false dependency)
                movzwl %ax, %eax
            }
            if (taicpu(hp1).opcode = A_MOV) and
              (taicpu(hp1).oper[0]^.typ = top_reg) and
              { the mov's destination may not reference the moved register }
              not RegInOp(taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^) and
              SuperRegistersEqual(taicpu(hp1).oper[0]^.reg, taicpu(p).oper[0]^.reg) and
              (reg2opsize(taicpu(hp1).oper[0]^.reg) <= reg2opsize(taicpu(p).oper[0]^.reg)) then
              begin
                DebugMsg(SPeepholeOptimization + 'MovzxMov2MovMovzx', hp1);
                hp2 := tai(hp1.Previous); { Effectively the old position of hp1 }
                asml.Remove(hp1);
                asml.InsertBefore(hp1, p);
                { keep the mov's destination register allocated over the moved span }
                if taicpu(hp1).oper[1]^.typ = top_reg then
                  AllocRegBetween(taicpu(hp1).oper[1]^.reg, hp1, hp2, UsedRegs);
                { Check if the register is used afterwards - if not, we can
                  remove the movzx instruction completely }
                if not RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, p, UsedRegs) then
                  begin
                    { Hp1 is a better position than p for debugging purposes }
                    DebugMsg(SPeepholeOptimization + 'Movzx2Nop 4b', hp1);
                    RemoveCurrentp(p, hp1);
                    Result := True;
                  end;
                Exit;
              end;
          end;

{$ifdef x86_64}
        { Code size reduction by J. Gareth "Kit" Moreton }
        { Convert MOVZBQ and MOVZWQ to MOVZBL and MOVZWL respectively if it removes the REX prefix }
        if (taicpu(p).opsize in [S_BQ, S_WQ]) and
          (getsupreg(taicpu(p).oper[1]^.reg) in [RS_RAX, RS_RCX, RS_RDX, RS_RBX, RS_RSI, RS_RDI, RS_RBP, RS_RSP])
          then
          begin
            { Has 64-bit register name and opcode suffix }
            PreMessage := 'movz' + debug_opsize2str(taicpu(p).opsize) + ' ' + debug_operstr(taicpu(p).oper[0]^) + ',' + debug_regname(taicpu(p).oper[1]^.reg) + ' -> movz';

            { The actual optimization }
            setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
            if taicpu(p).opsize = S_BQ then
              taicpu(p).changeopsize(S_BL)
            else
              taicpu(p).changeopsize(S_WL);

            DebugMsg(SPeepholeOptimization + PreMessage +
              debug_opsize2str(taicpu(p).opsize) + ' ' + debug_operstr(taicpu(p).oper[0]^) + ',' + debug_regname(taicpu(p).oper[1]^.reg) + ' (removes REX prefix)', p);
          end;
{$endif}
      end;
  9444. {$ifdef x86_64}
  9445. function TX86AsmOptimizer.PostPeepholeOptXor(var p : tai) : Boolean;
  9446. var
  9447. PreMessage, RegName: string;
  9448. begin
  9449. { Code size reduction by J. Gareth "Kit" Moreton }
  9450. { change "xorq %reg,%reg" to "xorl %reg,%reg" for %rax, %rcx, %rdx, %rbx, %rsi, %rdi, %rbp and %rsp,
  9451. as this removes the REX prefix }
  9452. Result := False;
  9453. if not OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
  9454. Exit;
  9455. if taicpu(p).oper[0]^.typ <> top_reg then
  9456. { Should be impossible if both operands were equal, since one of XOR's operands must be a register }
  9457. InternalError(2018011500);
  9458. case taicpu(p).opsize of
  9459. S_Q:
  9460. begin
  9461. if (getsupreg(taicpu(p).oper[0]^.reg) in [RS_RAX, RS_RCX, RS_RDX, RS_RBX, RS_RSI, RS_RDI, RS_RBP, RS_RSP]) then
  9462. begin
  9463. RegName := debug_regname(taicpu(p).oper[0]^.reg); { 64-bit register name }
  9464. PreMessage := 'xorq ' + RegName + ',' + RegName + ' -> xorl ';
  9465. { The actual optimization }
  9466. setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
  9467. setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
  9468. taicpu(p).changeopsize(S_L);
  9469. RegName := debug_regname(taicpu(p).oper[0]^.reg); { 32-bit register name }
  9470. DebugMsg(SPeepholeOptimization + PreMessage + RegName + ',' + RegName + ' (removes REX prefix)', p);
  9471. end;
  9472. end;
  9473. else
  9474. ;
  9475. end;
  9476. end;
  9477. {$endif}
  9478. class procedure TX86AsmOptimizer.OptimizeRefs(var p: taicpu);
  9479. var
  9480. OperIdx: Integer;
  9481. begin
  9482. for OperIdx := 0 to p.ops - 1 do
  9483. if p.oper[OperIdx]^.typ = top_ref then
  9484. optimize_ref(p.oper[OperIdx]^.ref^, False);
  9485. end;
  9486. end.