aoptx86.pas 442 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
022210223102241022510226102271022810229102301023110232102331023410235102361023710238102391024010241102421024310244102451024610247102481024910250102511025210253102541025510256102571025810259102601026110262102631026410265102661026710268102691027010271102721027310274102751027610277102781027910280102811028210283102841028510286102871028810289102901029110292102931029410295102961029710298102991030010301
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. {$define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
type
  { Optimisations that are expensive to detect; a flag is set for them during
    the pre-optimisation pass so later passes only check them when worthwhile }
  TOptsToCheck = (
    aoc_MovAnd2Mov_3
  );

  { x86/x86-64 peephole optimizer }
  TX86AsmOptimizer = class(TAsmOptimizer)
    { some optimizations are very expensive to check, so the
      pre opt pass can be used to set some flags, depending on the found
      instructions if it is worth to check a certain optimization }
    OptsToCheck : set of TOptsToCheck;
    function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
    function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
    function RegReadByInstruction(reg : TRegister; hp : tai) : boolean;
    function RegInInstruction(Reg: TRegister; p1: tai): Boolean;override;
    function GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
    { This version of GetNextInstructionUsingReg will look across conditional jumps,
      potentially allowing further optimisation (although it might need to know if
      it crossed a conditional jump. }
    function GetNextInstructionUsingRegCond(Current: tai; out Next: tai; reg: TRegister; var CrossJump: Boolean): Boolean;
    {
      In comparison with GetNextInstructionUsingReg, GetNextInstructionUsingRegTrackingUse tracks
      the use of a register by allocs/dealloc, so it can ignore calls.
      In the following example, GetNextInstructionUsingReg will return the second movq,
      GetNextInstructionUsingRegTrackingUse won't.
      movq %rdi,%rax
      # Register rdi released
      # Register rdi allocated
      movq %rax,%rdi
      While in this example:
      movq %rdi,%rax
      call proc
      movq %rdi,%rax
      GetNextInstructionUsingRegTrackingUse will return the second instruction while GetNextInstructionUsingReg
      won't.
    }
    function GetNextInstructionUsingRegTrackingUse(Current: tai; out Next: tai; reg: TRegister): Boolean;
    function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; override;
  private
    function SkipSimpleInstructions(var hp1: tai): Boolean;
  protected
    class function IsMOVZXAcceptable: Boolean; static; inline;

    { Attempts to allocate a volatile integer register for use between p and hp,
      using AUsedRegs for the current register usage information. Returns NR_NO
      if no free register could be found }
    function GetIntRegisterBetween(RegSize: TSubRegister; var AUsedRegs: TAllUsedRegs; p, hp: tai): TRegister;
    { Attempts to allocate a volatile MM register for use between p and hp,
      using AUsedRegs for the current register usage information. Returns NR_NO
      if no free register could be found }
    function GetMMRegisterBetween(RegSize: TSubRegister; var AUsedRegs: TAllUsedRegs; p, hp: tai): TRegister;

    { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
    function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
    { checks whether reading the value in reg1 depends on the value of reg2. This
      is very similar to SuperRegisterEquals, except it takes into account that
      R_SUBH and R_SUBL are independendent (e.g. reading from AL does not
      depend on the value in AH). }
    function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;

    { Replaces all references to AOldReg in a memory reference to ANewReg }
    class function ReplaceRegisterInRef(var ref: TReference; const AOldReg, ANewReg: TRegister): Boolean; static;
    { Replaces all references to AOldReg in an operand to ANewReg }
    class function ReplaceRegisterInOper(const p: taicpu; const OperIdx: Integer; const AOldReg, ANewReg: TRegister): Boolean; static;
    { Replaces all references to AOldReg in an instruction to ANewReg,
      except where the register is being written }
    function ReplaceRegisterInInstruction(const p: taicpu; const AOldReg, ANewReg: TRegister): Boolean;

    { Returns true if the reference only refers to ESP or EBP (or their 64-bit equivalents),
      or writes to a global symbol }
    class function IsRefSafe(const ref: PReference): Boolean; static; inline;
    { Returns true if the given MOV instruction can be safely converted to CMOV }
    class function CanBeCMOV(p : tai) : boolean; static;
    { Converts the LEA instruction to ADD/INC/SUB/DEC. Returns True if the
      conversion was successful }
    function ConvertLEA(const p : taicpu): Boolean;
    function DeepMOVOpt(const p_mov: taicpu; const hp: taicpu): Boolean;
    procedure DebugMsg(const s : string; p : tai);inline;
    class function IsExitCode(p : tai) : boolean; static;
    class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean; static;
    procedure RemoveLastDeallocForFuncRes(p : tai);
    function DoSubAddOpt(var p : tai) : Boolean;

    { Pre-peephole pass handlers, one per opcode family }
    function PrePeepholeOptSxx(var p : tai) : boolean;
    function PrePeepholeOptIMUL(var p : tai) : boolean;
    function PrePeepholeOptAND(var p : tai) : boolean;

    { Pass 1 handlers, one per opcode family }
    function OptPass1Test(var p: tai): boolean;
    function OptPass1Add(var p: tai): boolean;
    function OptPass1AND(var p : tai) : boolean;
    function OptPass1_V_MOVAP(var p : tai) : boolean;
    function OptPass1VOP(var p : tai) : boolean;
    function OptPass1MOV(var p : tai) : boolean;
    function OptPass1Movx(var p : tai) : boolean;
    function OptPass1MOVXX(var p : tai) : boolean;
    function OptPass1OP(var p : tai) : boolean;
    function OptPass1LEA(var p : tai) : boolean;
    function OptPass1Sub(var p : tai) : boolean;
    function OptPass1SHLSAL(var p : tai) : boolean;
    function OptPass1FSTP(var p : tai) : boolean;
    function OptPass1FLD(var p : tai) : boolean;
    function OptPass1Cmp(var p : tai) : boolean;
    function OptPass1PXor(var p : tai) : boolean;
    function OptPass1VPXor(var p: tai): boolean;
    function OptPass1Imul(var p : tai) : boolean;
    function OptPass1Jcc(var p : tai) : boolean;
    function OptPass1SHXX(var p: tai): boolean;

    { Pass 2 handlers, one per opcode family }
    function OptPass2Movx(var p : tai): Boolean;
    function OptPass2MOV(var p : tai) : boolean;
    function OptPass2Imul(var p : tai) : boolean;
    function OptPass2Jmp(var p : tai) : boolean;
    function OptPass2Jcc(var p : tai) : boolean;
    function OptPass2Lea(var p: tai): Boolean;
    function OptPass2SUB(var p: tai): Boolean;
    function OptPass2ADD(var p : tai): Boolean;
    function OptPass2SETcc(var p : tai) : boolean;

    function CheckMemoryWrite(var first_mov, second_mov: taicpu): Boolean;

    { Post-peephole pass handlers, one per opcode family }
    function PostPeepholeOptMov(var p : tai) : Boolean;
    function PostPeepholeOptMovzx(var p : tai) : Boolean;
{$ifdef x86_64} { These post-peephole optimisations only affect 64-bit registers. [Kit] }
    function PostPeepholeOptXor(var p : tai) : Boolean;
{$endif}
    function PostPeepholeOptAnd(var p : tai) : boolean;
    function PostPeepholeOptMOVSX(var p : tai) : boolean;
    function PostPeepholeOptCmp(var p : tai) : Boolean;
    function PostPeepholeOptTestOr(var p : tai) : Boolean;
    function PostPeepholeOptCall(var p : tai) : Boolean;
    function PostPeepholeOptLea(var p : tai) : Boolean;
    function PostPeepholeOptPush(var p: tai): Boolean;
    function PostPeepholeOptShr(var p : tai) : boolean;

    procedure ConvertJumpToRET(const p: tai; const ret_p: tai);
    function CheckJumpMovTransferOpt(var p: tai; hp1: tai; LoopCount: Integer; out Count: Integer): Boolean;
    procedure SwapMovCmp(var p, hp1: tai);

    { Processor-dependent reference optimisation }
    class procedure OptimizeRefs(var p: taicpu); static;
  end;
{ Returns True if instr is an instruction with the given opcode (or one of the
  given opcodes) and, when opsize is non-empty, an operand size in opsize }
function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;

{ Operand matching helpers: against a specific register, a specific immediate
  value, or structural equality between two (or three) operands }
function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
{$if max_operands>2}
function MatchOperand(const oper1: TOper; const oper2: TOper; const oper3: TOper): boolean;
{$endif max_operands>2}

{ Structural equality of two memory references (never true for volatile refs) }
function RefsEqual(const r1, r2: treference): boolean;
function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
{ returns true, if ref is a reference using only the registers passed as base and index
  and having an offset }
function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  170. implementation
  171. uses
  172. cutils,verbose,
  173. systems,
  174. globals,
  175. cpuinfo,
  176. procinfo,
  177. paramgr,
  178. aasmbase,
  179. aoptbase,aoptutils,
  180. symconst,symsym,
  181. cgx86,
  182. itcpugas;
  183. {$ifdef DEBUG_AOPTCPU}
  184. const
  185. SPeepholeOptimization: shortstring = 'Peephole Optimization: ';
  186. {$else DEBUG_AOPTCPU}
  187. { Empty strings help the optimizer to remove string concatenations that won't
  188. ever appear to the user on release builds. [Kit] }
  189. const
  190. SPeepholeOptimization = '';
  191. {$endif DEBUG_AOPTCPU}
  192. LIST_STEP_SIZE = 4;
  193. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  194. begin
  195. result :=
  196. (instr.typ = ait_instruction) and
  197. (taicpu(instr).opcode = op) and
  198. ((opsize = []) or (taicpu(instr).opsize in opsize));
  199. end;
  200. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  201. begin
  202. result :=
  203. (instr.typ = ait_instruction) and
  204. ((taicpu(instr).opcode = op1) or
  205. (taicpu(instr).opcode = op2)
  206. ) and
  207. ((opsize = []) or (taicpu(instr).opsize in opsize));
  208. end;
  209. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  210. begin
  211. result :=
  212. (instr.typ = ait_instruction) and
  213. ((taicpu(instr).opcode = op1) or
  214. (taicpu(instr).opcode = op2) or
  215. (taicpu(instr).opcode = op3)
  216. ) and
  217. ((opsize = []) or (taicpu(instr).opsize in opsize));
  218. end;
  219. function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
  220. const opsize : topsizes) : boolean;
  221. var
  222. op : TAsmOp;
  223. begin
  224. result:=false;
  225. for op in ops do
  226. begin
  227. if (instr.typ = ait_instruction) and
  228. (taicpu(instr).opcode = op) and
  229. ((opsize = []) or (taicpu(instr).opsize in opsize)) then
  230. begin
  231. result:=true;
  232. exit;
  233. end;
  234. end;
  235. end;
  236. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  237. begin
  238. result := (oper.typ = top_reg) and (oper.reg = reg);
  239. end;
  240. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  241. begin
  242. result := (oper.typ = top_const) and (oper.val = a);
  243. end;
  244. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  245. begin
  246. result := oper1.typ = oper2.typ;
  247. if result then
  248. case oper1.typ of
  249. top_const:
  250. Result:=oper1.val = oper2.val;
  251. top_reg:
  252. Result:=oper1.reg = oper2.reg;
  253. top_ref:
  254. Result:=RefsEqual(oper1.ref^, oper2.ref^);
  255. else
  256. internalerror(2013102801);
  257. end
  258. end;
  259. function MatchOperand(const oper1: TOper; const oper2: TOper; const oper3: TOper): boolean;
  260. begin
  261. result := (oper1.typ = oper2.typ) and (oper1.typ = oper3.typ);
  262. if result then
  263. case oper1.typ of
  264. top_const:
  265. Result:=(oper1.val = oper2.val) and (oper1.val = oper3.val);
  266. top_reg:
  267. Result:=(oper1.reg = oper2.reg) and (oper1.reg = oper3.reg);
  268. top_ref:
  269. Result:=RefsEqual(oper1.ref^, oper2.ref^) and RefsEqual(oper1.ref^, oper3.ref^);
  270. else
  271. internalerror(2020052401);
  272. end
  273. end;
  274. function RefsEqual(const r1, r2: treference): boolean;
  275. begin
  276. RefsEqual :=
  277. (r1.offset = r2.offset) and
  278. (r1.segment = r2.segment) and (r1.base = r2.base) and
  279. (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
  280. (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
  281. (r1.relsymbol = r2.relsymbol) and
  282. (r1.volatility=[]) and
  283. (r2.volatility=[]);
  284. end;
  285. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  286. begin
  287. Result:=(ref.offset=0) and
  288. (ref.scalefactor in [0,1]) and
  289. (ref.segment=NR_NO) and
  290. (ref.symbol=nil) and
  291. (ref.relsymbol=nil) and
  292. ((base=NR_INVALID) or
  293. (ref.base=base)) and
  294. ((index=NR_INVALID) or
  295. (ref.index=index)) and
  296. (ref.volatility=[]);
  297. end;
  298. function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  299. begin
  300. Result:=(ref.scalefactor in [0,1]) and
  301. (ref.segment=NR_NO) and
  302. (ref.symbol=nil) and
  303. (ref.relsymbol=nil) and
  304. ((base=NR_INVALID) or
  305. (ref.base=base)) and
  306. ((index=NR_INVALID) or
  307. (ref.index=index)) and
  308. (ref.volatility=[]);
  309. end;
  310. function InstrReadsFlags(p: tai): boolean;
  311. begin
  312. InstrReadsFlags := true;
  313. case p.typ of
  314. ait_instruction:
  315. if InsProp[taicpu(p).opcode].Ch*
  316. [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
  317. Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
  318. Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc,Ch_All]<>[] then
  319. exit;
  320. ait_label:
  321. exit;
  322. else
  323. ;
  324. end;
  325. InstrReadsFlags := false;
  326. end;
  327. function TX86AsmOptimizer.GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
  328. begin
  329. Next:=Current;
  330. repeat
  331. Result:=GetNextInstruction(Next,Next);
  332. until not (Result) or
  333. not(cs_opt_level3 in current_settings.optimizerswitches) or
  334. (Next.typ<>ait_instruction) or
  335. RegInInstruction(reg,Next) or
  336. is_calljmp(taicpu(Next).opcode);
  337. end;
{ Like GetNextInstructionUsingReg, but continues scanning across conditional
  jumps, setting CrossJump when one is crossed.  Returns False immediately at
  an unconditional call/jmp/ret.  Deep scanning only happens at -O3. }
function TX86AsmOptimizer.GetNextInstructionUsingRegCond(Current: tai; out Next: tai; reg: TRegister; var CrossJump: Boolean): Boolean;
  begin
    { Note, CrossJump keeps its input value if a conditional jump is not found - it doesn't get set to False }
    Next := Current;
    repeat
      Result := GetNextInstruction(Next,Next);
      if Result and (Next.typ=ait_instruction) and is_calljmp(taicpu(Next).opcode) then
        if is_calljmpuncondret(taicpu(Next).opcode) then
          begin
            { Unconditional transfer of control: abandon the search }
            Result := False;
            Exit;
          end
        else
          { This else binds to the inner if: a conditional jump was crossed }
          CrossJump := True;
    until not Result or
      not (cs_opt_level3 in current_settings.optimizerswitches) or
      (Next.typ <> ait_instruction) or
      RegInInstruction(reg,Next);
  end;
{ Scans forward for the next instruction using reg, tracking the register's
  alloc/dealloc markers so CALL instructions can be skipped (see the examples
  in the interface section).  Returns False when the search is abandoned. }
function TX86AsmOptimizer.GetNextInstructionUsingRegTrackingUse(Current: tai; out Next: tai; reg: TRegister): Boolean;
  begin
    { Below optimizer level 3, just return the immediately following instruction }
    if not(cs_opt_level3 in current_settings.optimizerswitches) then
      begin
        Result:=GetNextInstruction(Current,Next);
        exit;
      end;
    Next:=tai(Current.Next);
    Result:=false;
    while assigned(Next) do
      begin
        { Abandon the search at: any control transfer other than CALL, an
          alloc/dealloc marker for reg's super-register, or a label that
          cannot be skipped }
        if ((Next.typ=ait_instruction) and is_calljmp(taicpu(Next).opcode) and not(taicpu(Next).opcode=A_CALL)) or
          ((Next.typ=ait_regalloc) and (getsupreg(tai_regalloc(Next).reg)=getsupreg(reg))) or
          ((Next.typ=ait_label) and not(labelCanBeSkipped(Tai_Label(Next)))) then
          exit
        { Found a non-CALL instruction that involves reg }
        else if (Next.typ=ait_instruction) and RegInInstruction(reg,Next) and not(taicpu(Next).opcode=A_CALL) then
          begin
            Result:=true;
            exit;
          end;
        Next:=tai(Next.Next);
      end;
  end;
  380. function TX86AsmOptimizer.InstructionLoadsFromReg(const reg: TRegister;const hp: tai): boolean;
  381. begin
  382. Result:=RegReadByInstruction(reg,hp);
  383. end;
{ Returns True if the instruction hp reads the register reg, taking into
  account implicit operands (MUL/IMUL/DIV accumulators), address calculation
  in memory operands, individual flag bits for conditional instructions, and
  the per-opcode change properties in insprop. }
function TX86AsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
  var
    p: taicpu;
    opcount: longint;
  begin
    RegReadByInstruction := false;
    if hp.typ <> ait_instruction then
      exit;
    p := taicpu(hp);
    case p.opcode of
      A_CALL:
        { Conservatively treat a call as reading every register }
        regreadbyinstruction := true;
      A_IMUL:
        case p.ops of
          1:
            { Single-operand IMUL implicitly reads the accumulator; for byte
              size only AL is read, so AH (R_SUBH) does not count then }
            regReadByInstruction := RegInOp(reg,p.oper[0]^) or
              (
                ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
                ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
              );
          2,3:
            regReadByInstruction :=
              reginop(reg,p.oper[0]^) or
              reginop(reg,p.oper[1]^);
          else
            InternalError(2019112801);
        end;
      A_MUL:
        begin
          { MUL implicitly reads the accumulator; for byte size only AL }
          regReadByInstruction := RegInOp(reg,p.oper[0]^) or
            (
              ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
              ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
            );
        end;
      A_IDIV,A_DIV:
        begin
          { Division implicitly reads EAX, and EDX too except for byte size }
          regReadByInstruction := RegInOp(reg,p.oper[0]^) or
            (
              (getregtype(reg)=R_INTREGISTER) and
              (
                (getsupreg(reg)=RS_EAX) or ((getsupreg(reg)=RS_EDX) and (p.opsize<>S_B))
              )
            );
        end;
      else
        begin
          { LEA only computes an address: segment registers are not read }
          if (p.opcode=A_LEA) and is_segment_reg(reg) then
            begin
              RegReadByInstruction := false;
              exit;
            end;
          { Any register appearing inside a memory operand is read for the
            address calculation, regardless of operand direction }
          for opcount := 0 to p.ops-1 do
            if (p.oper[opCount]^.typ = top_ref) and
              RegInRef(reg,p.oper[opcount]^.ref^) then
              begin
                RegReadByInstruction := true;
                exit
              end;
          { special handling for SSE MOVSD }
          if (p.opcode=A_MOVSD) and (p.ops>0) then
            begin
              { ops=0 is the string instruction MOVSD; the SSE form must
                always have exactly two operands }
              if p.ops<>2 then
                internalerror(2017042702);
              { For the reg,reg form the destination operand also counts as
                read; for memory forms only the source is read }
              regReadByInstruction := reginop(reg,p.oper[0]^) or
                (
                  (p.oper[1]^.typ=top_reg) and (p.oper[0]^.typ=top_reg) and reginop(reg, p.oper[1]^)
                );
              exit;
            end;
          { Generic path: consult the instruction's change properties }
          with insprop[p.opcode] do
            begin
              if getregtype(reg)=R_INTREGISTER then
                begin
                  { Implicit reads/modifies of specific integer registers }
                  case getsupreg(reg) of
                    RS_EAX:
                      if [Ch_REAX,Ch_RWEAX,Ch_MEAX]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_ECX:
                      if [Ch_RECX,Ch_RWECX,Ch_MECX]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_EDX:
                      if [Ch_REDX,Ch_RWEDX,Ch_MEDX]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_EBX:
                      if [Ch_REBX,Ch_RWEBX,Ch_MEBX]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_ESP:
                      if [Ch_RESP,Ch_RWESP,Ch_MESP]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_EBP:
                      if [Ch_REBP,Ch_RWEBP,Ch_MEBP]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_ESI:
                      if [Ch_RESI,Ch_RWESI,Ch_MESI]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_EDI:
                      if [Ch_REDI,Ch_RWEDI,Ch_MEDI]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                  end;
                end;
              if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
                begin
                  { For a conditional instruction, determine which individual
                    flag bits the condition actually tests (only when reg
                    denotes a single flag bit, not the whole flags register) }
                  if (Ch_RFLAGScc in Ch) and not(getsubreg(reg) in [R_SUBW,R_SUBD,R_SUBQ]) then
                    begin
                      case p.condition of
                        C_A,C_NBE, { CF=0 and ZF=0 }
                        C_BE,C_NA: { CF=1 or ZF=1 }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY,R_SUBFLAGZERO];
                        C_AE,C_NB,C_NC, { CF=0 }
                        C_B,C_NAE,C_C: { CF=1 }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY];
                        C_NE,C_NZ, { ZF=0 }
                        C_E,C_Z: { ZF=1 }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO];
                        C_G,C_NLE, { ZF=0 and SF=OF }
                        C_LE,C_NG: { ZF=1 or SF<>OF }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO,R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
                        C_GE,C_NL, { SF=OF }
                        C_L,C_NGE: { SF<>OF }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
                        C_NO, { OF=0 }
                        C_O: { OF=1 }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGOVERFLOW];
                        C_NP,C_PO, { PF=0 }
                        C_P,C_PE: { PF=1 }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGPARITY];
                        C_NS, { SF=0 }
                        C_S: { SF=1 }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN];
                        else
                          internalerror(2017042701);
                      end;
                      if RegReadByInstruction then
                        exit;
                    end;
                  { Otherwise map the flag sub-register onto the per-flag
                    read / read-write change properties }
                  case getsubreg(reg) of
                    R_SUBW,R_SUBD,R_SUBQ:
                      RegReadByInstruction :=
                        [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
                         Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
                         Ch_RDirFlag,Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc]*Ch<>[];
                    R_SUBFLAGCARRY:
                      RegReadByInstruction:=[Ch_RCarryFlag,Ch_RWCarryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGPARITY:
                      RegReadByInstruction:=[Ch_RParityFlag,Ch_RWParityFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGAUXILIARY:
                      RegReadByInstruction:=[Ch_RAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGZERO:
                      RegReadByInstruction:=[Ch_RZeroFlag,Ch_RWZeroFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGSIGN:
                      RegReadByInstruction:=[Ch_RSignFlag,Ch_RWSignFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGOVERFLOW:
                      RegReadByInstruction:=[Ch_ROverflowFlag,Ch_RWOverflowFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGINTERRUPT:
                      RegReadByInstruction:=[Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGDIRECTION:
                      RegReadByInstruction:=[Ch_RDirFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    else
                      internalerror(2017042601);
                  end;
                  exit;
                end;
              { Property says the operands are not read when both register
                operands are the same register }
              if (Ch_NoReadIfEqualRegs in Ch) and (p.ops=2) and
                (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
                (p.oper[0]^.reg=p.oper[1]^.reg) then
                exit;
              { Finally, check the read/read-write/modify bits per operand }
              if ([CH_RWOP1,CH_ROP1,CH_MOP1]*Ch<>[]) and reginop(reg,p.oper[0]^) then
                begin
                  RegReadByInstruction := true;
                  exit
                end;
              if ([Ch_RWOP2,Ch_ROP2,Ch_MOP2]*Ch<>[]) and reginop(reg,p.oper[1]^) then
                begin
                  RegReadByInstruction := true;
                  exit
                end;
              if ([Ch_RWOP3,Ch_ROP3,Ch_MOP3]*Ch<>[]) and reginop(reg,p.oper[2]^) then
                begin
                  RegReadByInstruction := true;
                  exit
                end;
              if ([Ch_RWOP4,Ch_ROP4,Ch_MOP4]*Ch<>[]) and reginop(reg,p.oper[3]^) then
                begin
                  RegReadByInstruction := true;
                  exit
                end;
            end;
        end;
    end;
  end;
{ Returns True if register Reg appears in instruction p1 in any role
  (read, written or modified), including implicit uses recorded in the
  instruction's change set (insprop[].Ch).  Falls back to the inherited
  operand-based check for anything not covered by the change sets. }
function TX86AsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
  begin
    result:=false;
    if p1.typ<>ait_instruction then
      exit;
    { Ch_All: the instruction may touch any register at all }
    if (Ch_All in insprop[taicpu(p1).opcode].Ch) then
      exit(true);
    if (getregtype(reg)=R_INTREGISTER) and
      { change information for xmm movsd are not correct }
      ((taicpu(p1).opcode<>A_MOVSD) or (taicpu(p1).ops=0)) then
      begin
        { Check the implicit read/write/modify change flags for the
          specific super-register }
        case getsupreg(reg) of
          { RS_EAX = RS_RAX on x86-64 }
          RS_EAX:
            result:=([Ch_REAX,Ch_RRAX,Ch_WEAX,Ch_WRAX,Ch_RWEAX,Ch_RWRAX,Ch_MEAX,Ch_MRAX]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_ECX:
            result:=([Ch_RECX,Ch_RRCX,Ch_WECX,Ch_WRCX,Ch_RWECX,Ch_RWRCX,Ch_MECX,Ch_MRCX]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_EDX:
            result:=([Ch_REDX,Ch_RRDX,Ch_WEDX,Ch_WRDX,Ch_RWEDX,Ch_RWRDX,Ch_MEDX,Ch_MRDX]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_EBX:
            result:=([Ch_REBX,Ch_RRBX,Ch_WEBX,Ch_WRBX,Ch_RWEBX,Ch_RWRBX,Ch_MEBX,Ch_MRBX]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_ESP:
            result:=([Ch_RESP,Ch_RRSP,Ch_WESP,Ch_WRSP,Ch_RWESP,Ch_RWRSP,Ch_MESP,Ch_MRSP]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_EBP:
            result:=([Ch_REBP,Ch_RRBP,Ch_WEBP,Ch_WRBP,Ch_RWEBP,Ch_RWRBP,Ch_MEBP,Ch_MRBP]*insprop[taicpu(p1).opcode].Ch)<>[];
          { NOTE(review): Ch_RMemEDI is grouped with ESI here and Ch_WMemEDI
            with EDI below - verify against x86ins.dat that this pairing of
            the string-instruction memory accesses is intentional }
          RS_ESI:
            result:=([Ch_RESI,Ch_RRSI,Ch_WESI,Ch_WRSI,Ch_RWESI,Ch_RWRSI,Ch_MESI,Ch_MRSI,Ch_RMemEDI]*insprop[taicpu(p1).opcode].Ch)<>[];
          RS_EDI:
            result:=([Ch_REDI,Ch_RRDI,Ch_WEDI,Ch_WRDI,Ch_RWEDI,Ch_RWRDI,Ch_MEDI,Ch_MRDI,Ch_WMemEDI]*insprop[taicpu(p1).opcode].Ch)<>[];
          else
            ;
        end;
        if result then
          exit;
      end
    else if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
      begin
        { Any whole-flags access covers every individual flag bit }
        if ([Ch_RFlags,Ch_WFlags,Ch_RWFlags,Ch_RFLAGScc]*insprop[taicpu(p1).opcode].Ch)<>[] then
          exit(true);
        { Otherwise check only the change bits of the specific flag asked for }
        case getsubreg(reg) of
          R_SUBFLAGCARRY:
            Result:=([Ch_RCarryFlag,Ch_RWCarryFlag,Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGPARITY:
            Result:=([Ch_RParityFlag,Ch_RWParityFlag,Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGAUXILIARY:
            Result:=([Ch_RAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGZERO:
            Result:=([Ch_RZeroFlag,Ch_RWZeroFlag,Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGSIGN:
            Result:=([Ch_RSignFlag,Ch_RWSignFlag,Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGOVERFLOW:
            Result:=([Ch_ROverflowFlag,Ch_RWOverflowFlag,Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGINTERRUPT:
            Result:=([Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*insprop[taicpu(p1).opcode].Ch)<>[];
          R_SUBFLAGDIRECTION:
            Result:=([Ch_RDirFlag,Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*insprop[taicpu(p1).opcode].Ch)<>[];
          else
            ;
        end;
        if result then
          exit;
      end
    else if (getregtype(reg)=R_FPUREGISTER) and (Ch_FPU in insprop[taicpu(p1).opcode].Ch) then
      exit(true);
    { Fall back to scanning the explicit operands }
    Result:=inherited RegInInstruction(Reg, p1);
  end;
{ Returns True if instruction p1 (potentially) writes or modifies register
  Reg.  Flags are handled per-bit via the instruction change sets; a few
  opcodes whose operand roles cannot be expressed in x86ins.dat (MOVSD,
  VMOVSS/VMOVSD, IMUL, CALL) are special-cased first. }
function TX86AsmOptimizer.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;
  begin
    Result := False;
    if p1.typ <> ait_instruction then
      exit;
    with insprop[taicpu(p1).opcode] do
      if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
        begin
          case getsubreg(reg) of
            { Asking about the whole flags register: any write to any flag counts }
            R_SUBW,R_SUBD,R_SUBQ:
              Result :=
                [Ch_WCarryFlag,Ch_WParityFlag,Ch_WAuxiliaryFlag,Ch_WZeroFlag,Ch_WSignFlag,Ch_WOverflowFlag,
                 Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
                 Ch_W0DirFlag,Ch_W1DirFlag,Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGCARRY:
              Result:=[Ch_WCarryFlag,Ch_RWCarryFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGPARITY:
              Result:=[Ch_WParityFlag,Ch_RWParityFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGAUXILIARY:
              Result:=[Ch_WAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGZERO:
              Result:=[Ch_WZeroFlag,Ch_RWZeroFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGSIGN:
              Result:=[Ch_WSignFlag,Ch_RWSignFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGOVERFLOW:
              Result:=[Ch_WOverflowFlag,Ch_RWOverflowFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGINTERRUPT:
              Result:=[Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            R_SUBFLAGDIRECTION:
              Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags,Ch_RWFlags]*Ch<>[];
            else
              internalerror(2017042602);
          end;
          exit;
        end;
    { Special cases that the generic change sets cannot describe }
    case taicpu(p1).opcode of
      A_CALL:
        { We could potentially set Result to False if the register in
          question is non-volatile for the subroutine's calling convention,
          but this would require detecting the calling convention in use and
          also assuming that the routine doesn't contain malformed assembly
          language, for example... so it could only be done under -O4 as it
          would be considered a side-effect. [Kit] }
        Result := True;
      A_MOVSD:
        { special handling for SSE MOVSD: with zero operands it is the
          string instruction, with two it writes its second operand }
        if (taicpu(p1).ops>0) then
          begin
            if taicpu(p1).ops<>2 then
              internalerror(2017042703);
            Result := (taicpu(p1).oper[1]^.typ=top_reg) and RegInOp(reg,taicpu(p1).oper[1]^);
          end;
      { VMOVSS and VMOVSD has two and three operand flavours, this cannot modelled by x86ins.dat
        so fix it here (FK)
      }
      A_VMOVSS,
      A_VMOVSD:
        begin
          Result := (taicpu(p1).ops=3) and (taicpu(p1).oper[2]^.typ=top_reg) and RegInOp(reg,taicpu(p1).oper[2]^);
          exit;
        end;
      A_IMUL:
        { IMUL always writes its last operand }
        Result := (taicpu(p1).oper[taicpu(p1).ops-1]^.typ=top_reg) and RegInOp(reg,taicpu(p1).oper[taicpu(p1).ops-1]^);
      else
        ;
    end;
    if Result then
      exit;
    { Generic path: implicit register writes recorded in the change set,
      then the explicit write/modify operands }
    with insprop[taicpu(p1).opcode] do
      begin
        if getregtype(reg)=R_INTREGISTER then
          begin
            case getsupreg(reg) of
              RS_EAX:
                if [Ch_WEAX,Ch_RWEAX,Ch_MEAX]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_ECX:
                if [Ch_WECX,Ch_RWECX,Ch_MECX]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_EDX:
                if [Ch_WEDX,Ch_RWEDX,Ch_MEDX]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_EBX:
                if [Ch_WEBX,Ch_RWEBX,Ch_MEBX]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_ESP:
                if [Ch_WESP,Ch_RWESP,Ch_MESP]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_EBP:
                if [Ch_WEBP,Ch_RWEBP,Ch_MEBP]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_ESI:
                if [Ch_WESI,Ch_RWESI,Ch_MESI]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
              RS_EDI:
                if [Ch_WEDI,Ch_RWEDI,Ch_MEDI]*Ch<>[] then
                  begin
                    Result := True;
                    exit
                  end;
            end;
          end;
        if ([CH_RWOP1,CH_WOP1,CH_MOP1]*Ch<>[]) and reginop(reg,taicpu(p1).oper[0]^) then
          begin
            Result := true;
            exit
          end;
        if ([Ch_RWOP2,Ch_WOP2,Ch_MOP2]*Ch<>[]) and reginop(reg,taicpu(p1).oper[1]^) then
          begin
            Result := true;
            exit
          end;
        if ([Ch_RWOP3,Ch_WOP3,Ch_MOP3]*Ch<>[]) and reginop(reg,taicpu(p1).oper[2]^) then
          begin
            Result := true;
            exit
          end;
        if ([Ch_RWOP4,Ch_WOP4,Ch_MOP4]*Ch<>[]) and reginop(reg,taicpu(p1).oper[3]^) then
          begin
            Result := true;
            exit
          end;
      end;
  end;
  810. {$ifdef DEBUG_AOPTCPU}
{ Debug build: emit the message as an assembler comment immediately
  before instruction p, so it appears in the generated assembly listing }
procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
  begin
    asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  end;
{ Debug helper: string representation of an integer constant }
function debug_tostr(i: tcgint): string; inline;
  begin
    Result := tostr(i);
  end;
{ Debug helper: register name in AT&T style, i.e. with a '%' prefix }
function debug_regname(r: TRegister): string; inline;
  begin
    Result := '%' + std_regname(r);
  end;
  823. { Debug output function - creates a string representation of an operator }
  824. function debug_operstr(oper: TOper): string;
  825. begin
  826. case oper.typ of
  827. top_const:
  828. Result := '$' + debug_tostr(oper.val);
  829. top_reg:
  830. Result := debug_regname(oper.reg);
  831. top_ref:
  832. begin
  833. if oper.ref^.offset <> 0 then
  834. Result := debug_tostr(oper.ref^.offset) + '('
  835. else
  836. Result := '(';
  837. if (oper.ref^.base <> NR_INVALID) and (oper.ref^.base <> NR_NO) then
  838. begin
  839. Result := Result + debug_regname(oper.ref^.base);
  840. if (oper.ref^.index <> NR_INVALID) and (oper.ref^.index <> NR_NO) then
  841. Result := Result + ',' + debug_regname(oper.ref^.index);
  842. end
  843. else
  844. if (oper.ref^.index <> NR_INVALID) and (oper.ref^.index <> NR_NO) then
  845. Result := Result + debug_regname(oper.ref^.index);
  846. if (oper.ref^.scalefactor > 1) then
  847. Result := Result + ',' + debug_tostr(oper.ref^.scalefactor) + ')'
  848. else
  849. Result := Result + ')';
  850. end;
  851. else
  852. Result := '[UNKNOWN]';
  853. end;
  854. end;
{ Debug helper: mnemonic of an opcode }
function debug_op2str(opcode: tasmop): string; inline;
  begin
    Result := std_op2str[opcode];
  end;
{ Debug helper: GAS-style suffix of an operand size }
function debug_opsize2str(opsize: topsize): string; inline;
  begin
    Result := gas_opsize2str[opsize];
  end;
  863. {$else DEBUG_AOPTCPU}
{ Release build: the debug helpers compile to inlined no-ops so that the
  call sites need no conditional compilation }
procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  begin
  end;

function debug_tostr(i: tcgint): string; inline;
  begin
    Result := '';
  end;

function debug_regname(r: TRegister): string; inline;
  begin
    Result := '';
  end;

function debug_operstr(oper: TOper): string; inline;
  begin
    Result := '';
  end;

function debug_op2str(opcode: tasmop): string; inline;
  begin
    Result := '';
  end;

function debug_opsize2str(opsize: topsize): string; inline;
  begin
    Result := '';
  end;
  887. {$endif DEBUG_AOPTCPU}
{ Returns True when replacing code with a MOVZX instruction is acceptable
  for the current target CPU and optimization settings }
class function TX86AsmOptimizer.IsMOVZXAcceptable: Boolean; inline;
  begin
{$ifdef x86_64}
    { Always fine on x86-64 }
    Result := True;
{$else x86_64}
    Result :=
{$ifdef i8086}
      { MOVZX only exists from the 80386 onwards }
      (current_settings.cputype >= cpu_386) and
{$endif i8086}
      (
        { Always accept if optimising for size }
        (cs_opt_size in current_settings.optimizerswitches) or
        { From the Pentium II onwards, MOVZX only takes 1 cycle. [Kit] }
        (current_settings.optimizecputype >= cpu_Pentium2)
      );
{$endif x86_64}
  end;
{ Attempts to allocate a volatile integer register for use between p and hp,
  using AUsedRegs for the current register usage information. Returns NR_NO
  if no free register could be found }
function TX86AsmOptimizer.GetIntRegisterBetween(RegSize: TSubRegister; var AUsedRegs: TAllUsedRegs; p, hp: tai): TRegister;
  var
    RegSet: TCPURegisterSet;
    CurrentSuperReg: Integer;
    CurrentReg: TRegister;
    Currentp: tai;
    Breakout: Boolean;
  begin
    { TODO: Currently, only the volatile registers are checked - can this be extended to use any register the procedure has preserved? }
    Result := NR_NO;
    RegSet := paramanager.get_volatile_registers_int(current_procinfo.procdef.proccalloption);
    for CurrentSuperReg := Low(RegSet) to High(RegSet) do
      begin
        CurrentReg := newreg(R_INTREGISTER, TSuperRegister(CurrentSuperReg), RegSize);
        if not AUsedRegs[R_INTREGISTER].IsUsed(CurrentReg) then
          begin
            { Walk the list from p towards hp and verify the candidate
              register stays untouched over the whole range }
            Currentp := p;
            Breakout := False;
            while not Breakout and GetNextInstruction(Currentp, Currentp) and (Currentp <> hp) do
              begin
                case Currentp.typ of
                  ait_instruction:
                    begin
                      if RegInInstruction(CurrentReg, Currentp) then
                        begin
                          Breakout := True;
                          Break;
                        end;
                      { Cannot allocate across an unconditional jump }
                      if is_calljmpuncondret(taicpu(Currentp).opcode) then
                        Exit;
                    end;
                  ait_marker:
                    { Don't try anything more if a marker is hit }
                    Exit;
                  ait_regalloc:
                    { An existing (non-dealloc) allocation of the same
                      super-register rules this candidate out }
                    if (tai_regalloc(Currentp).ratype <> ra_dealloc) and SuperRegistersEqual(CurrentReg, tai_regalloc(Currentp).reg) then
                      begin
                        Breakout := True;
                        Break;
                      end;
                  else
                    ;
                end;
              end;
            if Breakout then
              { Try the next register }
              Continue;
            { We have a free register available }
            Result := CurrentReg;
            AllocRegBetween(CurrentReg, p, hp, AUsedRegs);
            Exit;
          end;
      end;
  end;
{ Attempts to allocate a volatile MM register for use between p and hp,
  using AUsedRegs for the current register usage information. Returns NR_NO
  if no free register could be found.
  NOTE(review): this mirrors GetIntRegisterBetween with R_MMREGISTER -
  keep the two routines in sync when changing either. }
function TX86AsmOptimizer.GetMMRegisterBetween(RegSize: TSubRegister; var AUsedRegs: TAllUsedRegs; p, hp: tai): TRegister;
  var
    RegSet: TCPURegisterSet;
    CurrentSuperReg: Integer;
    CurrentReg: TRegister;
    Currentp: tai;
    Breakout: Boolean;
  begin
    { TODO: Currently, only the volatile registers are checked - can this be extended to use any register the procedure has preserved? }
    Result := NR_NO;
    RegSet := paramanager.get_volatile_registers_mm(current_procinfo.procdef.proccalloption);
    for CurrentSuperReg := Low(RegSet) to High(RegSet) do
      begin
        CurrentReg := newreg(R_MMREGISTER, TSuperRegister(CurrentSuperReg), RegSize);
        if not AUsedRegs[R_MMREGISTER].IsUsed(CurrentReg) then
          begin
            { Walk the list from p towards hp and verify the candidate
              register stays untouched over the whole range }
            Currentp := p;
            Breakout := False;
            while not Breakout and GetNextInstruction(Currentp, Currentp) and (Currentp <> hp) do
              begin
                case Currentp.typ of
                  ait_instruction:
                    begin
                      if RegInInstruction(CurrentReg, Currentp) then
                        begin
                          Breakout := True;
                          Break;
                        end;
                      { Cannot allocate across an unconditional jump }
                      if is_calljmpuncondret(taicpu(Currentp).opcode) then
                        Exit;
                    end;
                  ait_marker:
                    { Don't try anything more if a marker is hit }
                    Exit;
                  ait_regalloc:
                    { An existing (non-dealloc) allocation of the same
                      super-register rules this candidate out }
                    if (tai_regalloc(Currentp).ratype <> ra_dealloc) and SuperRegistersEqual(CurrentReg, tai_regalloc(Currentp).reg) then
                      begin
                        Breakout := True;
                        Break;
                      end;
                  else
                    ;
                end;
              end;
            if Breakout then
              { Try the next register }
              Continue;
            { We have a free register available }
            Result := CurrentReg;
            AllocRegBetween(CurrentReg, p, hp, AUsedRegs);
            Exit;
          end;
      end;
  end;
  1022. function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
  1023. begin
  1024. if not SuperRegistersEqual(reg1,reg2) then
  1025. exit(false);
  1026. if getregtype(reg1)<>R_INTREGISTER then
  1027. exit(true); {because SuperRegisterEqual is true}
  1028. case getsubreg(reg1) of
  1029. { A write to R_SUBL doesn't change R_SUBH and if reg2 is R_SUBW or
  1030. higher, it preserves the high bits, so the new value depends on
  1031. reg2's previous value. In other words, it is equivalent to doing:
  1032. reg2 := (reg2 and $ffffff00) or byte(reg1); }
  1033. R_SUBL:
  1034. exit(getsubreg(reg2)=R_SUBL);
  1035. { A write to R_SUBH doesn't change R_SUBL and if reg2 is R_SUBW or
  1036. higher, it actually does a:
  1037. reg2 := (reg2 and $ffff00ff) or (reg1 and $ff00); }
  1038. R_SUBH:
  1039. exit(getsubreg(reg2)=R_SUBH);
  1040. { If reg2 is R_SUBD or larger, a write to R_SUBW preserves the high 16
  1041. bits of reg2:
  1042. reg2 := (reg2 and $ffff0000) or word(reg1); }
  1043. R_SUBW:
  1044. exit(getsubreg(reg2) in [R_SUBL,R_SUBH,R_SUBW]);
  1045. { a write to R_SUBD always overwrites every other subregister,
  1046. because it clears the high 32 bits of R_SUBQ on x86_64 }
  1047. R_SUBD,
  1048. R_SUBQ:
  1049. exit(true);
  1050. else
  1051. internalerror(2017042801);
  1052. end;
  1053. end;
  1054. function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
  1055. begin
  1056. if not SuperRegistersEqual(reg1,reg2) then
  1057. exit(false);
  1058. if getregtype(reg1)<>R_INTREGISTER then
  1059. exit(true); {because SuperRegisterEqual is true}
  1060. case getsubreg(reg1) of
  1061. R_SUBL:
  1062. exit(getsubreg(reg2)<>R_SUBH);
  1063. R_SUBH:
  1064. exit(getsubreg(reg2)<>R_SUBL);
  1065. R_SUBW,
  1066. R_SUBD,
  1067. R_SUBQ:
  1068. exit(true);
  1069. else
  1070. internalerror(2017042802);
  1071. end;
  1072. end;
{ Pre-pass optimization of a SHR/SAR instruction at p that is immediately
  followed by a SHL of the same operand and size.
  NOTE(review): result stays False in every branch even though the
  instruction stream is rewritten - confirm whether the callers rely on
  the pre-peephole pass not reporting changes. }
function TX86AsmOptimizer.PrePeepholeOptSxx(var p : tai) : boolean;
  var
    hp1 : tai;
    l : TCGInt;
  begin
    result:=false;
    { changes the code sequence
      shr/sar const1, x
      shl const2, x
      to
      either "sar/and", "shl/and" or just "and" depending on const1 and const2 }
    if GetNextInstruction(p, hp1) and
      MatchInstruction(hp1,A_SHL,[]) and
      (taicpu(p).oper[0]^.typ = top_const) and
      (taicpu(hp1).oper[0]^.typ = top_const) and
      (taicpu(hp1).opsize = taicpu(p).opsize) and
      (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
      OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
      begin
        if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
          not(cs_opt_size in current_settings.optimizerswitches) then
          begin
            { shr/sar const1, %reg
              shl const2, %reg
              with const1 > const2:
              keep a reduced shift and turn the SHL into an AND that
              clears the const2 low bits }
            taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
            taicpu(hp1).opcode := A_AND;
            l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
            { The mask is the complement of the low const2 bits, truncated
              to the operand size }
            case taicpu(p).opsize Of
              S_B: taicpu(hp1).loadConst(0,l Xor $ff);
              S_W: taicpu(hp1).loadConst(0,l Xor $ffff);
              S_L: taicpu(hp1).loadConst(0,l Xor tcgint($ffffffff));
              S_Q: taicpu(hp1).loadConst(0,l Xor tcgint($ffffffffffffffff));
              else
                Internalerror(2017050703)
            end;
          end
        else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
          not(cs_opt_size in current_settings.optimizerswitches) then
          begin
            { shr/sar const1, %reg
              shl const2, %reg
              with const1 < const2:
              turn the first shift into an AND and keep a reduced SHL }
            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
            taicpu(p).opcode := A_AND;
            l := (1 shl (taicpu(p).oper[0]^.val))-1;
            case taicpu(p).opsize Of
              S_B: taicpu(p).loadConst(0,l Xor $ff);
              S_W: taicpu(p).loadConst(0,l Xor $ffff);
              S_L: taicpu(p).loadConst(0,l Xor tcgint($ffffffff));
              S_Q: taicpu(p).loadConst(0,l Xor tcgint($ffffffffffffffff));
              else
                Internalerror(2017050702)
            end;
          end
        else if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
          begin
            { shr/sar const1, %reg
              shl const2, %reg
              with const1 = const2:
              the pair only clears the low const1 bits - a single AND
              suffices and the SHL is removed }
            taicpu(p).opcode := A_AND;
            l := (1 shl (taicpu(p).oper[0]^.val))-1;
            case taicpu(p).opsize Of
              S_B: taicpu(p).loadConst(0,l Xor $ff);
              S_W: taicpu(p).loadConst(0,l Xor $ffff);
              S_L: taicpu(p).loadConst(0,l Xor tcgint($ffffffff));
              S_Q: taicpu(p).loadConst(0,l Xor tcgint($ffffffffffffffff));
              else
                Internalerror(2017050701)
            end;
            RemoveInstruction(hp1);
          end;
      end;
  end;
{ Pre-pass optimization of "imul const, reg[, reg]": removes multiplies
  by 1 and rewrites suitable constants as LEA (+ optional SHL) sequences.
  NOTE(review): the Imul2Mov branch rewrites the list without setting
  Result, unlike Imul2Nop - confirm this asymmetry is intended. }
function TX86AsmOptimizer.PrePeepholeOptIMUL(var p : tai) : boolean;
  var
    opsize : topsize;
    hp1 : tai;
    tmpref : treference;
    ShiftValue : Cardinal;
    BaseValue : TCGInt;
  begin
    result:=false;
    opsize:=taicpu(p).opsize;
    { changes certain "imul const, %reg"'s to lea sequences }
    if (MatchOpType(taicpu(p),top_const,top_reg) or
        MatchOpType(taicpu(p),top_const,top_reg,top_reg)) and
       (opsize in [S_L{$ifdef x86_64},S_Q{$endif x86_64}]) then
      if (taicpu(p).oper[0]^.val = 1) then
        if (taicpu(p).ops = 2) then
          { remove "imul $1, reg" }
          begin
            DebugMsg(SPeepholeOptimization + 'Imul2Nop done',p);
            Result := RemoveCurrentP(p);
          end
        else
          { change "imul $1, reg1, reg2" to "mov reg1, reg2" }
          begin
            hp1 := taicpu.Op_Reg_Reg(A_MOV, opsize, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
            InsertLLItem(p.previous, p.next, hp1);
            DebugMsg(SPeepholeOptimization + 'Imul2Mov done',p);
            p.free;
            p := hp1;
          end
      else if ((taicpu(p).ops <= 2) or
               (taicpu(p).oper[2]^.typ = Top_Reg)) and
        not(cs_opt_size in current_settings.optimizerswitches) and
        { a following Jcc on O/NO consumes the overflow flag IMUL sets,
          which LEA/SHL would not provide }
        (not(GetNextInstruction(p, hp1)) or
          not((tai(hp1).typ = ait_instruction) and
              ((taicpu(hp1).opcode=A_Jcc) and
               (taicpu(hp1).condition in [C_O,C_NO])))) then
        begin
          {
            imul X, reg1, reg2 to
              lea (reg1,reg1,Y), reg2
              shl ZZ,reg2
            imul XX, reg1 to
              lea (reg1,reg1,YY), reg1
              shl ZZ,reg2
            This optimization makes sense for pretty much every x86, except the VIA Nano3000: it has IMUL latency 2, lea/shl pair as well,
            it does not exist as a separate optimization target in FPC though.
            This optimization can be applied as long as only two bits are set in the constant and those two bits are separated by
            at most two zeros
          }
          reference_reset(tmpref,1,[]);
          if (PopCnt(QWord(taicpu(p).oper[0]^.val))=2) and (BsrQWord(taicpu(p).oper[0]^.val)-BsfQWord(taicpu(p).oper[0]^.val)<=3) then
            begin
              { Split the constant into (odd base) shl ShiftValue }
              ShiftValue:=BsfQWord(taicpu(p).oper[0]^.val);
              BaseValue:=taicpu(p).oper[0]^.val shr ShiftValue;
              TmpRef.base := taicpu(p).oper[1]^.reg;
              TmpRef.index := taicpu(p).oper[1]^.reg;
              { Only 3, 5 and 9 are expressible as base+index*scale }
              if not(BaseValue in [3,5,9]) then
                Internalerror(2018110101);
              TmpRef.ScaleFactor := BaseValue-1;
              if (taicpu(p).ops = 2) then
                hp1 := taicpu.op_ref_reg(A_LEA, opsize, TmpRef, taicpu(p).oper[1]^.reg)
              else
                hp1 := taicpu.op_ref_reg(A_LEA, opsize, TmpRef, taicpu(p).oper[2]^.reg);
              AsmL.InsertAfter(hp1,p);
              DebugMsg(SPeepholeOptimization + 'Imul2LeaShl done',p);
              taicpu(hp1).fileinfo:=taicpu(p).fileinfo;
              RemoveCurrentP(p, hp1);
              { Apply the power-of-two part of the constant, if any }
              if ShiftValue>0 then
                AsmL.InsertAfter(taicpu.op_const_reg(A_SHL, opsize, ShiftValue, taicpu(hp1).oper[1]^.reg),hp1);
            end;
        end;
  end;
  1220. function TX86AsmOptimizer.PrePeepholeOptAND(var p : tai) : boolean;
  1221. begin
  1222. Result := False;
  1223. if MatchOperand(taicpu(p).oper[0]^, 0) and
  1224. not RegInUsedRegs(NR_DEFAULTFLAGS, UsedRegs) then
  1225. begin
  1226. DebugMsg(SPeepholeOptimization + 'AND 0 -> MOV 0', p);
  1227. taicpu(p).opcode := A_MOV;
  1228. Result := True;
  1229. end;
  1230. end;
  1231. function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  1232. var
  1233. p: taicpu absolute hp;
  1234. i: Integer;
  1235. begin
  1236. Result := False;
  1237. if not assigned(hp) or
  1238. (hp.typ <> ait_instruction) then
  1239. Exit;
  1240. // p := taicpu(hp);
  1241. Prefetch(insprop[p.opcode]);
  1242. if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
  1243. with insprop[p.opcode] do
  1244. begin
  1245. case getsubreg(reg) of
  1246. R_SUBW,R_SUBD,R_SUBQ:
  1247. Result:=
  1248. RegLoadedWithNewValue(NR_CARRYFLAG,hp) and
  1249. RegLoadedWithNewValue(NR_PARITYFLAG,hp) and
  1250. RegLoadedWithNewValue(NR_AUXILIARYFLAG,hp) and
  1251. RegLoadedWithNewValue(NR_ZEROFLAG,hp) and
  1252. RegLoadedWithNewValue(NR_SIGNFLAG,hp) and
  1253. RegLoadedWithNewValue(NR_OVERFLOWFLAG,hp);
  1254. R_SUBFLAGCARRY:
  1255. Result:=[Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag,Ch_WFlags]*Ch<>[];
  1256. R_SUBFLAGPARITY:
  1257. Result:=[Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag,Ch_WFlags]*Ch<>[];
  1258. R_SUBFLAGAUXILIARY:
  1259. Result:=[Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag,Ch_WFlags]*Ch<>[];
  1260. R_SUBFLAGZERO:
  1261. Result:=[Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag,Ch_WFlags]*Ch<>[];
  1262. R_SUBFLAGSIGN:
  1263. Result:=[Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag,Ch_WFlags]*Ch<>[];
  1264. R_SUBFLAGOVERFLOW:
  1265. Result:=[Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag,Ch_WFlags]*Ch<>[];
  1266. R_SUBFLAGINTERRUPT:
  1267. Result:=[Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*Ch<>[];
  1268. R_SUBFLAGDIRECTION:
  1269. Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*Ch<>[];
  1270. else
  1271. begin
  1272. writeln(getsubreg(reg));
  1273. internalerror(2017050501);
  1274. end;
  1275. end;
  1276. exit;
  1277. end;
  1278. { Handle special cases first }
  1279. case p.opcode of
  1280. A_MOV, A_MOVZX, A_MOVSX, A_LEA, A_VMOVSS, A_VMOVSD, A_VMOVAPD,
  1281. A_VMOVAPS, A_VMOVQ, A_MOVSS, A_MOVSD, A_MOVQ, A_MOVAPD, A_MOVAPS:
  1282. begin
  1283. Result :=
  1284. (p.ops=2) and { A_MOVSD can have zero operands, so this check is needed }
  1285. (p.oper[1]^.typ = top_reg) and
  1286. (Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)) and
  1287. (
  1288. (p.oper[0]^.typ = top_const) or
  1289. (
  1290. (p.oper[0]^.typ = top_reg) and
  1291. not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))
  1292. ) or (
  1293. (p.oper[0]^.typ = top_ref) and
  1294. not RegInRef(reg,p.oper[0]^.ref^)
  1295. )
  1296. );
  1297. end;
  1298. A_MUL, A_IMUL:
  1299. Result :=
  1300. (
  1301. (p.ops=3) and { IMUL only }
  1302. (Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg,reg)) and
  1303. (
  1304. (
  1305. (p.oper[1]^.typ=top_reg) and
  1306. not Reg1ReadDependsOnReg2(p.oper[1]^.reg,reg)
  1307. ) or (
  1308. (p.oper[1]^.typ=top_ref) and
  1309. not RegInRef(reg,p.oper[1]^.ref^)
  1310. )
  1311. )
  1312. ) or (
  1313. (
  1314. (p.ops=1) and
  1315. (
  1316. (
  1317. (
  1318. (p.oper[0]^.typ=top_reg) and
  1319. not Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg)
  1320. )
  1321. ) or (
  1322. (p.oper[0]^.typ=top_ref) and
  1323. not RegInRef(reg,p.oper[0]^.ref^)
  1324. )
  1325. ) and (
  1326. (
  1327. (p.opsize=S_B) and
  1328. Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and
  1329. not Reg1ReadDependsOnReg2(NR_AL,reg)
  1330. ) or (
  1331. (p.opsize=S_W) and
  1332. Reg1WriteOverwritesReg2Entirely(NR_DX,reg)
  1333. ) or (
  1334. (p.opsize=S_L) and
  1335. Reg1WriteOverwritesReg2Entirely(NR_EDX,reg)
  1336. {$ifdef x86_64}
  1337. ) or (
  1338. (p.opsize=S_Q) and
  1339. Reg1WriteOverwritesReg2Entirely(NR_RDX,reg)
  1340. {$endif x86_64}
  1341. )
  1342. )
  1343. )
  1344. );
  1345. A_CBW:
  1346. Result := Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg));
  1347. {$ifndef x86_64}
  1348. A_LDS:
  1349. Result := (reg=NR_DS) and not(RegInRef(reg,p.oper[0]^.ref^));
  1350. A_LES:
  1351. Result := (reg=NR_ES) and not(RegInRef(reg,p.oper[0]^.ref^));
  1352. {$endif not x86_64}
  1353. A_LFS:
  1354. Result := (reg=NR_FS) and not(RegInRef(reg,p.oper[0]^.ref^));
  1355. A_LGS:
  1356. Result := (reg=NR_GS) and not(RegInRef(reg,p.oper[0]^.ref^));
  1357. A_LSS:
  1358. Result := (reg=NR_SS) and not(RegInRef(reg,p.oper[0]^.ref^));
  1359. A_LAHF{$ifndef x86_64}, A_AAM{$endif not x86_64}:
  1360. Result := Reg1WriteOverwritesReg2Entirely(NR_AH,reg);
  1361. A_LODSB:
  1362. Result := Reg1WriteOverwritesReg2Entirely(NR_AL,reg);
  1363. A_LODSW:
  1364. Result := Reg1WriteOverwritesReg2Entirely(NR_AX,reg);
  1365. {$ifdef x86_64}
  1366. A_LODSQ:
  1367. Result := Reg1WriteOverwritesReg2Entirely(NR_RAX,reg);
  1368. {$endif x86_64}
  1369. A_LODSD:
  1370. Result := Reg1WriteOverwritesReg2Entirely(NR_EAX,reg);
  1371. A_FSTSW, A_FNSTSW:
  1372. Result := (p.oper[0]^.typ=top_reg) and Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg);
  1373. else
  1374. begin
  1375. with insprop[p.opcode] do
  1376. begin
  1377. if (
  1378. { xor %reg,%reg etc. is classed as a new value }
  1379. (([Ch_NoReadIfEqualRegs]*Ch)<>[]) and
  1380. MatchOpType(p, top_reg, top_reg) and
  1381. (p.oper[0]^.reg = p.oper[1]^.reg) and
  1382. Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)
  1383. ) then
  1384. begin
  1385. Result := True;
  1386. Exit;
  1387. end;
  1388. { Make sure the entire register is overwritten }
  1389. if (getregtype(reg) = R_INTREGISTER) then
  1390. begin
  1391. if (p.ops > 0) then
  1392. begin
  1393. if RegInOp(reg, p.oper[0]^) then
  1394. begin
  1395. if (p.oper[0]^.typ = top_ref) then
  1396. begin
  1397. if RegInRef(reg, p.oper[0]^.ref^) then
  1398. begin
  1399. Result := False;
  1400. Exit;
  1401. end;
  1402. end
  1403. else if (p.oper[0]^.typ = top_reg) then
  1404. begin
  1405. if ([Ch_ROp1, Ch_RWOp1, Ch_MOp1]*Ch<>[]) then
  1406. begin
  1407. Result := False;
  1408. Exit;
  1409. end
  1410. else if ([Ch_WOp1]*Ch<>[]) then
  1411. begin
  1412. if Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg, reg) then
  1413. Result := True
  1414. else
  1415. begin
  1416. Result := False;
  1417. Exit;
  1418. end;
  1419. end;
  1420. end;
  1421. end;
  1422. if (p.ops > 1) then
  1423. begin
  1424. if RegInOp(reg, p.oper[1]^) then
  1425. begin
  1426. if (p.oper[1]^.typ = top_ref) then
  1427. begin
  1428. if RegInRef(reg, p.oper[1]^.ref^) then
  1429. begin
  1430. Result := False;
  1431. Exit;
  1432. end;
  1433. end
  1434. else if (p.oper[1]^.typ = top_reg) then
  1435. begin
  1436. if ([Ch_ROp2, Ch_RWOp2, Ch_MOp2]*Ch<>[]) then
  1437. begin
  1438. Result := False;
  1439. Exit;
  1440. end
  1441. else if ([Ch_WOp2]*Ch<>[]) then
  1442. begin
  1443. if Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg, reg) then
  1444. Result := True
  1445. else
  1446. begin
  1447. Result := False;
  1448. Exit;
  1449. end;
  1450. end;
  1451. end;
  1452. end;
  1453. if (p.ops > 2) then
  1454. begin
  1455. if RegInOp(reg, p.oper[2]^) then
  1456. begin
  1457. if (p.oper[2]^.typ = top_ref) then
  1458. begin
  1459. if RegInRef(reg, p.oper[2]^.ref^) then
  1460. begin
  1461. Result := False;
  1462. Exit;
  1463. end;
  1464. end
  1465. else if (p.oper[2]^.typ = top_reg) then
  1466. begin
  1467. if ([Ch_ROp3, Ch_RWOp3, Ch_MOp3]*Ch<>[]) then
  1468. begin
  1469. Result := False;
  1470. Exit;
  1471. end
  1472. else if ([Ch_WOp3]*Ch<>[]) then
  1473. begin
  1474. if Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg, reg) then
  1475. Result := True
  1476. else
  1477. begin
  1478. Result := False;
  1479. Exit;
  1480. end;
  1481. end;
  1482. end;
  1483. end;
  1484. if (p.ops > 3) and RegInOp(reg, p.oper[3]^) then
  1485. begin
  1486. if (p.oper[3]^.typ = top_ref) then
  1487. begin
  1488. if RegInRef(reg, p.oper[3]^.ref^) then
  1489. begin
  1490. Result := False;
  1491. Exit;
  1492. end;
  1493. end
  1494. else if (p.oper[3]^.typ = top_reg) then
  1495. begin
  1496. if ([Ch_ROp4, Ch_RWOp4, Ch_MOp4]*Ch<>[]) then
  1497. begin
  1498. Result := False;
  1499. Exit;
  1500. end
  1501. else if ([Ch_WOp4]*Ch<>[]) then
  1502. begin
  1503. if Reg1WriteOverwritesReg2Entirely(p.oper[3]^.reg, reg) then
  1504. Result := True
  1505. else
  1506. begin
  1507. Result := False;
  1508. Exit;
  1509. end;
  1510. end;
  1511. end;
  1512. end;
  1513. end;
  1514. end;
  1515. end;
  1516. { Don't do these ones first in case an input operand is equal to an explicit output registers }
  1517. case getsupreg(reg) of
  1518. RS_EAX:
  1519. if ([Ch_WEAX{$ifdef x86_64},Ch_WRAX{$endif x86_64}]*Ch<>[]) and Reg1WriteOverwritesReg2Entirely(NR_EAX, reg) then
  1520. begin
  1521. Result := True;
  1522. Exit;
  1523. end;
  1524. RS_ECX:
  1525. if ([Ch_WECX{$ifdef x86_64},Ch_WRCX{$endif x86_64}]*Ch<>[]) and Reg1WriteOverwritesReg2Entirely(NR_ECX, reg) then
  1526. begin
  1527. Result := True;
  1528. Exit;
  1529. end;
  1530. RS_EDX:
  1531. if ([Ch_REDX{$ifdef x86_64},Ch_WRDX{$endif x86_64}]*Ch<>[]) and Reg1WriteOverwritesReg2Entirely(NR_EDX, reg) then
  1532. begin
  1533. Result := True;
  1534. Exit;
  1535. end;
  1536. RS_EBX:
  1537. if ([Ch_WEBX{$ifdef x86_64},Ch_WRBX{$endif x86_64}]*Ch<>[]) and Reg1WriteOverwritesReg2Entirely(NR_EBX, reg) then
  1538. begin
  1539. Result := True;
  1540. Exit;
  1541. end;
  1542. RS_ESP:
  1543. if ([Ch_WESP{$ifdef x86_64},Ch_WRSP{$endif x86_64}]*Ch<>[]) and Reg1WriteOverwritesReg2Entirely(NR_ESP, reg) then
  1544. begin
  1545. Result := True;
  1546. Exit;
  1547. end;
  1548. RS_EBP:
  1549. if ([Ch_WEBP{$ifdef x86_64},Ch_WRBP{$endif x86_64}]*Ch<>[]) and Reg1WriteOverwritesReg2Entirely(NR_EBP, reg) then
  1550. begin
  1551. Result := True;
  1552. Exit;
  1553. end;
  1554. RS_ESI:
  1555. if ([Ch_WESI{$ifdef x86_64},Ch_WRSI{$endif x86_64}]*Ch<>[]) and Reg1WriteOverwritesReg2Entirely(NR_ESI, reg) then
  1556. begin
  1557. Result := True;
  1558. Exit;
  1559. end;
  1560. RS_EDI:
  1561. if ([Ch_WEDI{$ifdef x86_64},Ch_WRDI{$endif x86_64}]*Ch<>[]) and Reg1WriteOverwritesReg2Entirely(NR_EDI, reg) then
  1562. begin
  1563. Result := True;
  1564. Exit;
  1565. end;
  1566. else
  1567. ;
  1568. end;
  1569. end;
  1570. end;
  1571. end;
  1572. end;
  1573. end;
  { Returns True if p begins the exit sequence of the current procedure:
    a RET, optionally preceded by the usual epilogue (stack-frame teardown).
    Note: p is advanced past a leading NOP, and hp2/hp3 are assigned as a
    side effect of the short-circuit evaluation below. }
  class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
    var
      hp2,hp3 : tai;
    begin
      { some x86-64 issue a NOP before the real exit code }
      if MatchInstruction(p,A_NOP,[]) then
        GetNextInstruction(p,p);
      result:=assigned(p) and (p.typ=ait_instruction) and
        { case 1: a bare RET }
        ((taicpu(p).opcode = A_RET) or
        { case 2: LEAVE followed by RET }
         ((taicpu(p).opcode=A_LEAVE) and
          GetNextInstruction(p,hp2) and
          MatchInstruction(hp2,A_RET,[S_NO])
         ) or
         { case 3: "lea x(%esp),%esp" (releasing the stack frame when no
           frame pointer is in use) followed by RET }
         (((taicpu(p).opcode=A_LEA) and
           MatchOpType(taicpu(p),top_ref,top_reg) and
           (taicpu(p).oper[0]^.ref^.base=NR_STACK_POINTER_REG) and
           (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)
          ) and
          GetNextInstruction(p,hp2) and
          MatchInstruction(hp2,A_RET,[S_NO])
         ) or
         { case 4: restore the stack pointer from the frame pointer, either by
           "mov %framepointer,%esp" or "lea x(%framepointer),%esp", then pop
           the saved frame pointer, then RET }
         ((((taicpu(p).opcode=A_MOV) and
            MatchOpType(taicpu(p),top_reg,top_reg) and
            (taicpu(p).oper[0]^.reg=current_procinfo.framepointer) and
            (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)) or
           ((taicpu(p).opcode=A_LEA) and
            MatchOpType(taicpu(p),top_ref,top_reg) and
            (taicpu(p).oper[0]^.ref^.base=current_procinfo.framepointer) and
            (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)
           )
          ) and
          GetNextInstruction(p,hp2) and
          MatchInstruction(hp2,A_POP,[reg2opsize(current_procinfo.framepointer)]) and
          MatchOpType(taicpu(hp2),top_reg) and
          (taicpu(hp2).oper[0]^.reg=current_procinfo.framepointer) and
          GetNextInstruction(hp2,hp3) and
          MatchInstruction(hp3,A_RET,[S_NO])
         )
        );
    end;
  { Returns True if hp1 is an arithmetic instruction whose result lands in reg
    and which can therefore be folded with a preceding load of reg:
    either a two-operand op (ADD/SUB/OR/XOR/AND/SHL/SHR/SAR) whose destination
    is reg and whose source is a constant or a *different* register, or a
    single-operand op (INC/DEC/NEG/NOT) acting directly on reg. }
  class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
    begin
      Result:=False;
      case hp1.opcode of
        A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
          { destination (second operand) must be reg itself; the source must
            not clash with reg, otherwise folding would change the value read }
          if (hp1.oper[1]^.typ = top_reg) and
             (hp1.oper[1]^.reg = reg) then
            Result:=
              (hp1.oper[0]^.typ = top_const) or
              ((hp1.oper[0]^.typ = top_reg) and
               (hp1.oper[0]^.reg <> reg));
        A_INC,A_DEC,A_NEG,A_NOT:
          { unary ops: the sole operand must be reg }
          Result:=
            (hp1.oper[0]^.typ = top_reg) and
            (hp1.oper[0]^.reg = reg);
        else
          ;
      end;
    end;
  { Removes the last deallocation of the function-result register(s) before p,
    so that the register allocator keeps the result register live through the
    exit sequence.  Which register(s) hold the result depends on the return
    type of the current procedure. }
  procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);

    { Scan backwards from p and delete the nearest ra_dealloc of the given
      integer super-register; the search stops as soon as an entry that uses
      the register is encountered. }
    procedure DoRemoveLastDeallocForFuncRes(supreg: tsuperregister);
      var
        cur: tai;
      begin
        cur := tai(p.previous);
        while assigned(cur) do
          begin
            if (cur.typ = ait_regalloc) and
               (tai_regalloc(cur).ratype = ra_dealloc) and
               (getregtype(tai_regalloc(cur).reg) = R_INTREGISTER) and
               (getsupreg(tai_regalloc(cur).reg) = supreg) then
              begin
                RemoveInstruction(cur);
                break;
              end;
            { do not search past anything that actually uses the register }
            if regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),cur) then
              break;
            cur := tai(cur.previous);
          end;
      end;

    begin
      case current_procinfo.procdef.returndef.typ of
        { these result types are returned in EAX }
        arraydef,recorddef,pointerdef,
        stringdef,enumdef,procdef,objectdef,errordef,
        filedef,setdef,procvardef,
        classrefdef,forwarddef:
          DoRemoveLastDeallocForFuncRes(RS_EAX);
        orddef:
          if current_procinfo.procdef.returndef.size <> 0 then
            begin
              DoRemoveLastDeallocForFuncRes(RS_EAX);
              { for int64/qword }
              if current_procinfo.procdef.returndef.size = 8 then
                DoRemoveLastDeallocForFuncRes(RS_EDX);
            end;
        else
          ;
      end;
    end;
  { Pass-1 peephole optimisations for (V)MOVAPS/(V)MOVAPD instructions:
      - removes no-op moves (same source and destination register);
      - collapses chains of aligned moves when the intermediate register dies;
      - folds an aligned move followed by a scalar move of the same register;
      - rewrites the accumulator operand of a following FMA instruction;
      - eliminates "movapX reg,reg2; op reg3,reg2; movapX reg2,reg" triples.
    Returns True (and may update p) when a transformation was applied. }
  function TX86AsmOptimizer.OptPass1_V_MOVAP(var p : tai) : boolean;
    var
      hp1,hp2 : tai;
    begin
      result:=false;
      if MatchOpType(taicpu(p),top_reg,top_reg) then
        begin
          { vmova* reg1,reg1
            =>
            <nop> }
          if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
            begin
              RemoveCurrentP(p);
              result:=true;
              exit;
            end
          else if GetNextInstruction(p,hp1) then
            begin
              if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
                begin
                  { vmova* reg1,reg2
                    vmova* reg2,reg3
                    dealloc reg2
                    =>
                    vmova* reg1,reg3 }
                  TransferUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  { only valid if the intermediate register (reg2) is not used
                    after the second move }
                  if MatchOpType(taicpu(hp1),top_reg,top_reg) and
                    not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                    begin
                      DebugMsg(SPeepholeOptimization + '(V)MOVA*(V)MOVA*2(V)MOVA* 1',p);
                      taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                      RemoveInstruction(hp1);
                      result:=true;
                      exit;
                    end
                  { special case:
                    vmova* reg1,<op>
                    vmova* <op>,reg1
                    =>
                    vmova* reg1,<op>
                    (not applied to volatile memory references, as the second
                    read must then not be removed) }
                  else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
                    ((taicpu(p).oper[0]^.typ<>top_ref) or
                     (not(vol_read in taicpu(p).oper[0]^.ref^.volatility))
                    ) then
                    begin
                      DebugMsg(SPeepholeOptimization + '(V)MOVA*(V)MOVA*2(V)MOVA* 2',p);
                      RemoveInstruction(hp1);
                      result:=true;
                      exit;
                    end
                end
              else if ((MatchInstruction(p,[A_MOVAPS,A_VMOVAPS],[S_NO]) and
                 MatchInstruction(hp1,[A_MOVSS,A_VMOVSS],[S_NO])) or
                 ((MatchInstruction(p,[A_MOVAPD,A_VMOVAPD],[S_NO]) and
                 MatchInstruction(hp1,[A_MOVSD,A_VMOVSD],[S_NO])))
                ) and
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
                begin
                  { vmova* reg1,reg2
                    vmovs* reg2,<op>
                    dealloc reg2
                    =>
                    vmovs* reg1,<op> }
                  TransferUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                    begin
                      DebugMsg(SPeepholeOptimization + '(V)MOVA*(V)MOVS*2(V)MOVS* 1',p);
                      taicpu(p).opcode:=taicpu(hp1).opcode;
                      taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                      RemoveInstruction(hp1);
                      result:=true;
                      exit;
                    end
                end;
            end;
          if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[1]^.reg) then
            begin
              { fold the aligned move into a following FMA instruction that
                uses the moved register as its accumulator (third operand),
                when the FMA result is moved straight back to the source of p }
              if MatchInstruction(hp1,[A_VFMADDPD,
                                       A_VFMADD132PD,
                                       A_VFMADD132PS,
                                       A_VFMADD132SD,
                                       A_VFMADD132SS,
                                       A_VFMADD213PD,
                                       A_VFMADD213PS,
                                       A_VFMADD213SD,
                                       A_VFMADD213SS,
                                       A_VFMADD231PD,
                                       A_VFMADD231PS,
                                       A_VFMADD231SD,
                                       A_VFMADD231SS,
                                       A_VFMADDSUB132PD,
                                       A_VFMADDSUB132PS,
                                       A_VFMADDSUB213PD,
                                       A_VFMADDSUB213PS,
                                       A_VFMADDSUB231PD,
                                       A_VFMADDSUB231PS,
                                       A_VFMSUB132PD,
                                       A_VFMSUB132PS,
                                       A_VFMSUB132SD,
                                       A_VFMSUB132SS,
                                       A_VFMSUB213PD,
                                       A_VFMSUB213PS,
                                       A_VFMSUB213SD,
                                       A_VFMSUB213SS,
                                       A_VFMSUB231PD,
                                       A_VFMSUB231PS,
                                       A_VFMSUB231SD,
                                       A_VFMSUB231SS,
                                       A_VFMSUBADD132PD,
                                       A_VFMSUBADD132PS,
                                       A_VFMSUBADD213PD,
                                       A_VFMSUBADD213PS,
                                       A_VFMSUBADD231PD,
                                       A_VFMSUBADD231PS,
                                       A_VFNMADD132PD,
                                       A_VFNMADD132PS,
                                       A_VFNMADD132SD,
                                       A_VFNMADD132SS,
                                       A_VFNMADD213PD,
                                       A_VFNMADD213PS,
                                       A_VFNMADD213SD,
                                       A_VFNMADD213SS,
                                       A_VFNMADD231PD,
                                       A_VFNMADD231PS,
                                       A_VFNMADD231SD,
                                       A_VFNMADD231SS,
                                       A_VFNMSUB132PD,
                                       A_VFNMSUB132PS,
                                       A_VFNMSUB132SD,
                                       A_VFNMSUB132SS,
                                       A_VFNMSUB213PD,
                                       A_VFNMSUB213PS,
                                       A_VFNMSUB213SD,
                                       A_VFNMSUB213SS,
                                       A_VFNMSUB231PD,
                                       A_VFNMSUB231PS,
                                       A_VFNMSUB231SD,
                                       A_VFNMSUB231SS],[S_NO]) and
                { we mix single and double operations here because we assume that the compiler
                  generates vmovapd only after double operations and vmovaps only after single operations }
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
                GetNextInstruction(hp1,hp2) and
                MatchInstruction(hp2,[A_VMOVAPD,A_VMOVAPS,A_MOVAPD,A_MOVAPS],[S_NO]) and
                MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
                begin
                  TransferUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
                    begin
                      { rewrite the FMA accumulator to the original register
                        and drop both surrounding moves }
                      taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
                      RemoveCurrentP(p, hp1); // <-- Is this actually safe? hp1 is not necessarily the next instruction. [Kit]
                      RemoveInstruction(hp2);
                    end;
                end
              else if (hp1.typ = ait_instruction) and
                GetNextInstruction(hp1, hp2) and
                MatchInstruction(hp2,taicpu(p).opcode,[]) and
                OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
                MatchOpType(taicpu(hp2),top_reg,top_reg) and
                MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
                (((taicpu(p).opcode=A_MOVAPS) and
                  ((taicpu(hp1).opcode=A_ADDSS) or (taicpu(hp1).opcode=A_SUBSS) or
                   (taicpu(hp1).opcode=A_MULSS) or (taicpu(hp1).opcode=A_DIVSS))) or
                 ((taicpu(p).opcode=A_MOVAPD) and
                  ((taicpu(hp1).opcode=A_ADDSD) or (taicpu(hp1).opcode=A_SUBSD) or
                   (taicpu(hp1).opcode=A_MULSD) or (taicpu(hp1).opcode=A_DIVSD)))
                ) then
                { change
                    movapX reg,reg2
                    addsX/subsX/... reg3, reg2
                    movapX reg2,reg
                  to
                    addsX/subsX/... reg3,reg
                }
                begin
                  TransferUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                  If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
                    begin
                      DebugMsg(SPeepholeOptimization + 'MovapXOpMovapX2Op ('+
                        debug_op2str(taicpu(p).opcode)+' '+
                        debug_op2str(taicpu(hp1).opcode)+' '+
                        debug_op2str(taicpu(hp2).opcode)+') done',p);
                      { we cannot eliminate the first move if
                        the operations uses the same register for source and dest }
                      if not(OpsEqual(taicpu(hp1).oper[1]^,taicpu(hp1).oper[0]^)) then
                        RemoveCurrentP(p, nil);
                      p:=hp1;
                      taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
                      RemoveInstruction(hp2);
                      result:=true;
                    end;
                end;
            end;
        end;
    end;
  function TX86AsmOptimizer.OptPass1VOP(var p : tai) : boolean;
    var
      hp1 : tai;
    begin
      result:=false;
      { replace
          V<Op>X %mreg1,%mreg2,%mreg3
          VMovX  %mreg3,%mreg4
          dealloc %mreg3
        by
          V<Op>X %mreg1,%mreg2,%mreg4
        (guard-clause form: bail out as soon as the pattern cannot match) }
      if not GetNextInstruction(p,hp1) then
        exit;
      { we mix single and double operations here because we assume that the compiler
        generates vmovapd only after double operations and vmovaps only after single operations }
      if not MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) then
        exit;
      if not MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) then
        exit;
      if taicpu(hp1).oper[1]^.typ<>top_reg then
        exit;
      TransferUsedRegs(TmpUsedRegs);
      UpdateUsedRegs(TmpUsedRegs, tai(p.next));
      { the intermediate register must die with the move, otherwise a later
        reader would see the wrong value }
      if RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs) then
        exit;
      taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
      DebugMsg(SPeepholeOptimization + 'VOpVmov2VOp done',p);
      RemoveInstruction(hp1);
      result:=true;
    end;
  { Replaces all references to AOldReg in a memory reference with ANewReg.
    Returns True if at least one of the base/index registers was replaced. }
  class function TX86AsmOptimizer.ReplaceRegisterInRef(var ref: TReference; const AOldReg, ANewReg: TRegister): Boolean;
    begin
      { For safety reasons, only check for exact register matches }
      Result := (ref.base = AOldReg);
      if Result then
        ref.base := ANewReg;
      { the index register is tested independently - both may match }
      if ref.index = AOldReg then
        begin
          ref.index := ANewReg;
          Result := True;
        end;
    end;
  { Replaces all references to AOldReg in an operand to ANewReg.
    For register operands, a sub-register of AOldReg (same super-register,
    smaller or equal sub-register) is also replaced, keeping the operand's
    own sub-register size.  AOldReg and ANewReg must share register type and
    sub-register, otherwise an internal error is raised.
    Returns True if the operand was changed. }
  class function TX86AsmOptimizer.ReplaceRegisterInOper(const p: taicpu; const OperIdx: Integer; const AOldReg, ANewReg: TRegister): Boolean;
    var
      OldSupReg, NewSupReg: TSuperRegister;
      OldSubReg, NewSubReg: TSubRegister;
      OldRegType: TRegisterType;
      ThisOper: POper;
    begin
      ThisOper := p.oper[OperIdx]; { Faster to access overall }
      Result := False;
      { a "no register" argument is a caller bug }
      if (AOldReg = NR_NO) or (ANewReg = NR_NO) then
        InternalError(2020011801);
      OldSupReg := getsupreg(AOldReg);
      OldSubReg := getsubreg(AOldReg);
      OldRegType := getregtype(AOldReg);
      NewSupReg := getsupreg(ANewReg);
      NewSubReg := getsubreg(ANewReg);
      { old and new register must be of the same type and size }
      if OldRegType <> getregtype(ANewReg) then
        InternalError(2020011802);
      if OldSubReg <> NewSubReg then
        InternalError(2020011803);
      case ThisOper^.typ of
        top_reg:
          if (
            { exact match... }
            (ThisOper^.reg = AOldReg) or
            (
              { ...or, for integer registers, a smaller view of the same
                super-register (NOTE(review): assumes TSubRegister ordering
                places smaller views at lower ordinals - confirm) }
              (OldRegType = R_INTREGISTER) and
              (getsupreg(ThisOper^.reg) = OldSupReg) and
              (getregtype(ThisOper^.reg) = R_INTREGISTER) and
              (
                (getsubreg(ThisOper^.reg) <= OldSubReg)
{$ifndef x86_64}
                and (
                  { Under i386 and i8086, ESI, EDI, EBP and ESP
                    don't have an 8-bit representation }
                  (getsubreg(ThisOper^.reg) >= R_SUBW) or
                  not (NewSupReg in [RS_ESI, RS_EDI, RS_EBP, RS_ESP])
                )
{$endif x86_64}
              )
            )
          ) then
            begin
              { keep the operand's original sub-register size on the new
                super-register }
              ThisOper^.reg := newreg(getregtype(ANewReg), NewSupReg, getsubreg(p.oper[OperIdx]^.reg));
              Result := True;
            end;
        top_ref:
          if ReplaceRegisterInRef(ThisOper^.ref^, AOldReg, ANewReg) then
            Result := True;
        else
          ;
      end;
    end;
  { Replaces all references to AOldReg in an instruction with ANewReg,
    but only in operands that the instruction reads; the CL operand of
    shift/rotate instructions is left untouched.  Returns True if any
    operand was changed. }
  function TX86AsmOptimizer.ReplaceRegisterInInstruction(const p: taicpu; const AOldReg, ANewReg: TRegister): Boolean;
    const
      ReadFlag: array[0..3] of TInsChange = (Ch_Rop1, Ch_Rop2, Ch_Rop3, Ch_Rop4);
    var
      Idx: Integer;
    begin
      Result := False;
      for Idx := 0 to p.ops - 1 do
        begin
          { skip operands the instruction never reads }
          if not (ReadFlag[Idx] in InsProp[p.Opcode].Ch) then
            Continue;
          { The shift and rotate instructions can only use CL }
          if (Idx = 0) and
            { This second condition just helps to avoid unnecessarily
              calling MatchInstruction for 10 different opcodes }
            (p.oper[0]^.reg = NR_CL) and
            MatchInstruction(p, [A_RCL, A_RCR, A_ROL, A_ROR, A_SAL, A_SAR, A_SHL, A_SHLD, A_SHR, A_SHRD], []) then
            Continue;
          if ReplaceRegisterInOper(p, Idx, AOldReg, ANewReg) then
            Result := True;
        end;
    end;
  { Returns True if dereferencing ref can be considered safe to speculate:
    no index register, and a base that is the stack pointer, the frame
    pointer, or (on x86-64) a RIP-relative PIC address. }
  class function TX86AsmOptimizer.IsRefSafe(const ref: PReference): Boolean; inline;
    begin
      { any indexed reference is rejected outright }
      if ref^.index <> NR_NO then
        begin
          Result := False;
          Exit;
        end;
{$ifdef x86_64}
      if (ref^.base = NR_RIP) and
        (ref^.refaddr in [addr_pic, addr_pic_no_got]) then
        begin
          Result := True;
          Exit;
        end;
{$endif x86_64}
      Result :=
        (ref^.base = NR_STACK_POINTER_REG) or
        (ref^.base = current_procinfo.framepointer);
    end;
  { Rewrites "lea offset(reg),reg" in place as the equivalent INC/DEC
    (when offset is +/-1 and INC/DEC are preferred) or ADD/SUB instruction.
    Returns True if p was converted.
    NOTE(review): unlike LEA, the replacement instructions modify the flags;
    presumably callers only invoke this when the flags are dead - confirm at
    the call sites. }
  function TX86AsmOptimizer.ConvertLEA(const p: taicpu): Boolean;
    var
      l: asizeint;  { displacement taken from the LEA reference }
    begin
      Result := False;
      { Should have been checked previously }
      if p.opcode <> A_LEA then
        InternalError(2020072501);
      { do not mess with the stack pointer, as adjusting it by lea is recommended,
        except if we optimise for size }
      if (p.oper[1]^.reg=NR_STACK_POINTER_REG) and
        not(cs_opt_size in current_settings.optimizerswitches) then
        exit;
      with p.oper[0]^.ref^ do
        begin
          { only a plain "destination register + constant displacement"
            reference can be turned into arithmetic }
          if (base <> p.oper[1]^.reg) or
            (index <> NR_NO) or
            assigned(symbol) then
            exit;
          l:=offset;
          if (l=1) and UseIncDec then
            begin
              p.opcode:=A_INC;
              p.loadreg(0,p.oper[1]^.reg);
              p.ops:=1;
              DebugMsg(SPeepholeOptimization + 'Lea2Inc done',p);
            end
          else if (l=-1) and UseIncDec then
            begin
              p.opcode:=A_DEC;
              p.loadreg(0,p.oper[1]^.reg);
              p.ops:=1;
              DebugMsg(SPeepholeOptimization + 'Lea2Dec done',p);
            end
          else
            begin
              { negating -2147483648 would overflow a 32-bit constant,
                so that value stays an ADD }
              if (l<0) and (l<>-2147483648) then
                begin
                  p.opcode:=A_SUB;
                  p.loadConst(0,-l);
                  DebugMsg(SPeepholeOptimization + 'Lea2Sub done',p);
                end
              else
                begin
                  p.opcode:=A_ADD;
                  p.loadConst(0,l);
                  DebugMsg(SPeepholeOptimization + 'Lea2Add done',p);
                end;
            end;
        end;
      Result := True;
    end;
  { Given "mov %reg1,%reg2" (p_mov) and a later instruction hp that reads
    %reg2, tries to replace those reads with %reg1 to shorten the dependency
    chain (reducing pipeline stalls).  Opcodes with restricted or implicit
    operands are excluded; IMUL is handled per operand count.  Returns True
    if hp was changed; the replacement register is kept allocated between
    p_mov and hp. }
  function TX86AsmOptimizer.DeepMOVOpt(const p_mov: taicpu; const hp: taicpu): Boolean;
    var
      CurrentReg, ReplaceReg: TRegister;
    begin
      Result := False;
      ReplaceReg := taicpu(p_mov).oper[0]^.reg;  { source of the MOV }
      CurrentReg := taicpu(p_mov).oper[1]^.reg;  { destination of the MOV }
      case hp.opcode of
        A_FSTSW, A_FNSTSW,
        A_IN,   A_INS,  A_OUT, A_OUTS,
        A_CMPS, A_LODS, A_MOVS, A_SCAS, A_STOS:
          { These routines have explicit operands, but they are restricted in
            what they can be (e.g. IN and OUT can only read from AL, AX or
            EAX. }
          Exit;
        A_IMUL:
          begin
            { The 1-operand version writes to implicit registers
              The 2-operand version reads from the first operator, and reads
              from and writes to the second (equivalent to Ch_ROp1, ChRWOp2).
              the 3-operand version reads from a register that it doesn't write to
            }
            case hp.ops of
              1:
                { do not touch the register if it is one of IMUL's implicit
                  operands (AL for byte size, otherwise EAX/EDX) }
                if (
                  (
                    (hp.opsize = S_B) and (getsupreg(CurrentReg) <> RS_EAX)
                  ) or
                  not (getsupreg(CurrentReg) in [RS_EAX, RS_EDX])
                ) and ReplaceRegisterInOper(hp, 0, CurrentReg, ReplaceReg) then
                  begin
                    Result := True;
                    DebugMsg(SPeepholeOptimization + debug_regname(CurrentReg) + ' = ' + debug_regname(ReplaceReg) + '; changed to minimise pipeline stall (MovIMul2MovIMul 1)', hp);
                    AllocRegBetween(ReplaceReg, p_mov, hp, UsedRegs);
                  end;
              2:
                { Only modify the first parameter }
                if ReplaceRegisterInOper(hp, 0, CurrentReg, ReplaceReg) then
                  begin
                    Result := True;
                    DebugMsg(SPeepholeOptimization + debug_regname(CurrentReg) + ' = ' + debug_regname(ReplaceReg) + '; changed to minimise pipeline stall (MovIMul2MovIMul 2)', hp);
                    AllocRegBetween(ReplaceReg, p_mov, hp, UsedRegs);
                  end;
              3:
                { Only modify the second parameter }
                if ReplaceRegisterInOper(hp, 1, CurrentReg, ReplaceReg) then
                  begin
                    Result := True;
                    DebugMsg(SPeepholeOptimization + debug_regname(CurrentReg) + ' = ' + debug_regname(ReplaceReg) + '; changed to minimise pipeline stall (MovIMul2MovIMul 3)', hp);
                    AllocRegBetween(ReplaceReg, p_mov, hp, UsedRegs);
                  end;
              else
                { IMUL never has more than three operands }
                InternalError(2020012901);
            end;
          end;
        else
          { generic case: rewrite every read operand of hp }
          if (hp.ops > 0) and
            ReplaceRegisterInInstruction(hp, CurrentReg, ReplaceReg) then
            begin
              Result := True;
              DebugMsg(SPeepholeOptimization + debug_regname(CurrentReg) + ' = ' + debug_regname(ReplaceReg) + '; changed to minimise pipeline stall (MovXXX2MovXXX)', hp);
              AllocRegBetween(ReplaceReg, p_mov, hp, UsedRegs);
            end;
      end;
    end;
  2125. function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  2126. var
  2127. hp1, hp2, hp3: tai;
  2128. DoOptimisation, TempBool: Boolean;
  2129. procedure convert_mov_value(signed_movop: tasmop; max_value: tcgint); inline;
  2130. begin
  2131. if taicpu(hp1).opcode = signed_movop then
  2132. begin
  2133. if taicpu(p).oper[0]^.val > max_value shr 1 then
  2134. taicpu(p).oper[0]^.val:=taicpu(p).oper[0]^.val - max_value - 1 { Convert to signed }
  2135. end
  2136. else
  2137. taicpu(p).oper[0]^.val:=taicpu(p).oper[0]^.val and max_value; { Trim to unsigned }
  2138. end;
  2139. var
  2140. GetNextInstruction_p, TempRegUsed, CrossJump: Boolean;
  2141. PreMessage, RegName1, RegName2, InputVal, MaskNum: string;
  2142. NewSize: topsize;
  2143. CurrentReg, ActiveReg: TRegister;
  2144. SourceRef, TargetRef: TReference;
  2145. MovAligned, MovUnaligned: TAsmOp;
  2146. begin
  2147. Result:=false;
  2148. GetNextInstruction_p:=GetNextInstruction(p, hp1);
  2149. { remove mov reg1,reg1? }
  2150. if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^)
  2151. then
  2152. begin
  2153. DebugMsg(SPeepholeOptimization + 'Mov2Nop 1 done',p);
  2154. { take care of the register (de)allocs following p }
  2155. RemoveCurrentP(p, hp1);
  2156. Result:=true;
  2157. exit;
  2158. end;
  2159. { All the next optimisations require a next instruction }
  2160. if not GetNextInstruction_p or (hp1.typ <> ait_instruction) then
  2161. Exit;
  2162. { Look for:
  2163. mov %reg1,%reg2
  2164. ??? %reg2,r/m
  2165. Change to:
  2166. mov %reg1,%reg2
  2167. ??? %reg1,r/m
  2168. }
  2169. if MatchOpType(taicpu(p), top_reg, top_reg) then
  2170. begin
  2171. CurrentReg := taicpu(p).oper[1]^.reg;
  2172. if RegReadByInstruction(CurrentReg, hp1) and
  2173. DeepMOVOpt(taicpu(p), taicpu(hp1)) then
  2174. begin
  2175. TransferUsedRegs(TmpUsedRegs);
  2176. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  2177. if not RegUsedAfterInstruction(CurrentReg, hp1, TmpUsedRegs) and
  2178. { Just in case something didn't get modified (e.g. an
  2179. implicit register) }
  2180. not RegReadByInstruction(CurrentReg, hp1) then
  2181. begin
  2182. { We can remove the original MOV }
  2183. DebugMsg(SPeepholeOptimization + 'Mov2Nop 3 done',p);
  2184. RemoveCurrentp(p, hp1);
  2185. { UsedRegs got updated by RemoveCurrentp }
  2186. Result := True;
  2187. Exit;
  2188. end;
  2189. { If we know a MOV instruction has become a null operation, we might as well
  2190. get rid of it now to save time. }
  2191. if (taicpu(hp1).opcode = A_MOV) and
  2192. (taicpu(hp1).oper[1]^.typ = top_reg) and
  2193. SuperRegistersEqual(taicpu(hp1).oper[1]^.reg, taicpu(p).oper[0]^.reg) and
  2194. { Just being a register is enough to confirm it's a null operation }
  2195. (taicpu(hp1).oper[0]^.typ = top_reg) then
  2196. begin
  2197. Result := True;
  2198. { Speed-up to reduce a pipeline stall... if we had something like...
  2199. movl %eax,%edx
  2200. movw %dx,%ax
  2201. ... the second instruction would change to movw %ax,%ax, but
  2202. given that it is now %ax that's active rather than %eax,
  2203. penalties might occur due to a partial register write, so instead,
  2204. change it to a MOVZX instruction when optimising for speed.
  2205. }
  2206. if not (cs_opt_size in current_settings.optimizerswitches) and
  2207. IsMOVZXAcceptable and
  2208. (taicpu(hp1).opsize < taicpu(p).opsize)
  2209. {$ifdef x86_64}
  2210. { operations already implicitly set the upper 64 bits to zero }
  2211. and not ((taicpu(hp1).opsize = S_L) and (taicpu(p).opsize = S_Q))
  2212. {$endif x86_64}
  2213. then
  2214. begin
  2215. CurrentReg := taicpu(hp1).oper[1]^.reg;
  2216. DebugMsg(SPeepholeOptimization + 'Zero-extension to minimise pipeline stall (Mov2Movz)',hp1);
  2217. case taicpu(p).opsize of
  2218. S_W:
  2219. if taicpu(hp1).opsize = S_B then
  2220. taicpu(hp1).opsize := S_BL
  2221. else
  2222. InternalError(2020012911);
  2223. S_L{$ifdef x86_64}, S_Q{$endif x86_64}:
  2224. case taicpu(hp1).opsize of
  2225. S_B:
  2226. taicpu(hp1).opsize := S_BL;
  2227. S_W:
  2228. taicpu(hp1).opsize := S_WL;
  2229. else
  2230. InternalError(2020012912);
  2231. end;
  2232. else
  2233. InternalError(2020012910);
  2234. end;
  2235. taicpu(hp1).opcode := A_MOVZX;
  2236. taicpu(hp1).oper[1]^.reg := newreg(getregtype(CurrentReg), getsupreg(CurrentReg), R_SUBD)
  2237. end
  2238. else
  2239. begin
  2240. GetNextInstruction_p := GetNextInstruction(hp1, hp2);
  2241. DebugMsg(SPeepholeOptimization + 'Mov2Nop 4 done',hp1);
  2242. RemoveInstruction(hp1);
  2243. { The instruction after what was hp1 is now the immediate next instruction,
  2244. so we can continue to make optimisations if it's present }
  2245. if not GetNextInstruction_p or (hp2.typ <> ait_instruction) then
  2246. Exit;
  2247. hp1 := hp2;
  2248. end;
  2249. end;
  2250. end;
  2251. end;
  2252. { Depending on the DeepMOVOpt above, it may turn out that hp1 completely
  2253. overwrites the original destination register. e.g.
  2254. movl ###,%reg2d
  2255. movslq ###,%reg2q (### doesn't have to be the same as the first one)
  2256. In this case, we can remove the MOV (Go to "Mov2Nop 5" below)
  2257. }
  2258. if (taicpu(p).oper[1]^.typ = top_reg) and
  2259. MatchInstruction(hp1, [A_LEA, A_MOV, A_MOVSX, A_MOVZX{$ifdef x86_64}, A_MOVSXD{$endif x86_64}], []) and
  2260. (taicpu(hp1).oper[1]^.typ = top_reg) and
  2261. Reg1WriteOverwritesReg2Entirely(taicpu(hp1).oper[1]^.reg, taicpu(p).oper[1]^.reg) then
  2262. begin
  2263. if RegInOp(taicpu(p).oper[1]^.reg, taicpu(hp1).oper[0]^) then
  2264. begin
  2265. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  2266. case taicpu(p).oper[0]^.typ of
  2267. top_const:
  2268. { We have something like:
  2269. movb $x, %regb
  2270. movzbl %regb,%regd
  2271. Change to:
  2272. movl $x, %regd
  2273. }
  2274. begin
  2275. case taicpu(hp1).opsize of
  2276. S_BW:
  2277. begin
  2278. convert_mov_value(A_MOVSX, $FF);
  2279. setsubreg(taicpu(p).oper[1]^.reg, R_SUBW);
  2280. taicpu(p).opsize := S_W;
  2281. end;
  2282. S_BL:
  2283. begin
  2284. convert_mov_value(A_MOVSX, $FF);
  2285. setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
  2286. taicpu(p).opsize := S_L;
  2287. end;
  2288. S_WL:
  2289. begin
  2290. convert_mov_value(A_MOVSX, $FFFF);
  2291. setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
  2292. taicpu(p).opsize := S_L;
  2293. end;
  2294. {$ifdef x86_64}
  2295. S_BQ:
  2296. begin
  2297. convert_mov_value(A_MOVSX, $FF);
  2298. setsubreg(taicpu(p).oper[1]^.reg, R_SUBQ);
  2299. taicpu(p).opsize := S_Q;
  2300. end;
  2301. S_WQ:
  2302. begin
  2303. convert_mov_value(A_MOVSX, $FFFF);
  2304. setsubreg(taicpu(p).oper[1]^.reg, R_SUBQ);
  2305. taicpu(p).opsize := S_Q;
  2306. end;
  2307. S_LQ:
  2308. begin
  2309. convert_mov_value(A_MOVSXD, $FFFFFFFF); { Note it's MOVSXD, not MOVSX }
  2310. setsubreg(taicpu(p).oper[1]^.reg, R_SUBQ);
  2311. taicpu(p).opsize := S_Q;
  2312. end;
  2313. {$endif x86_64}
  2314. else
  2315. { If hp1 was a MOV instruction, it should have been
  2316. optimised already }
  2317. InternalError(2020021001);
  2318. end;
  2319. DebugMsg(SPeepholeOptimization + 'MovMovXX2MovXX 2 done',p);
  2320. RemoveInstruction(hp1);
  2321. Result := True;
  2322. Exit;
  2323. end;
  2324. top_ref:
  2325. { We have something like:
  2326. movb mem, %regb
  2327. movzbl %regb,%regd
  2328. Change to:
  2329. movzbl mem, %regd
  2330. }
  2331. if (taicpu(p).oper[0]^.ref^.refaddr<>addr_full) and (IsMOVZXAcceptable or (taicpu(hp1).opcode<>A_MOVZX)) then
  2332. begin
  2333. DebugMsg(SPeepholeOptimization + 'MovMovXX2MovXX 1 done',p);
  2334. taicpu(hp1).loadref(0,taicpu(p).oper[0]^.ref^);
  2335. RemoveCurrentP(p, hp1);
  2336. Result:=True;
  2337. Exit;
  2338. end;
  2339. else
  2340. if (taicpu(hp1).opcode <> A_MOV) and (taicpu(hp1).opcode <> A_LEA) then
  2341. { Just to make a saving, since there are no more optimisations with MOVZX and MOVSX/D }
  2342. Exit;
  2343. end;
  2344. end
  2345. { The RegInOp check makes sure that movl r/m,%reg1l; movzbl (%reg1l),%reg1l"
  2346. and "movl r/m,%reg1; leal $1(%reg1,%reg2),%reg1" etc. are not incorrectly
  2347. optimised }
  2348. else
  2349. begin
  2350. DebugMsg(SPeepholeOptimization + 'Mov2Nop 5 done',p);
  2351. RemoveCurrentP(p, hp1);
  2352. Result := True;
  2353. Exit;
  2354. end;
  2355. end;
  2356. if (taicpu(hp1).opcode = A_AND) and
  2357. (taicpu(p).oper[1]^.typ = top_reg) and
  2358. MatchOpType(taicpu(hp1),top_const,top_reg) then
  2359. begin
  2360. if MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
  2361. begin
  2362. case taicpu(p).opsize of
  2363. S_L:
  2364. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  2365. begin
  2366. { Optimize out:
  2367. mov x, %reg
  2368. and ffffffffh, %reg
  2369. }
  2370. DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 1 done',p);
  2371. RemoveInstruction(hp1);
  2372. Result:=true;
  2373. exit;
  2374. end;
  2375. S_Q: { TODO: Confirm if this is even possible }
  2376. if (taicpu(hp1).oper[0]^.val = $ffffffffffffffff) then
  2377. begin
  2378. { Optimize out:
  2379. mov x, %reg
  2380. and ffffffffffffffffh, %reg
  2381. }
  2382. DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 2 done',p);
  2383. RemoveInstruction(hp1);
  2384. Result:=true;
  2385. exit;
  2386. end;
  2387. else
  2388. ;
  2389. end;
  2390. if ((taicpu(p).oper[0]^.typ=top_reg) or
  2391. ((taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr<>addr_full))) and
  2392. GetNextInstruction(hp1,hp2) and
  2393. MatchInstruction(hp2,A_TEST,[taicpu(p).opsize]) and
  2394. MatchOperand(taicpu(hp1).oper[1]^,taicpu(hp2).oper[1]^) and
  2395. (MatchOperand(taicpu(hp2).oper[0]^,taicpu(hp2).oper[1]^) or
  2396. MatchOperand(taicpu(hp2).oper[0]^,-1)) and
  2397. GetNextInstruction(hp2,hp3) and
  2398. MatchInstruction(hp3,A_Jcc,A_Setcc,[]) and
  2399. (taicpu(hp3).condition in [C_E,C_NE]) then
  2400. begin
  2401. TransferUsedRegs(TmpUsedRegs);
  2402. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  2403. UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
  2404. if not(RegUsedAfterInstruction(taicpu(hp2).oper[1]^.reg, hp2, TmpUsedRegs)) then
  2405. begin
  2406. DebugMsg(SPeepholeOptimization + 'MovAndTest2Test done',p);
  2407. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  2408. taicpu(hp1).opcode:=A_TEST;
  2409. RemoveInstruction(hp2);
  2410. RemoveCurrentP(p, hp1);
  2411. Result:=true;
  2412. exit;
  2413. end;
  2414. end;
  2415. end
  2416. else if IsMOVZXAcceptable and
  2417. (taicpu(p).oper[1]^.typ = top_reg) and (taicpu(hp1).oper[1]^.typ = top_reg) and
  2418. (taicpu(p).oper[0]^.typ <> top_const) and { MOVZX only supports registers and memory, not immediates (use MOV for that!) }
  2419. (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))
  2420. then
  2421. begin
  2422. InputVal := debug_operstr(taicpu(p).oper[0]^);
  2423. MaskNum := debug_tostr(taicpu(hp1).oper[0]^.val);
  2424. case taicpu(p).opsize of
  2425. S_B:
  2426. if (taicpu(hp1).oper[0]^.val = $ff) then
  2427. begin
  2428. { Convert:
  2429. movb x, %regl movb x, %regl
  2430. andw ffh, %regw andl ffh, %regd
  2431. To:
  2432. movzbw x, %regd movzbl x, %regd
  2433. (Identical registers, just different sizes)
  2434. }
  2435. RegName1 := debug_regname(taicpu(p).oper[1]^.reg); { 8-bit register name }
  2436. RegName2 := debug_regname(taicpu(hp1).oper[1]^.reg); { 16/32-bit register name }
  2437. case taicpu(hp1).opsize of
  2438. S_W: NewSize := S_BW;
  2439. S_L: NewSize := S_BL;
  2440. {$ifdef x86_64}
  2441. S_Q: NewSize := S_BQ;
  2442. {$endif x86_64}
  2443. else
  2444. InternalError(2018011510);
  2445. end;
  2446. end
  2447. else
  2448. NewSize := S_NO;
  2449. S_W:
  2450. if (taicpu(hp1).oper[0]^.val = $ffff) then
  2451. begin
  2452. { Convert:
  2453. movw x, %regw
  2454. andl ffffh, %regd
  2455. To:
  2456. movzwl x, %regd
  2457. (Identical registers, just different sizes)
  2458. }
  2459. RegName1 := debug_regname(taicpu(p).oper[1]^.reg); { 16-bit register name }
  2460. RegName2 := debug_regname(taicpu(hp1).oper[1]^.reg); { 32-bit register name }
  2461. case taicpu(hp1).opsize of
  2462. S_L: NewSize := S_WL;
  2463. {$ifdef x86_64}
  2464. S_Q: NewSize := S_WQ;
  2465. {$endif x86_64}
  2466. else
  2467. InternalError(2018011511);
  2468. end;
  2469. end
  2470. else
  2471. NewSize := S_NO;
  2472. else
  2473. NewSize := S_NO;
  2474. end;
  2475. if NewSize <> S_NO then
  2476. begin
  2477. PreMessage := 'mov' + debug_opsize2str(taicpu(p).opsize) + ' ' + InputVal + ',' + RegName1;
  2478. { The actual optimization }
  2479. taicpu(p).opcode := A_MOVZX;
  2480. taicpu(p).changeopsize(NewSize);
  2481. taicpu(p).oper[1]^ := taicpu(hp1).oper[1]^;
  2482. { Safeguard if "and" is followed by a conditional command }
  2483. TransferUsedRegs(TmpUsedRegs);
  2484. UpdateUsedRegs(TmpUsedRegs,tai(p.next));
  2485. if (RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp1, TmpUsedRegs)) then
  2486. begin
  2487. { At this point, the "and" command is effectively equivalent to
  2488. "test %reg,%reg". This will be handled separately by the
  2489. Peephole Optimizer. [Kit] }
  2490. DebugMsg(SPeepholeOptimization + PreMessage +
  2491. ' -> movz' + debug_opsize2str(NewSize) + ' ' + InputVal + ',' + RegName2, p);
  2492. end
  2493. else
  2494. begin
  2495. DebugMsg(SPeepholeOptimization + PreMessage + '; and' + debug_opsize2str(taicpu(hp1).opsize) + ' $' + MaskNum + ',' + RegName2 +
  2496. ' -> movz' + debug_opsize2str(NewSize) + ' ' + InputVal + ',' + RegName2, p);
  2497. RemoveInstruction(hp1);
  2498. end;
  2499. Result := True;
  2500. Exit;
  2501. end;
  2502. end;
  2503. end;
  2504. if (taicpu(hp1).opcode = A_OR) and
  2505. (taicpu(p).oper[1]^.typ = top_reg) and
  2506. MatchOperand(taicpu(p).oper[0]^, 0) and
  2507. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^.reg) then
  2508. begin
  2509. { mov 0, %reg
  2510. or ###,%reg
  2511. Change to (only if the flags are not used):
  2512. mov ###,%reg
  2513. }
  2514. TransferUsedRegs(TmpUsedRegs);
  2515. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  2516. DoOptimisation := True;
  2517. { Even if the flags are used, we might be able to do the optimisation
  2518. if the conditions are predictable }
  2519. if RegInUsedRegs(NR_DEFAULTFLAGS, TmpUsedRegs) then
  2520. begin
  2521. { Only perform if ### = %reg (the same register) or equal to 0,
  2522. so %reg is guaranteed to still have a value of zero }
  2523. if MatchOperand(taicpu(hp1).oper[0]^, 0) or
  2524. MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^.reg) then
  2525. begin
  2526. hp2 := hp1;
  2527. UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
  2528. while RegInUsedRegs(NR_DEFAULTFLAGS, TmpUsedRegs) and
  2529. GetNextInstruction(hp2, hp3) do
  2530. begin
  2531. { Don't continue modifying if the flags state is getting changed }
  2532. if RegModifiedByInstruction(NR_DEFAULTFLAGS, hp3) then
  2533. Break;
  2534. UpdateUsedRegs(TmpUsedRegs, tai(hp3.Next));
  2535. if MatchInstruction(hp3, A_Jcc, A_SETcc, A_CMOVcc, []) then
  2536. begin
  2537. if condition_in(C_E, taicpu(hp3).condition) or (taicpu(hp3).condition in [C_NC, C_NS, C_NO]) then
  2538. begin
  2539. { Condition is always true }
  2540. case taicpu(hp3).opcode of
  2541. A_Jcc:
  2542. begin
  2543. DebugMsg(SPeepholeOptimization + 'Condition is always true (jump made unconditional)', hp3);
  2544. { Check for jump shortcuts before we destroy the condition }
  2545. DoJumpOptimizations(hp3, TempBool);
  2546. MakeUnconditional(taicpu(hp3));
  2547. Result := True;
  2548. end;
  2549. A_CMOVcc:
  2550. begin
  2551. DebugMsg(SPeepholeOptimization + 'Condition is always true (CMOVcc -> MOV)', hp3);
  2552. taicpu(hp3).opcode := A_MOV;
  2553. taicpu(hp3).condition := C_None;
  2554. Result := True;
  2555. end;
  2556. A_SETcc:
  2557. begin
  2558. DebugMsg(SPeepholeOptimization + 'Condition is always true (changed to MOV 1)', hp3);
  2559. { Convert "set(c) %reg" instruction to "movb 1,%reg" }
  2560. taicpu(hp3).opcode := A_MOV;
  2561. taicpu(hp3).ops := 2;
  2562. taicpu(hp3).condition := C_None;
  2563. taicpu(hp3).opsize := S_B;
  2564. taicpu(hp3).loadreg(1,taicpu(hp3).oper[0]^.reg);
  2565. taicpu(hp3).loadconst(0, 1);
  2566. Result := True;
  2567. end;
  2568. else
  2569. InternalError(2021090701);
  2570. end;
  2571. end
  2572. else if (taicpu(hp3).condition in [C_A, C_B, C_C, C_G, C_L, C_NE, C_NZ, C_O, C_S]) then
  2573. begin
  2574. { Condition is always false }
  2575. case taicpu(hp3).opcode of
  2576. A_Jcc:
  2577. begin
  2578. DebugMsg(SPeepholeOptimization + 'Condition is always false (jump removed)', hp3);
  2579. TAsmLabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
  2580. RemoveInstruction(hp3);
  2581. Result := True;
  2582. { Since hp3 was deleted, hp2 must not be updated }
  2583. Continue;
  2584. end;
  2585. A_CMOVcc:
  2586. begin
  2587. DebugMsg(SPeepholeOptimization + 'Condition is always false (conditional load removed)', hp3);
  2588. RemoveInstruction(hp3);
  2589. Result := True;
  2590. { Since hp3 was deleted, hp2 must not be updated }
  2591. Continue;
  2592. end;
  2593. A_SETcc:
  2594. begin
  2595. DebugMsg(SPeepholeOptimization + 'Condition is always false (changed to MOV 0)', hp3);
  2596. { Convert "set(c) %reg" instruction to "movb 0,%reg" }
  2597. taicpu(hp3).opcode := A_MOV;
  2598. taicpu(hp3).ops := 2;
  2599. taicpu(hp3).condition := C_None;
  2600. taicpu(hp3).opsize := S_B;
  2601. taicpu(hp3).loadreg(1,taicpu(hp3).oper[0]^.reg);
  2602. taicpu(hp3).loadconst(0, 0);
  2603. Result := True;
  2604. end;
  2605. else
  2606. InternalError(2021090702);
  2607. end;
  2608. end
  2609. else
  2610. { Uncertain what to do - don't optimise (although optimise other conditional statements if present) }
  2611. DoOptimisation := False;
  2612. end;
  2613. hp2 := hp3;
  2614. end;
  2615. { Flags are still in use - don't optimise }
  2616. if DoOptimisation and RegInUsedRegs(NR_DEFAULTFLAGS, TmpUsedRegs) then
  2617. DoOptimisation := False;
  2618. end
  2619. else
  2620. DoOptimisation := False;
  2621. end;
  2622. if DoOptimisation then
  2623. begin
  2624. {$ifdef x86_64}
  2625. { OR only supports 32-bit sign-extended constants for 64-bit
  2626. instructions, so compensate for this if the constant is
  2627. encoded as a value greater than or equal to 2^31 }
  2628. if (taicpu(hp1).opsize = S_Q) and
  2629. (taicpu(hp1).oper[0]^.typ = top_const) and
  2630. (taicpu(hp1).oper[0]^.val >= $80000000) then
  2631. taicpu(hp1).oper[0]^.val := taicpu(hp1).oper[0]^.val or $FFFFFFFF00000000;
  2632. {$endif x86_64}
  2633. DebugMsg(SPeepholeOptimization + 'MOV 0 / OR -> MOV', p);
  2634. taicpu(hp1).opcode := A_MOV;
  2635. RemoveCurrentP(p, hp1);
  2636. Result := True;
  2637. Exit;
  2638. end;
  2639. end;
  2640. { Next instruction is also a MOV ? }
  2641. if MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  2642. begin
  2643. if (taicpu(p).oper[1]^.typ = top_reg) and
  2644. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  2645. begin
  2646. CurrentReg := taicpu(p).oper[1]^.reg;
  2647. TransferUsedRegs(TmpUsedRegs);
  2648. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  2649. { we have
  2650. mov x, %treg
  2651. mov %treg, y
  2652. }
  2653. if not(RegInOp(CurrentReg, taicpu(hp1).oper[1]^)) then
  2654. if not(RegUsedAfterInstruction(CurrentReg, hp1, TmpUsedRegs)) then
  2655. { we've got
  2656. mov x, %treg
  2657. mov %treg, y
  2658. with %treg is not used after }
  2659. case taicpu(p).oper[0]^.typ Of
  2660. { top_reg is covered by DeepMOVOpt }
  2661. top_const:
  2662. begin
  2663. { change
  2664. mov const, %treg
  2665. mov %treg, y
  2666. to
  2667. mov const, y
  2668. }
  2669. if (taicpu(hp1).oper[1]^.typ=top_reg) or
  2670. ((taicpu(p).oper[0]^.val>=low(longint)) and (taicpu(p).oper[0]^.val<=high(longint))) then
  2671. begin
  2672. if taicpu(hp1).oper[1]^.typ=top_reg then
  2673. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  2674. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  2675. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 5 done',p);
  2676. RemoveInstruction(hp1);
  2677. Result:=true;
  2678. Exit;
  2679. end;
  2680. end;
  2681. top_ref:
  2682. case taicpu(hp1).oper[1]^.typ of
  2683. top_reg:
  2684. begin
  2685. { change
  2686. mov mem, %treg
  2687. mov %treg, %reg
  2688. to
  2689. mov mem, %reg"
  2690. }
  2691. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  2692. taicpu(p).loadreg(1, taicpu(hp1).oper[1]^.reg);
  2693. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 3 done',p);
  2694. RemoveInstruction(hp1);
  2695. Result:=true;
  2696. Exit;
  2697. end;
  2698. top_ref:
  2699. begin
  2700. {$ifdef x86_64}
  2701. { Look for the following to simplify:
  2702. mov x(mem1), %reg
  2703. mov %reg, y(mem2)
  2704. mov x+8(mem1), %reg
  2705. mov %reg, y+8(mem2)
  2706. Change to:
  2707. movdqu x(mem1), %xmmreg
  2708. movdqu %xmmreg, y(mem2)
  2709. }
  2710. SourceRef := taicpu(p).oper[0]^.ref^;
  2711. TargetRef := taicpu(hp1).oper[1]^.ref^;
  2712. if (taicpu(p).opsize = S_Q) and
  2713. GetNextInstruction(hp1, hp2) and
  2714. MatchInstruction(hp2, A_MOV, [taicpu(p).opsize]) and
  2715. MatchOpType(taicpu(hp2), top_ref, top_reg) then
  2716. begin
  2717. { Delay calling GetNextInstruction(hp2, hp3) for as long as possible }
  2718. UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
  2719. Inc(SourceRef.offset, 8);
  2720. if UseAVX then
  2721. begin
  2722. MovAligned := A_VMOVDQA;
  2723. MovUnaligned := A_VMOVDQU;
  2724. end
  2725. else
  2726. begin
  2727. MovAligned := A_MOVDQA;
  2728. MovUnaligned := A_MOVDQU;
  2729. end;
  2730. if RefsEqual(SourceRef, taicpu(hp2).oper[0]^.ref^) then
  2731. begin
  2732. UpdateUsedRegs(TmpUsedRegs, tai(hp2.Next));
  2733. Inc(TargetRef.offset, 8);
  2734. if GetNextInstruction(hp2, hp3) and
  2735. MatchInstruction(hp3, A_MOV, [taicpu(p).opsize]) and
  2736. MatchOpType(taicpu(hp3), top_reg, top_ref) and
  2737. (taicpu(hp2).oper[1]^.reg = taicpu(hp3).oper[0]^.reg) and
  2738. RefsEqual(TargetRef, taicpu(hp3).oper[1]^.ref^) and
  2739. not RegUsedAfterInstruction(taicpu(hp2).oper[1]^.reg, hp3, TmpUsedRegs) then
  2740. begin
  2741. CurrentReg := GetMMRegisterBetween(R_SUBMMX, UsedRegs, p, hp3);
  2742. if CurrentReg <> NR_NO then
  2743. begin
  2744. { Remember that the offsets are 8 ahead }
  2745. if ((SourceRef.offset mod 16) = 8) and
  2746. (
  2747. { Base pointer is always aligned (stack pointer won't be if there's no stack frame) }
  2748. (SourceRef.base = current_procinfo.framepointer) or
  2749. ((SourceRef.alignment >= 16) and ((SourceRef.alignment mod 16) = 0))
  2750. ) then
  2751. taicpu(p).opcode := MovAligned
  2752. else
  2753. taicpu(p).opcode := MovUnaligned;
  2754. taicpu(p).opsize := S_XMM;
  2755. taicpu(p).oper[1]^.reg := CurrentReg;
  2756. if ((TargetRef.offset mod 16) = 8) and
  2757. (
  2758. { Base pointer is always aligned (stack pointer won't be if there's no stack frame) }
  2759. (TargetRef.base = current_procinfo.framepointer) or
  2760. ((TargetRef.alignment >= 16) and ((TargetRef.alignment mod 16) = 0))
  2761. ) then
  2762. taicpu(hp1).opcode := MovAligned
  2763. else
  2764. taicpu(hp1).opcode := MovUnaligned;
  2765. taicpu(hp1).opsize := S_XMM;
  2766. taicpu(hp1).oper[0]^.reg := CurrentReg;
  2767. DebugMsg(SPeepholeOptimization + 'Used ' + debug_regname(CurrentReg) + ' to merge a pair of memory moves (MovMovMovMov2MovdqMovdq 1)', p);
  2768. RemoveInstruction(hp2);
  2769. RemoveInstruction(hp3);
  2770. Result := True;
  2771. Exit;
  2772. end;
  2773. end;
  2774. end
  2775. else
  2776. begin
  2777. { See if the next references are 8 less rather than 8 greater }
  2778. Dec(SourceRef.offset, 16); { -8 the other way }
  2779. if RefsEqual(SourceRef, taicpu(hp2).oper[0]^.ref^) then
  2780. begin
  2781. UpdateUsedRegs(TmpUsedRegs, tai(hp2.Next));
  2782. Dec(TargetRef.offset, 8); { Only 8, not 16, as it wasn't incremented unlike SourceRef }
  2783. if GetNextInstruction(hp2, hp3) and
  2784. MatchInstruction(hp3, A_MOV, [taicpu(p).opsize]) and
  2785. MatchOpType(taicpu(hp3), top_reg, top_ref) and
  2786. (taicpu(hp2).oper[1]^.reg = taicpu(hp3).oper[0]^.reg) and
  2787. RefsEqual(TargetRef, taicpu(hp3).oper[1]^.ref^) and
  2788. not RegUsedAfterInstruction(taicpu(hp2).oper[1]^.reg, hp3, TmpUsedRegs) then
  2789. begin
  2790. CurrentReg := GetMMRegisterBetween(R_SUBMMX, UsedRegs, p, hp3);
  2791. if CurrentReg <> NR_NO then
  2792. begin
  2793. { hp2 and hp3 are the starting offsets, so mod 0 this time }
  2794. if ((SourceRef.offset mod 16) = 0) and
  2795. (
  2796. { Base pointer is always aligned (stack pointer won't be if there's no stack frame) }
  2797. (SourceRef.base = current_procinfo.framepointer) or
  2798. ((SourceRef.alignment >= 16) and ((SourceRef.alignment mod 16) = 0))
  2799. ) then
  2800. taicpu(hp2).opcode := MovAligned
  2801. else
  2802. taicpu(hp2).opcode := MovUnaligned;
  2803. taicpu(hp2).opsize := S_XMM;
  2804. taicpu(hp2).oper[1]^.reg := CurrentReg;
  2805. if ((TargetRef.offset mod 16) = 0) and
  2806. (
  2807. { Base pointer is always aligned (stack pointer won't be if there's no stack frame) }
  2808. (TargetRef.base = current_procinfo.framepointer) or
  2809. ((TargetRef.alignment >= 16) and ((TargetRef.alignment mod 16) = 0))
  2810. ) then
  2811. taicpu(hp3).opcode := MovAligned
  2812. else
  2813. taicpu(hp3).opcode := MovUnaligned;
  2814. taicpu(hp3).opsize := S_XMM;
  2815. taicpu(hp3).oper[0]^.reg := CurrentReg;
  2816. DebugMsg(SPeepholeOptimization + 'Used ' + debug_regname(CurrentReg) + ' to merge a pair of memory moves (MovMovMovMov2MovdqMovdq 2)', p);
  2817. RemoveInstruction(hp1);
  2818. RemoveCurrentP(p, hp2);
  2819. Result := True;
  2820. Exit;
  2821. end;
  2822. end;
  2823. end;
  2824. end;
  2825. end;
  2826. {$endif x86_64}
  2827. end;
  2828. else
  2829. { The write target should be a reg or a ref }
  2830. InternalError(2021091601);
  2831. end;
  2832. else
  2833. ;
  2834. end
  2835. else
  2836. { %treg is used afterwards, but all eventualities
  2837. other than the first MOV instruction being a constant
  2838. are covered by DeepMOVOpt, so only check for that }
  2839. if (taicpu(p).oper[0]^.typ = top_const) and
  2840. (
  2841. { For MOV operations, a size saving is only made if the register/const is byte-sized }
  2842. not (cs_opt_size in current_settings.optimizerswitches) or
  2843. (taicpu(hp1).opsize = S_B)
  2844. ) and
  2845. (
  2846. (taicpu(hp1).oper[1]^.typ = top_reg) or
  2847. ((taicpu(p).oper[0]^.val >= low(longint)) and (taicpu(p).oper[0]^.val <= high(longint)))
  2848. ) then
  2849. begin
  2850. DebugMsg(SPeepholeOptimization + debug_operstr(taicpu(hp1).oper[0]^) + ' = $' + debug_tostr(taicpu(p).oper[0]^.val) + '; changed to minimise pipeline stall (MovMov2Mov 6b)',hp1);
  2851. taicpu(hp1).loadconst(0, taicpu(p).oper[0]^.val);
  2852. end;
  2853. end;
  2854. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  2855. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  2856. { mov reg1, mem1 or mov mem1, reg1
  2857. mov mem2, reg2 mov reg2, mem2}
  2858. begin
  2859. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  2860. { mov reg1, mem1 or mov mem1, reg1
  2861. mov mem2, reg1 mov reg2, mem1}
  2862. begin
  2863. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  2864. { Removes the second statement from
  2865. mov reg1, mem1/reg2
  2866. mov mem1/reg2, reg1 }
  2867. begin
  2868. if taicpu(p).oper[0]^.typ=top_reg then
  2869. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  2870. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 1',p);
  2871. RemoveInstruction(hp1);
  2872. Result:=true;
  2873. exit;
  2874. end
  2875. else
  2876. begin
  2877. TransferUsedRegs(TmpUsedRegs);
  2878. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  2879. if (taicpu(p).oper[1]^.typ = top_ref) and
  2880. { mov reg1, mem1
  2881. mov mem2, reg1 }
  2882. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  2883. GetNextInstruction(hp1, hp2) and
  2884. MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
  2885. OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  2886. OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
  2887. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  2888. { change to
  2889. mov reg1, mem1 mov reg1, mem1
  2890. mov mem2, reg1 cmp reg1, mem2
  2891. cmp mem1, reg1
  2892. }
  2893. begin
  2894. RemoveInstruction(hp2);
  2895. taicpu(hp1).opcode := A_CMP;
  2896. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  2897. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  2898. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  2899. DebugMsg(SPeepholeOptimization + 'MovMovCmp2MovCmp done',hp1);
  2900. end;
  2901. end;
  2902. end
  2903. else if (taicpu(p).oper[1]^.typ=top_ref) and
  2904. OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  2905. begin
  2906. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  2907. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  2908. DebugMsg(SPeepholeOptimization + 'MovMov2MovMov1 done',p);
  2909. end
  2910. else
  2911. begin
  2912. TransferUsedRegs(TmpUsedRegs);
  2913. if GetNextInstruction(hp1, hp2) and
  2914. MatchOpType(taicpu(p),top_ref,top_reg) and
  2915. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  2916. (taicpu(hp1).oper[1]^.typ = top_ref) and
  2917. MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
  2918. MatchOpType(taicpu(hp2),top_ref,top_reg) and
  2919. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  2920. if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
  2921. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  2922. { mov mem1, %reg1
  2923. mov %reg1, mem2
  2924. mov mem2, reg2
  2925. to:
  2926. mov mem1, reg2
  2927. mov reg2, mem2}
  2928. begin
  2929. AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  2930. DebugMsg(SPeepholeOptimization + 'MovMovMov2MovMov 1 done',p);
  2931. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  2932. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  2933. RemoveInstruction(hp2);
  2934. Result := True;
  2935. end
  2936. {$ifdef i386}
  2937. { this is enabled for i386 only, as the rules to create the reg sets below
  2938. are too complicated for x86-64, so this makes this code too error prone
  2939. on x86-64
  2940. }
  2941. else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  2942. not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
  2943. not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
  2944. { mov mem1, reg1 mov mem1, reg1
  2945. mov reg1, mem2 mov reg1, mem2
  2946. mov mem2, reg2 mov mem2, reg1
  2947. to: to:
  2948. mov mem1, reg1 mov mem1, reg1
  2949. mov mem1, reg2 mov reg1, mem2
  2950. mov reg1, mem2
  2951. or (if mem1 depends on reg1
  2952. and/or if mem2 depends on reg2)
  2953. to:
  2954. mov mem1, reg1
  2955. mov reg1, mem2
  2956. mov reg1, reg2
  2957. }
  2958. begin
  2959. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  2960. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  2961. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  2962. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  2963. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  2964. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  2965. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  2966. AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  2967. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  2968. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  2969. AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  2970. end
  2971. else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  2972. begin
  2973. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  2974. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  2975. end
  2976. else
  2977. begin
  2978. RemoveInstruction(hp2);
  2979. end
  2980. {$endif i386}
  2981. ;
  2982. end;
  2983. end
  2984. { movl [mem1],reg1
  2985. movl [mem1],reg2
  2986. to
  2987. movl [mem1],reg1
  2988. movl reg1,reg2
  2989. }
  2990. else if MatchOpType(taicpu(p),top_ref,top_reg) and
  2991. MatchOpType(taicpu(hp1),top_ref,top_reg) and
  2992. (taicpu(p).opsize = taicpu(hp1).opsize) and
  2993. RefsEqual(taicpu(p).oper[0]^.ref^,taicpu(hp1).oper[0]^.ref^) and
  2994. (taicpu(p).oper[0]^.ref^.volatility=[]) and
  2995. (taicpu(hp1).oper[0]^.ref^.volatility=[]) and
  2996. not(SuperRegistersEqual(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^.base)) and
  2997. not(SuperRegistersEqual(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^.index)) then
  2998. begin
  2999. DebugMsg(SPeepholeOptimization + 'MovMov2MovMov 2',p);
  3000. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  3001. end;
  3002. { movl const1,[mem1]
  3003. movl [mem1],reg1
  3004. to
  3005. movl const1,reg1
  3006. movl reg1,[mem1]
  3007. }
  3008. if MatchOpType(Taicpu(p),top_const,top_ref) and
  3009. MatchOpType(Taicpu(hp1),top_ref,top_reg) and
  3010. (taicpu(p).opsize = taicpu(hp1).opsize) and
  3011. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  3012. not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
  3013. begin
  3014. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  3015. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  3016. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  3017. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  3018. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  3019. DebugMsg(SPeepholeOptimization + 'MovMov2MovMov 1',p);
  3020. Result:=true;
  3021. exit;
  3022. end;
  3023. { mov x,reg1; mov y,reg1 -> mov y,reg1 is handled by the Mov2Nop 5 optimisation }
  3024. end;
  3025. { search further than the next instruction for a mov (as long as it's not a jump) }
  3026. if not is_calljmpuncondret(taicpu(hp1).opcode) and
  3027. { check as much as possible before the expensive GetNextInstructionUsingRegCond call }
  3028. (taicpu(p).oper[1]^.typ = top_reg) and
  3029. (taicpu(p).oper[0]^.typ in [top_reg,top_const]) and
  3030. not RegModifiedByInstruction(taicpu(p).oper[1]^.reg, hp1) then
  3031. begin
  3032. { we work with hp2 here, so hp1 can be still used later on when
  3033. checking for GetNextInstruction_p }
  3034. hp3 := hp1;
  3035. { Initialise CrossJump (if it becomes True at any point, it will remain True) }
  3036. CrossJump := (taicpu(hp1).opcode = A_Jcc);
  3037. { Saves on a large number of dereferences }
  3038. ActiveReg := taicpu(p).oper[1]^.reg;
  3039. while GetNextInstructionUsingRegCond(hp3,hp2,ActiveReg,CrossJump) and
  3040. { GetNextInstructionUsingRegCond only searches one instruction ahead unless -O3 is specified }
  3041. (hp2.typ=ait_instruction) do
  3042. begin
  3043. case taicpu(hp2).opcode of
  3044. A_MOV:
  3045. if MatchOperand(taicpu(hp2).oper[0]^,ActiveReg) and
  3046. ((taicpu(p).oper[0]^.typ=top_const) or
  3047. ((taicpu(p).oper[0]^.typ=top_reg) and
  3048. not(RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp2))
  3049. )
  3050. ) then
  3051. begin
  3052. { we have
  3053. mov x, %treg
  3054. mov %treg, y
  3055. }
  3056. TransferUsedRegs(TmpUsedRegs);
  3057. TmpUsedRegs[R_INTREGISTER].Update(tai(p.Next));
  3058. { We don't need to call UpdateUsedRegs for every instruction between
  3059. p and hp2 because the register we're concerned about will not
  3060. become deallocated (otherwise GetNextInstructionUsingReg would
  3061. have stopped at an earlier instruction). [Kit] }
  3062. TempRegUsed :=
  3063. CrossJump { Assume the register is in use if it crossed a conditional jump } or
  3064. RegUsedAfterInstruction(ActiveReg, hp2, TmpUsedRegs) or
  3065. RegReadByInstruction(ActiveReg, hp1);
  3066. case taicpu(p).oper[0]^.typ Of
  3067. top_reg:
  3068. begin
  3069. { change
  3070. mov %reg, %treg
  3071. mov %treg, y
  3072. to
  3073. mov %reg, y
  3074. }
  3075. CurrentReg := taicpu(p).oper[0]^.reg; { Saves on a handful of pointer dereferences }
  3076. RegName1 := debug_regname(taicpu(hp2).oper[0]^.reg);
  3077. if taicpu(hp2).oper[1]^.reg = CurrentReg then
  3078. begin
  3079. { %reg = y - remove hp2 completely (doing it here instead of relying on
  3080. the "mov %reg,%reg" optimisation might cut down on a pass iteration) }
  3081. if TempRegUsed then
  3082. begin
  3083. DebugMsg(SPeepholeOptimization + debug_regname(CurrentReg) + ' = ' + RegName1 + '; removed unnecessary instruction (MovMov2MovNop 6b}',hp2);
  3084. AllocRegBetween(CurrentReg, p, hp2, UsedRegs);
  3085. { Set the start of the next GetNextInstructionUsingRegCond search
  3086. to start at the entry right before hp2 (which is about to be removed) }
  3087. hp3 := tai(hp2.Previous);
  3088. RemoveInstruction(hp2);
  3089. { See if there's more we can optimise }
  3090. Continue;
  3091. end
  3092. else
  3093. begin
  3094. RemoveInstruction(hp2);
  3095. { We can remove the original MOV too }
  3096. DebugMsg(SPeepholeOptimization + 'MovMov2NopNop 6b done',p);
  3097. RemoveCurrentP(p, hp1);
  3098. Result:=true;
  3099. Exit;
  3100. end;
  3101. end
  3102. else
  3103. begin
  3104. AllocRegBetween(CurrentReg, p, hp2, UsedRegs);
  3105. taicpu(hp2).loadReg(0, CurrentReg);
  3106. if TempRegUsed then
  3107. begin
  3108. { Don't remove the first instruction if the temporary register is in use }
  3109. DebugMsg(SPeepholeOptimization + RegName1 + ' = ' + debug_regname(CurrentReg) + '; changed to minimise pipeline stall (MovMov2Mov 6a}',hp2);
  3110. { No need to set Result to True. If there's another instruction later on
  3111. that can be optimised, it will be detected when the main Pass 1 loop
  3112. reaches what is now hp2 and passes it through OptPass1MOV. [Kit] };
  3113. end
  3114. else
  3115. begin
  3116. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 6 done',p);
  3117. RemoveCurrentP(p, hp1);
  3118. Result:=true;
  3119. Exit;
  3120. end;
  3121. end;
  3122. end;
  3123. top_const:
  3124. if not (cs_opt_size in current_settings.optimizerswitches) or (taicpu(hp2).opsize = S_B) then
  3125. begin
  3126. { change
  3127. mov const, %treg
  3128. mov %treg, y
  3129. to
  3130. mov const, y
  3131. }
  3132. if (taicpu(hp2).oper[1]^.typ=top_reg) or
  3133. ((taicpu(p).oper[0]^.val>=low(longint)) and (taicpu(p).oper[0]^.val<=high(longint))) then
  3134. begin
  3135. RegName1 := debug_regname(taicpu(hp2).oper[0]^.reg);
  3136. taicpu(hp2).loadOper(0,taicpu(p).oper[0]^);
  3137. if TempRegUsed then
  3138. begin
  3139. { Don't remove the first instruction if the temporary register is in use }
  3140. DebugMsg(SPeepholeOptimization + RegName1 + ' = ' + debug_tostr(taicpu(p).oper[0]^.val) + '; changed to minimise pipeline stall (MovMov2Mov 7a)',hp2);
  3141. { No need to set Result to True. If there's another instruction later on
  3142. that can be optimised, it will be detected when the main Pass 1 loop
  3143. reaches what is now hp2 and passes it through OptPass1MOV. [Kit] };
  3144. end
  3145. else
  3146. begin
  3147. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 7 done',p);
  3148. RemoveCurrentP(p, hp1);
  3149. Result:=true;
  3150. Exit;
  3151. end;
  3152. end;
  3153. end;
  3154. else
  3155. Internalerror(2019103001);
  3156. end;
  3157. end
  3158. else
  3159. if MatchOperand(taicpu(hp2).oper[1]^, ActiveReg) then
  3160. begin
  3161. if not CrossJump and
  3162. not RegUsedBetween(ActiveReg, p, hp2) and
  3163. not RegReadByInstruction(ActiveReg, hp2) then
  3164. begin
  3165. { Register is not used before it is overwritten }
  3166. DebugMsg(SPeepholeOptimization + 'Mov2Nop 3a done',p);
  3167. RemoveCurrentp(p, hp1);
  3168. Result := True;
  3169. Exit;
  3170. end;
  3171. if (taicpu(p).oper[0]^.typ = top_const) and
  3172. (taicpu(hp2).oper[0]^.typ = top_const) then
  3173. begin
  3174. if taicpu(p).oper[0]^.val = taicpu(hp2).oper[0]^.val then
  3175. begin
  3176. { Same value - register hasn't changed }
  3177. DebugMsg(SPeepholeOptimization + 'Mov2Nop 2 done', hp2);
  3178. RemoveInstruction(hp2);
  3179. Result := True;
  3180. { See if there's more we can optimise }
  3181. Continue;
  3182. end;
  3183. end;
  3184. end;
  3185. A_MOVZX, A_MOVSX{$ifdef x86_64}, A_MOVSXD{$endif x86_64}:
  3186. if MatchOpType(taicpu(hp2), top_reg, top_reg) and
  3187. MatchOperand(taicpu(hp2).oper[0]^, ActiveReg) and
  3188. SuperRegistersEqual(taicpu(hp2).oper[1]^.reg, ActiveReg) then
  3189. begin
  3190. {
  3191. Change from:
  3192. mov ###, %reg
  3193. ...
  3194. movs/z %reg,%reg (Same register, just different sizes)
  3195. To:
  3196. movs/z ###, %reg (Longer version)
  3197. ...
  3198. (remove)
  3199. }
  3200. DebugMsg(SPeepholeOptimization + 'MovMovs/z2Mov/s/z done', p);
  3201. taicpu(p).oper[1]^.reg := taicpu(hp2).oper[1]^.reg;
  3202. { Keep the first instruction as mov if ### is a constant }
  3203. if taicpu(p).oper[0]^.typ = top_const then
  3204. taicpu(p).opsize := reg2opsize(taicpu(hp2).oper[1]^.reg)
  3205. else
  3206. begin
  3207. taicpu(p).opcode := taicpu(hp2).opcode;
  3208. taicpu(p).opsize := taicpu(hp2).opsize;
  3209. end;
  3210. DebugMsg(SPeepholeOptimization + 'Removed movs/z instruction and extended earlier write (MovMovs/z2Mov/s/z)', hp2);
  3211. AllocRegBetween(taicpu(hp2).oper[1]^.reg, p, hp2, UsedRegs);
  3212. RemoveInstruction(hp2);
  3213. Result := True;
  3214. Exit;
  3215. end;
  3216. else
  3217. if MatchOpType(taicpu(p), top_reg, top_reg) then
  3218. begin
  3219. TransferUsedRegs(TmpUsedRegs);
  3220. TmpUsedRegs[R_INTREGISTER].Update(tai(p.Next));
  3221. if
  3222. not RegModifiedByInstruction(taicpu(p).oper[0]^.reg, hp1) and
  3223. not RegModifiedBetween(taicpu(p).oper[0]^.reg, hp1, hp2) and
  3224. DeepMovOpt(taicpu(p), taicpu(hp2)) then
  3225. begin
  3226. { Just in case something didn't get modified (e.g. an
  3227. implicit register) }
  3228. if not RegReadByInstruction(ActiveReg, hp2) and
  3229. { If a conditional jump was crossed, do not delete
  3230. the original MOV no matter what }
  3231. not CrossJump then
  3232. begin
  3233. TransferUsedRegs(TmpUsedRegs);
  3234. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  3235. UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
  3236. if
  3237. { Make sure the original register isn't still present
  3238. and has been written to (e.g. with SHRX) }
  3239. RegLoadedWithNewValue(ActiveReg, hp2) or
  3240. not RegUsedAfterInstruction(ActiveReg, hp2, TmpUsedRegs) then
  3241. begin
  3242. RegUsedAfterInstruction(ActiveReg, hp2, TmpUsedRegs);
  3243. { We can remove the original MOV }
  3244. DebugMsg(SPeepholeOptimization + 'Mov2Nop 3b done',p);
  3245. RemoveCurrentp(p, hp1);
  3246. Result := True;
  3247. Exit;
  3248. end
  3249. else
  3250. begin
  3251. { See if there's more we can optimise }
  3252. hp3 := hp2;
  3253. Continue;
  3254. end;
  3255. end;
  3256. end;
  3257. end;
  3258. end;
  3259. { Break out of the while loop under normal circumstances }
  3260. Break;
  3261. end;
  3262. end;
  3263. if (aoc_MovAnd2Mov_3 in OptsToCheck) and
  3264. (taicpu(p).oper[1]^.typ = top_reg) and
  3265. (taicpu(p).opsize = S_L) and
  3266. GetNextInstructionUsingRegTrackingUse(p,hp2,taicpu(p).oper[1]^.reg) and
  3267. (taicpu(hp2).opcode = A_AND) and
  3268. (MatchOpType(taicpu(hp2),top_const,top_reg) or
  3269. (MatchOpType(taicpu(hp2),top_reg,top_reg) and
  3270. MatchOperand(taicpu(hp2).oper[0]^,taicpu(hp2).oper[1]^))
  3271. ) then
  3272. begin
  3273. if SuperRegistersEqual(taicpu(p).oper[1]^.reg,taicpu(hp2).oper[1]^.reg) then
  3274. begin
  3275. if ((taicpu(hp2).oper[0]^.typ=top_const) and (taicpu(hp2).oper[0]^.val = $ffffffff)) or
  3276. ((taicpu(hp2).oper[0]^.typ=top_reg) and (taicpu(hp2).opsize=S_L)) then
  3277. begin
  3278. { Optimize out:
  3279. mov x, %reg
  3280. and ffffffffh, %reg
  3281. }
  3282. DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 3 done',p);
  3283. RemoveInstruction(hp2);
  3284. Result:=true;
  3285. exit;
  3286. end;
  3287. end;
  3288. end;
  3289. { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  3290. x >= RetOffset) as it doesn't do anything (it writes either to a
  3291. parameter or to the temporary storage room for the function
  3292. result)
  3293. }
  3294. if IsExitCode(hp1) and
  3295. (taicpu(p).oper[1]^.typ = top_ref) and
  3296. (taicpu(p).oper[1]^.ref^.index = NR_NO) and
  3297. (
  3298. (
  3299. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  3300. not (
  3301. assigned(current_procinfo.procdef.funcretsym) and
  3302. (taicpu(p).oper[1]^.ref^.offset <= tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)
  3303. )
  3304. ) or
  3305. { Also discard writes to the stack that are below the base pointer,
  3306. as this is temporary storage rather than a function result on the
  3307. stack, say. }
  3308. (
  3309. (taicpu(p).oper[1]^.ref^.base = NR_STACK_POINTER_REG) and
  3310. (taicpu(p).oper[1]^.ref^.offset < current_procinfo.final_localsize)
  3311. )
  3312. ) then
  3313. begin
  3314. RemoveCurrentp(p, hp1);
  3315. DebugMsg(SPeepholeOptimization + 'removed deadstore before leave/ret',p);
  3316. RemoveLastDeallocForFuncRes(p);
  3317. Result:=true;
  3318. exit;
  3319. end;
  3320. if MatchInstruction(hp1,A_CMP,A_TEST,[taicpu(p).opsize]) then
  3321. begin
  3322. if MatchOpType(taicpu(p),top_reg,top_ref) and
  3323. (taicpu(hp1).oper[1]^.typ = top_ref) and
  3324. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  3325. begin
  3326. { change
  3327. mov reg1, mem1
  3328. test/cmp x, mem1
  3329. to
  3330. mov reg1, mem1
  3331. test/cmp x, reg1
  3332. }
  3333. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  3334. DebugMsg(SPeepholeOptimization + 'MovTestCmp2MovTestCmp 1',hp1);
  3335. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  3336. Result := True;
  3337. Exit;
  3338. end;
  3339. if MatchOpType(taicpu(p),top_ref,top_reg) and
  3340. { The x86 assemblers have difficulty comparing values against absolute addresses }
  3341. (taicpu(p).oper[0]^.ref^.refaddr in [addr_no, addr_pic, addr_pic_no_got]) and
  3342. (taicpu(hp1).oper[0]^.typ <> top_ref) and
  3343. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^.reg) and
  3344. (
  3345. (
  3346. (taicpu(hp1).opcode = A_TEST)
  3347. ) or (
  3348. (taicpu(hp1).opcode = A_CMP) and
  3349. { A sanity check more than anything }
  3350. not MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^.reg)
  3351. )
  3352. ) then
  3353. begin
  3354. { change
  3355. mov mem, %reg
  3356. cmp/test x, %reg / test %reg,%reg
  3357. (reg deallocated)
  3358. to
  3359. cmp/test x, mem / cmp 0, mem
  3360. }
  3361. TransferUsedRegs(TmpUsedRegs);
  3362. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  3363. if not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs) then
  3364. begin
  3365. { Convert test %reg,%reg or test $-1,%reg to cmp $0,mem }
  3366. if (taicpu(hp1).opcode = A_TEST) and
  3367. (
  3368. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^.reg) or
  3369. MatchOperand(taicpu(hp1).oper[0]^, -1)
  3370. ) then
  3371. begin
  3372. taicpu(hp1).opcode := A_CMP;
  3373. taicpu(hp1).loadconst(0, 0);
  3374. end;
  3375. taicpu(hp1).loadref(1, taicpu(p).oper[0]^.ref^);
  3376. DebugMsg(SPeepholeOptimization + 'MOV/CMP -> CMP (memory check)', p);
  3377. RemoveCurrentP(p, hp1);
  3378. Result := True;
  3379. Exit;
  3380. end;
  3381. end;
  3382. end;
  3383. if MatchInstruction(hp1,A_LEA,[S_L{$ifdef x86_64},S_Q{$endif x86_64}]) and
  3384. { If the flags register is in use, don't change the instruction to an
  3385. ADD otherwise this will scramble the flags. [Kit] }
  3386. not RegInUsedRegs(NR_DEFAULTFLAGS, UsedRegs) then
  3387. begin
  3388. if MatchOpType(Taicpu(p),top_ref,top_reg) and
  3389. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  3390. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  3391. ) or
  3392. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  3393. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  3394. )
  3395. ) then
  3396. { mov reg1,ref
  3397. lea reg2,[reg1,reg2]
  3398. to
  3399. add reg2,ref}
  3400. begin
  3401. TransferUsedRegs(TmpUsedRegs);
  3402. { reg1 may not be used afterwards }
  3403. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  3404. begin
  3405. Taicpu(hp1).opcode:=A_ADD;
  3406. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  3407. DebugMsg(SPeepholeOptimization + 'MovLea2Add done',hp1);
  3408. RemoveCurrentp(p, hp1);
  3409. result:=true;
  3410. exit;
  3411. end;
  3412. end;
  3413. { If the LEA instruction can be converted into an arithmetic instruction,
  3414. it may be possible to then fold it in the next optimisation, otherwise
  3415. there's nothing more that can be optimised here. }
  3416. if not ConvertLEA(taicpu(hp1)) then
  3417. Exit;
  3418. end;
  3419. if (taicpu(p).oper[1]^.typ = top_reg) and
  3420. (hp1.typ = ait_instruction) and
  3421. GetNextInstruction(hp1, hp2) and
  3422. MatchInstruction(hp2,A_MOV,[]) and
  3423. (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
  3424. (topsize2memsize[taicpu(hp1).opsize]>=topsize2memsize[taicpu(hp2).opsize]) and
  3425. (
  3426. IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg)
  3427. {$ifdef x86_64}
  3428. or
  3429. (
  3430. (taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and (taicpu(hp2).opsize=S_L) and
  3431. IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ))
  3432. )
  3433. {$endif x86_64}
  3434. ) then
  3435. begin
  3436. if OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  3437. (taicpu(hp2).oper[0]^.typ=top_reg) then
  3438. { change movsX/movzX reg/ref, reg2
  3439. add/sub/or/... reg3/$const, reg2
  3440. mov reg2 reg/ref
  3441. dealloc reg2
  3442. to
  3443. add/sub/or/... reg3/$const, reg/ref }
  3444. begin
  3445. TransferUsedRegs(TmpUsedRegs);
  3446. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  3447. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  3448. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  3449. begin
  3450. { by example:
  3451. movswl %si,%eax movswl %si,%eax p
  3452. decl %eax addl %edx,%eax hp1
  3453. movw %ax,%si movw %ax,%si hp2
  3454. ->
  3455. movswl %si,%eax movswl %si,%eax p
  3456. decw %eax addw %edx,%eax hp1
  3457. movw %ax,%si movw %ax,%si hp2
  3458. }
  3459. DebugMsg(SPeepholeOptimization + 'MovOpMov2Op ('+
  3460. debug_op2str(taicpu(p).opcode)+debug_opsize2str(taicpu(p).opsize)+' '+
  3461. debug_op2str(taicpu(hp1).opcode)+debug_opsize2str(taicpu(hp1).opsize)+' '+
  3462. debug_op2str(taicpu(hp2).opcode)+debug_opsize2str(taicpu(hp2).opsize)+')',p);
  3463. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  3464. {
  3465. ->
  3466. movswl %si,%eax movswl %si,%eax p
  3467. decw %si addw %dx,%si hp1
  3468. movw %ax,%si movw %ax,%si hp2
  3469. }
  3470. case taicpu(hp1).ops of
  3471. 1:
  3472. begin
  3473. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  3474. if taicpu(hp1).oper[0]^.typ=top_reg then
  3475. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  3476. end;
  3477. 2:
  3478. begin
  3479. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  3480. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  3481. (taicpu(hp1).opcode<>A_SHL) and
  3482. (taicpu(hp1).opcode<>A_SHR) and
  3483. (taicpu(hp1).opcode<>A_SAR) then
  3484. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  3485. end;
  3486. else
  3487. internalerror(2008042701);
  3488. end;
  3489. {
  3490. ->
  3491. decw %si addw %dx,%si p
  3492. }
  3493. RemoveInstruction(hp2);
  3494. RemoveCurrentP(p, hp1);
  3495. Result:=True;
  3496. Exit;
  3497. end;
  3498. end;
  3499. if MatchOpType(taicpu(hp2),top_reg,top_reg) and
  3500. not(SuperRegistersEqual(taicpu(hp1).oper[0]^.reg,taicpu(hp2).oper[1]^.reg)) and
  3501. ((topsize2memsize[taicpu(hp1).opsize]<= topsize2memsize[taicpu(hp2).opsize]) or
  3502. { opsize matters for these opcodes, we could probably work around this, but it is not worth the effort }
  3503. ((taicpu(hp1).opcode<>A_SHL) and (taicpu(hp1).opcode<>A_SHR) and (taicpu(hp1).opcode<>A_SAR))
  3504. )
  3505. {$ifdef i386}
  3506. { byte registers of esi, edi, ebp, esp are not available on i386 }
  3507. and ((taicpu(hp2).opsize<>S_B) or not(getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_ESI,RS_EDI,RS_EBP,RS_ESP]))
  3508. and ((taicpu(hp2).opsize<>S_B) or not(getsupreg(taicpu(p).oper[0]^.reg) in [RS_ESI,RS_EDI,RS_EBP,RS_ESP]))
  3509. {$endif i386}
  3510. then
  3511. { change movsX/movzX reg/ref, reg2
  3512. add/sub/or/... regX/$const, reg2
  3513. mov reg2, reg3
  3514. dealloc reg2
  3515. to
  3516. movsX/movzX reg/ref, reg3
  3517. add/sub/or/... reg3/$const, reg3
  3518. }
  3519. begin
  3520. TransferUsedRegs(TmpUsedRegs);
  3521. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  3522. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  3523. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  3524. begin
  3525. { by example:
  3526. movswl %si,%eax movswl %si,%eax p
  3527. decl %eax addl %edx,%eax hp1
  3528. movw %ax,%si movw %ax,%si hp2
  3529. ->
  3530. movswl %si,%eax movswl %si,%eax p
  3531. decw %eax addw %edx,%eax hp1
  3532. movw %ax,%si movw %ax,%si hp2
  3533. }
  3534. DebugMsg(SPeepholeOptimization + 'MovOpMov2MovOp ('+
  3535. debug_op2str(taicpu(p).opcode)+debug_opsize2str(taicpu(p).opsize)+' '+
  3536. debug_op2str(taicpu(hp1).opcode)+debug_opsize2str(taicpu(hp1).opsize)+' '+
  3537. debug_op2str(taicpu(hp2).opcode)+debug_opsize2str(taicpu(hp2).opsize)+')',p);
  3538. { limit size of constants as well to avoid assembler errors, but
  3539. check opsize to avoid overflow when left shifting the 1 }
  3540. if (taicpu(p).oper[0]^.typ=top_const) and (topsize2memsize[taicpu(hp2).opsize]<=63) then
  3541. taicpu(p).oper[0]^.val:=taicpu(p).oper[0]^.val and ((qword(1) shl topsize2memsize[taicpu(hp2).opsize])-1);
  3542. {$ifdef x86_64}
  3543. { Be careful of, for example:
  3544. movl %reg1,%reg2
  3545. addl %reg3,%reg2
  3546. movq %reg2,%reg4
  3547. This will cause problems if the upper 32-bits of %reg3 or %reg4 are non-zero
  3548. }
  3549. if (taicpu(hp1).opsize = S_L) and (taicpu(hp2).opsize = S_Q) then
  3550. begin
  3551. taicpu(hp2).changeopsize(S_L);
  3552. setsubreg(taicpu(hp2).oper[0]^.reg, R_SUBD);
  3553. setsubreg(taicpu(hp2).oper[1]^.reg, R_SUBD);
  3554. end;
  3555. {$endif x86_64}
  3556. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  3557. taicpu(p).changeopsize(taicpu(hp2).opsize);
  3558. if taicpu(p).oper[0]^.typ=top_reg then
  3559. setsubreg(taicpu(p).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  3560. taicpu(p).loadoper(1, taicpu(hp2).oper[1]^);
  3561. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,usedregs);
  3562. {
  3563. ->
  3564. movswl %si,%eax movswl %si,%eax p
  3565. decw %si addw %dx,%si hp1
  3566. movw %ax,%si movw %ax,%si hp2
  3567. }
  3568. case taicpu(hp1).ops of
  3569. 1:
  3570. begin
  3571. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  3572. if taicpu(hp1).oper[0]^.typ=top_reg then
  3573. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  3574. end;
  3575. 2:
  3576. begin
  3577. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  3578. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  3579. (taicpu(hp1).opcode<>A_SHL) and
  3580. (taicpu(hp1).opcode<>A_SHR) and
  3581. (taicpu(hp1).opcode<>A_SAR) then
  3582. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  3583. end;
  3584. else
  3585. internalerror(2018111801);
  3586. end;
  3587. {
  3588. ->
  3589. decw %si addw %dx,%si p
  3590. }
  3591. RemoveInstruction(hp2);
  3592. end;
  3593. end;
  3594. end;
  3595. if MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  3596. GetNextInstruction(hp1, hp2) and
  3597. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  3598. MatchOperand(Taicpu(p).oper[0]^,0) and
  3599. (Taicpu(p).oper[1]^.typ = top_reg) and
  3600. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  3601. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  3602. { mov reg1,0
  3603. bts reg1,operand1 --> mov reg1,operand2
  3604. or reg1,operand2 bts reg1,operand1}
  3605. begin
  3606. Taicpu(hp2).opcode:=A_MOV;
  3607. asml.remove(hp1);
  3608. insertllitem(hp2,hp2.next,hp1);
  3609. RemoveCurrentp(p, hp1);
  3610. Result:=true;
  3611. exit;
  3612. end;
  3613. {$ifdef x86_64}
  3614. { Convert:
  3615. movq x(ref),%reg64
  3616. shrq y,%reg64
  3617. To:
  3618. movq x+4(ref),%reg32
  3619. shrq y-32,%reg32 (Remove if y = 32)
  3620. }
  3621. if (taicpu(p).opsize = S_Q) and
  3622. (taicpu(p).oper[0]^.typ = top_ref) and { Second operand will be a register }
  3623. (taicpu(p).oper[0]^.ref^.offset <= $7FFFFFFB) and
  3624. MatchInstruction(hp1, A_SHR, [taicpu(p).opsize]) and
  3625. MatchOpType(taicpu(hp1), top_const, top_reg) and
  3626. (taicpu(hp1).oper[0]^.val >= 32) and
  3627. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  3628. begin
  3629. RegName1 := debug_regname(taicpu(hp1).oper[1]^.reg);
  3630. PreMessage := 'movq ' + debug_operstr(taicpu(p).oper[0]^) + ',' + RegName1 + '; ' +
  3631. 'shrq $' + debug_tostr(taicpu(hp1).oper[0]^.val) + ',' + RegName1 + ' -> movl ';
  3632. { Convert to 32-bit }
  3633. setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
  3634. taicpu(p).opsize := S_L;
  3635. Inc(taicpu(p).oper[0]^.ref^.offset, 4);
  3636. PreMessage := PreMessage + debug_operstr(taicpu(p).oper[0]^) + ',' + debug_regname(taicpu(p).oper[1]^.reg);
  3637. if (taicpu(hp1).oper[0]^.val = 32) then
  3638. begin
  3639. DebugMsg(SPeepholeOptimization + PreMessage + ' (MovShr2Mov)', p);
  3640. RemoveInstruction(hp1);
  3641. end
  3642. else
  3643. begin
  3644. { This will potentially open up more arithmetic operations since
  3645. the peephole optimizer now has a big hint that only the lower
  3646. 32 bits are currently in use (and opcodes are smaller in size) }
  3647. setsubreg(taicpu(hp1).oper[1]^.reg, R_SUBD);
  3648. taicpu(hp1).opsize := S_L;
  3649. Dec(taicpu(hp1).oper[0]^.val, 32);
  3650. DebugMsg(SPeepholeOptimization + PreMessage +
  3651. '; shrl $' + debug_tostr(taicpu(hp1).oper[0]^.val) + ',' + debug_regname(taicpu(hp1).oper[1]^.reg) + ' (MovShr2MovShr)', p);
  3652. end;
  3653. Result := True;
  3654. Exit;
  3655. end;
  3656. {$endif x86_64}
  3657. end;
function TX86AsmOptimizer.OptPass1MOVXX(var p : tai) : boolean;
  var
    hp1 : tai;
  begin
    { Looks for a pair of identical-opcode, identical-size MOVXX instructions
      where the second instruction's operands are the first one's swapped
      (store-then-reload or load-then-restore) and removes the redundant
      second instruction - and, when the destination register of p is not
      used afterwards, the first instruction as well. }
    Result:=false;
    if taicpu(p).ops <> 2 then
      exit;
    if GetNextInstruction(p,hp1) and
      MatchInstruction(hp1,taicpu(p).opcode,[taicpu(p).opsize]) and
      (taicpu(hp1).ops = 2) then
      begin
        if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
          (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
          { movXX reg1, mem1     or     movXX mem1, reg1
            movXX mem2, reg2            movXX reg2, mem2 }
          begin
            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
              { movXX reg1, mem1     or     movXX mem1, reg1
                movXX mem2, reg1            movXX reg2, mem1 }
              begin
                if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
                  begin
                    { Removes the second statement from
                        movXX reg1, mem1/reg2
                        movXX mem1/reg2, reg1
                    }
                    if taicpu(p).oper[0]^.typ=top_reg then
                      { keep the source register marked as allocated across
                        the pair so later passes do not reuse it }
                      AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                    { Removes the second statement from
                        movXX mem1/reg1, reg2
                        movXX reg2, mem1/reg1
                    }
                    if (taicpu(p).oper[1]^.typ=top_reg) and
                      not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)) then
                      begin
                        { destination register dies here, so the first move
                          is dead too - drop both instructions }
                        DebugMsg(SPeepholeOptimization + 'MovXXMovXX2Nop 1 done',p);
                        RemoveInstruction(hp1);
                        RemoveCurrentp(p); { p will now be equal to the instruction that follows what was hp1 }
                      end
                    else
                      begin
                        { only the second (round-trip) move is redundant }
                        DebugMsg(SPeepholeOptimization + 'MovXXMovXX2MoVXX 1 done',p);
                        RemoveInstruction(hp1);
                      end;
                    Result:=true;
                    exit;
                  end
              end;
          end;
      end;
  end;
  3709. function TX86AsmOptimizer.OptPass1OP(var p : tai) : boolean;
  3710. var
  3711. hp1 : tai;
  3712. begin
  3713. result:=false;
  3714. { replace
  3715. <Op>X %mreg1,%mreg2 // Op in [ADD,MUL]
  3716. MovX %mreg2,%mreg1
  3717. dealloc %mreg2
  3718. by
  3719. <Op>X %mreg2,%mreg1
  3720. ?
  3721. }
  3722. if GetNextInstruction(p,hp1) and
  3723. { we mix single and double opperations here because we assume that the compiler
  3724. generates vmovapd only after double operations and vmovaps only after single operations }
  3725. MatchInstruction(hp1,A_MOVAPD,A_MOVAPS,[S_NO]) and
  3726. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  3727. MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
  3728. (taicpu(p).oper[0]^.typ=top_reg) then
  3729. begin
  3730. TransferUsedRegs(TmpUsedRegs);
  3731. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  3732. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  3733. begin
  3734. taicpu(p).loadoper(0,taicpu(hp1).oper[0]^);
  3735. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  3736. DebugMsg(SPeepholeOptimization + 'OpMov2Op done',p);
  3737. RemoveInstruction(hp1);
  3738. result:=true;
  3739. end;
  3740. end;
  3741. end;
function TX86AsmOptimizer.OptPass1Test(var p: tai) : boolean;
  var
    hp1, p_label, p_dist, hp1_dist: tai;
    JumpLabel, JumpLabel_dist: TAsmLabel;
  begin
    { Peephole optimisations for a TEST instruction whose second operand is a
      register: hoist an unrelated following MOV above the TEST, or redirect
      a conditional jump that lands on an equivalent TEST/Jcc pair. }
    Result := False;
    if (taicpu(p).oper[1]^.typ = top_reg) then
      begin
        if GetNextInstruction(p, hp1) and
          MatchInstruction(hp1,A_MOV,[]) and
          not RegInInstruction(taicpu(p).oper[1]^.reg, hp1) and
          (
            (taicpu(p).oper[0]^.typ <> top_reg) or
            not RegInInstruction(taicpu(p).oper[0]^.reg, hp1)
          ) then
          begin
            { If we have something like:
                test %reg1,%reg1
                mov  0,%reg2
              And no registers are shared (the two %reg1's can be different, as
              long as neither of them are also %reg2), move the MOV command to
              before the comparison as this means it can be optimised without
              worrying about the FLAGS register. (This combination is generated
              by "J(c)Mov1JmpMov0 -> Set(~c)", among other things).
            }
            SwapMovCmp(p, hp1);
            Result := True;
            Exit;
          end;
        { Search for:
            test %reg,%reg
            j(c1) @lbl1
            ...
          @lbl:
            test %reg,%reg  (same register)
            j(c2) @lbl2
          If c2 is a subset of c1, change to:
            test %reg,%reg
            j(c1) @lbl2
          (@lbl1 may become a dead label as a result)
        }
        { NOTE(review): hp1 below comes from the GetNextInstruction call at
          the top of this routine; presumably hp1 is always assigned (even
          when that call returned False) and MatchInstruction tolerates the
          failure value - confirm against GetNextInstruction's contract. }
        if (taicpu(p).oper[0]^.typ = top_reg) and
          (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
          MatchInstruction(hp1, A_JCC, []) and
          IsJumpToLabel(taicpu(hp1)) then
          begin
            JumpLabel := TAsmLabel(taicpu(hp1).oper[0]^.ref^.symbol);
            p_label := nil;
            if Assigned(JumpLabel) then
              p_label := getlabelwithsym(JumpLabel);
            if Assigned(p_label) and
              GetNextInstruction(p_label, p_dist) and
              MatchInstruction(p_dist, A_TEST, []) and
              { It's fine if the second test uses smaller sub-registers }
              (taicpu(p_dist).opsize <= taicpu(p).opsize) and
              MatchOpType(taicpu(p_dist), top_reg, top_reg) and
              SuperRegistersEqual(taicpu(p_dist).oper[0]^.reg, taicpu(p).oper[0]^.reg) and
              SuperRegistersEqual(taicpu(p_dist).oper[1]^.reg, taicpu(p).oper[1]^.reg) and
              GetNextInstruction(p_dist, hp1_dist) and
              MatchInstruction(hp1_dist, A_JCC, []) then { This doesn't have to be an explicit label }
              begin
                JumpLabel_dist := TAsmLabel(taicpu(hp1_dist).oper[0]^.ref^.symbol);
                if JumpLabel = JumpLabel_dist then
                  { This is an infinite loop }
                  Exit;
                { Best optimisation when the first condition is a subset (or equal) of the second }
                if condition_in(taicpu(hp1).condition, taicpu(hp1_dist).condition) then
                  begin
                    { Any registers used here will already be allocated }
                    if Assigned(JumpLabel_dist) then
                      JumpLabel_dist.IncRefs;
                    if Assigned(JumpLabel) then
                      JumpLabel.DecRefs;
                    DebugMsg(SPeepholeOptimization + 'TEST/Jcc/@Lbl/TEST/Jcc -> TEST/Jcc, redirecting first jump', hp1);
                    taicpu(hp1).loadref(0, taicpu(hp1_dist).oper[0]^.ref^);
                    Result := True;
                    Exit;
                  end;
              end;
          end;
      end;
  end;
  3824. function TX86AsmOptimizer.OptPass1Add(var p : tai) : boolean;
  3825. var
  3826. hp1 : tai;
  3827. begin
  3828. result:=false;
  3829. { replace
  3830. addX const,%reg1
  3831. leaX (%reg1,%reg1,Y),%reg2 // Base or index might not be equal to reg1
  3832. dealloc %reg1
  3833. by
  3834. leaX const+const*Y(%reg1,%reg1,Y),%reg2
  3835. }
  3836. if MatchOpType(taicpu(p),top_const,top_reg) and
  3837. GetNextInstruction(p,hp1) and
  3838. MatchInstruction(hp1,A_LEA,[taicpu(p).opsize]) and
  3839. ((taicpu(p).oper[1]^.reg=taicpu(hp1).oper[0]^.ref^.base) or
  3840. (taicpu(p).oper[1]^.reg=taicpu(hp1).oper[0]^.ref^.index)) then
  3841. begin
  3842. TransferUsedRegs(TmpUsedRegs);
  3843. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  3844. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  3845. begin
  3846. DebugMsg(SPeepholeOptimization + 'AddLea2Lea done',p);
  3847. if taicpu(p).oper[1]^.reg=taicpu(hp1).oper[0]^.ref^.base then
  3848. inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.val);
  3849. if taicpu(p).oper[1]^.reg=taicpu(hp1).oper[0]^.ref^.index then
  3850. inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.val*max(taicpu(hp1).oper[0]^.ref^.scalefactor,1));
  3851. RemoveCurrentP(p);
  3852. result:=true;
  3853. end;
  3854. end;
  3855. end;
  3856. function TX86AsmOptimizer.OptPass1LEA(var p : tai) : boolean;
  3857. var
  3858. hp1: tai;
  3859. ref: Integer;
  3860. saveref: treference;
  3861. TempReg: TRegister;
  3862. Multiple: TCGInt;
  3863. begin
  3864. Result:=false;
  3865. { removes seg register prefixes from LEA operations, as they
  3866. don't do anything}
  3867. taicpu(p).oper[0]^.ref^.Segment:=NR_NO;
  3868. { changes "lea (%reg1), %reg2" into "mov %reg1, %reg2" }
  3869. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  3870. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  3871. (
  3872. { do not mess with leas accessing the stack pointer
  3873. unless it's a null operation }
  3874. (taicpu(p).oper[1]^.reg <> NR_STACK_POINTER_REG) or
  3875. (
  3876. (taicpu(p).oper[0]^.ref^.base = NR_STACK_POINTER_REG) and
  3877. (taicpu(p).oper[0]^.ref^.offset = 0)
  3878. )
  3879. ) and
  3880. (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
  3881. begin
  3882. if (taicpu(p).oper[0]^.ref^.offset = 0) then
  3883. begin
  3884. if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) then
  3885. begin
  3886. hp1:=taicpu.op_reg_reg(A_MOV,taicpu(p).opsize,taicpu(p).oper[0]^.ref^.base,
  3887. taicpu(p).oper[1]^.reg);
  3888. InsertLLItem(p.previous,p.next, hp1);
  3889. DebugMsg(SPeepholeOptimization + 'Lea2Mov done',hp1);
  3890. p.free;
  3891. p:=hp1;
  3892. end
  3893. else
  3894. begin
  3895. DebugMsg(SPeepholeOptimization + 'Lea2Nop done',p);
  3896. RemoveCurrentP(p);
  3897. end;
  3898. Result:=true;
  3899. exit;
  3900. end
  3901. else if (
  3902. { continue to use lea to adjust the stack pointer,
  3903. it is the recommended way, but only if not optimizing for size }
  3904. (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
  3905. (cs_opt_size in current_settings.optimizerswitches)
  3906. ) and
  3907. { If the flags register is in use, don't change the instruction
  3908. to an ADD otherwise this will scramble the flags. [Kit] }
  3909. not RegInUsedRegs(NR_DEFAULTFLAGS, UsedRegs) and
  3910. ConvertLEA(taicpu(p)) then
  3911. begin
  3912. Result:=true;
  3913. exit;
  3914. end;
  3915. end;
  3916. if GetNextInstruction(p,hp1) and
  3917. (hp1.typ=ait_instruction) then
  3918. begin
  3919. if MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) and
  3920. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  3921. MatchOpType(Taicpu(hp1),top_reg,top_reg) and
  3922. (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) then
  3923. begin
  3924. TransferUsedRegs(TmpUsedRegs);
  3925. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  3926. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  3927. begin
  3928. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  3929. DebugMsg(SPeepholeOptimization + 'LeaMov2Lea done',p);
  3930. RemoveInstruction(hp1);
  3931. result:=true;
  3932. exit;
  3933. end;
  3934. end;
  3935. { changes
  3936. lea <ref1>, reg1
  3937. <op> ...,<ref. with reg1>,...
  3938. to
  3939. <op> ...,<ref1>,... }
  3940. if (taicpu(p).oper[1]^.reg<>current_procinfo.framepointer) and
  3941. (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) and
  3942. not(MatchInstruction(hp1,A_LEA,[])) then
  3943. begin
  3944. { find a reference which uses reg1 }
  3945. if (taicpu(hp1).ops>=1) and (taicpu(hp1).oper[0]^.typ=top_ref) and RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[0]^) then
  3946. ref:=0
  3947. else if (taicpu(hp1).ops>=2) and (taicpu(hp1).oper[1]^.typ=top_ref) and RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^) then
  3948. ref:=1
  3949. else
  3950. ref:=-1;
  3951. if (ref<>-1) and
  3952. { reg1 must be either the base or the index }
  3953. ((taicpu(hp1).oper[ref]^.ref^.base=taicpu(p).oper[1]^.reg) xor (taicpu(hp1).oper[ref]^.ref^.index=taicpu(p).oper[1]^.reg)) then
  3954. begin
  3955. { reg1 can be removed from the reference }
  3956. saveref:=taicpu(hp1).oper[ref]^.ref^;
  3957. if taicpu(hp1).oper[ref]^.ref^.base=taicpu(p).oper[1]^.reg then
  3958. taicpu(hp1).oper[ref]^.ref^.base:=NR_NO
  3959. else if taicpu(hp1).oper[ref]^.ref^.index=taicpu(p).oper[1]^.reg then
  3960. taicpu(hp1).oper[ref]^.ref^.index:=NR_NO
  3961. else
  3962. Internalerror(2019111201);
  3963. { check if the can insert all data of the lea into the second instruction }
  3964. if ((taicpu(hp1).oper[ref]^.ref^.base=taicpu(p).oper[1]^.reg) or (taicpu(hp1).oper[ref]^.ref^.scalefactor <= 1)) and
  3965. ((taicpu(p).oper[0]^.ref^.base=NR_NO) or (taicpu(hp1).oper[ref]^.ref^.base=NR_NO)) and
  3966. ((taicpu(p).oper[0]^.ref^.index=NR_NO) or (taicpu(hp1).oper[ref]^.ref^.index=NR_NO)) and
  3967. ((taicpu(p).oper[0]^.ref^.symbol=nil) or (taicpu(hp1).oper[ref]^.ref^.symbol=nil)) and
  3968. ((taicpu(p).oper[0]^.ref^.relsymbol=nil) or (taicpu(hp1).oper[ref]^.ref^.relsymbol=nil)) and
  3969. ((taicpu(p).oper[0]^.ref^.scalefactor <= 1) or (taicpu(hp1).oper[ref]^.ref^.scalefactor <= 1)) and
  3970. (taicpu(p).oper[0]^.ref^.segment=NR_NO) and (taicpu(hp1).oper[ref]^.ref^.segment=NR_NO)
  3971. {$ifdef x86_64}
  3972. and (abs(taicpu(hp1).oper[ref]^.ref^.offset+taicpu(p).oper[0]^.ref^.offset)<=$7fffffff)
  3973. and (((taicpu(p).oper[0]^.ref^.base<>NR_RIP) and (taicpu(p).oper[0]^.ref^.index<>NR_RIP)) or
  3974. ((taicpu(hp1).oper[ref]^.ref^.base=NR_NO) and (taicpu(hp1).oper[ref]^.ref^.index=NR_NO))
  3975. )
  3976. {$endif x86_64}
  3977. then
  3978. begin
  3979. { reg1 might not used by the second instruction after it is remove from the reference }
  3980. if not(RegInInstruction(taicpu(p).oper[1]^.reg,taicpu(hp1))) then
  3981. begin
  3982. TransferUsedRegs(TmpUsedRegs);
  3983. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  3984. { reg1 is not updated so it might not be used afterwards }
  3985. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  3986. begin
  3987. DebugMsg(SPeepholeOptimization + 'LeaOp2Op done',p);
  3988. if taicpu(p).oper[0]^.ref^.base<>NR_NO then
  3989. taicpu(hp1).oper[ref]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
  3990. if taicpu(p).oper[0]^.ref^.index<>NR_NO then
  3991. taicpu(hp1).oper[ref]^.ref^.index:=taicpu(p).oper[0]^.ref^.index;
  3992. if taicpu(p).oper[0]^.ref^.symbol<>nil then
  3993. taicpu(hp1).oper[ref]^.ref^.symbol:=taicpu(p).oper[0]^.ref^.symbol;
  3994. if taicpu(p).oper[0]^.ref^.relsymbol<>nil then
  3995. taicpu(hp1).oper[ref]^.ref^.relsymbol:=taicpu(p).oper[0]^.ref^.relsymbol;
  3996. if taicpu(p).oper[0]^.ref^.scalefactor > 1 then
  3997. taicpu(hp1).oper[ref]^.ref^.scalefactor:=taicpu(p).oper[0]^.ref^.scalefactor;
  3998. inc(taicpu(hp1).oper[ref]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
  3999. RemoveCurrentP(p, hp1);
  4000. result:=true;
  4001. exit;
  4002. end
  4003. end;
  4004. end;
  4005. { recover }
  4006. taicpu(hp1).oper[ref]^.ref^:=saveref;
  4007. end;
  4008. end;
  4009. end;
  4010. { for now, we do not mess with the stack pointer, thought it might be usefull to remove
  4011. unneeded lea sequences on the stack pointer, it needs to be tested in detail }
  4012. if (taicpu(p).oper[1]^.reg <> NR_STACK_POINTER_REG) and
  4013. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[1]^.reg) then
  4014. begin
  4015. { Check common LEA/LEA conditions }
  4016. if MatchInstruction(hp1,A_LEA,[taicpu(p).opsize]) and
  4017. (taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg) and
  4018. (taicpu(p).oper[0]^.ref^.relsymbol = nil) and
  4019. (taicpu(p).oper[0]^.ref^.segment = NR_NO) and
  4020. (taicpu(p).oper[0]^.ref^.symbol = nil) and
  4021. (taicpu(hp1).oper[0]^.ref^.relsymbol = nil) and
  4022. (taicpu(hp1).oper[0]^.ref^.segment = NR_NO) and
  4023. (taicpu(hp1).oper[0]^.ref^.symbol = nil) and
  4024. (
  4025. (taicpu(p).oper[0]^.ref^.base = NR_NO) or { Don't call RegModifiedBetween unnecessarily }
  4026. not(RegModifiedBetween(taicpu(p).oper[0]^.ref^.base,p,hp1))
  4027. ) and (
  4028. (taicpu(p).oper[0]^.ref^.index = taicpu(p).oper[0]^.ref^.base) or { Don't call RegModifiedBetween unnecessarily }
  4029. (taicpu(p).oper[0]^.ref^.index = NR_NO) or
  4030. not(RegModifiedBetween(taicpu(p).oper[0]^.ref^.index,p,hp1))
  4031. ) then
  4032. begin
  4033. { changes
  4034. lea (regX,scale), reg1
  4035. lea offset(reg1,reg1), reg1
  4036. to
  4037. lea offset(regX,scale*2), reg1
  4038. and
  4039. lea (regX,scale1), reg1
  4040. lea offset(reg1,scale2), reg1
  4041. to
  4042. lea offset(regX,scale1*scale2), reg1
  4043. ... so long as the final scale does not exceed 8
  4044. (Similarly, allow the first instruction to be "lea (regX,regX),reg1")
  4045. }
  4046. if (taicpu(p).oper[0]^.ref^.offset = 0) and
  4047. (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) and
  4048. (
  4049. (
  4050. (taicpu(p).oper[0]^.ref^.base = NR_NO)
  4051. ) or (
  4052. (taicpu(p).oper[0]^.ref^.scalefactor <= 1) and
  4053. (
  4054. (taicpu(p).oper[0]^.ref^.base = taicpu(p).oper[0]^.ref^.index) and
  4055. not(RegUsedBetween(taicpu(p).oper[0]^.ref^.index, p, hp1))
  4056. )
  4057. )
  4058. ) and (
  4059. (
  4060. { lea (reg1,scale2), reg1 variant }
  4061. (taicpu(hp1).oper[0]^.ref^.base = NR_NO) and
  4062. (
  4063. (
  4064. (taicpu(p).oper[0]^.ref^.base = NR_NO) and
  4065. (taicpu(hp1).oper[0]^.ref^.scalefactor * taicpu(p).oper[0]^.ref^.scalefactor <= 8)
  4066. ) or (
  4067. { lea (regX,regX), reg1 variant }
  4068. (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  4069. (taicpu(hp1).oper[0]^.ref^.scalefactor <= 4)
  4070. )
  4071. )
  4072. ) or (
  4073. { lea (reg1,reg1), reg1 variant }
  4074. (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) and
  4075. (taicpu(hp1).oper[0]^.ref^.scalefactor <= 1)
  4076. )
  4077. ) then
  4078. begin
  4079. DebugMsg(SPeepholeOptimization + 'LeaLea2Lea 2 done',p);
  4080. { Make everything homogeneous to make calculations easier }
  4081. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) then
  4082. begin
  4083. if taicpu(p).oper[0]^.ref^.index <> NR_NO then
  4084. { Convert lea (regX,regX),reg1 to lea (regX,2),reg1 }
  4085. taicpu(p).oper[0]^.ref^.scalefactor := 2
  4086. else
  4087. taicpu(p).oper[0]^.ref^.index := taicpu(p).oper[0]^.ref^.base;
  4088. taicpu(p).oper[0]^.ref^.base := NR_NO;
  4089. end;
  4090. if (taicpu(hp1).oper[0]^.ref^.base = NR_NO) then
  4091. begin
  4092. { Just to prevent miscalculations }
  4093. if (taicpu(hp1).oper[0]^.ref^.scalefactor = 0) then
  4094. taicpu(hp1).oper[0]^.ref^.scalefactor := taicpu(p).oper[0]^.ref^.scalefactor
  4095. else
  4096. taicpu(hp1).oper[0]^.ref^.scalefactor := taicpu(hp1).oper[0]^.ref^.scalefactor * taicpu(p).oper[0]^.ref^.scalefactor;
  4097. end
  4098. else
  4099. begin
  4100. taicpu(hp1).oper[0]^.ref^.base := NR_NO;
  4101. taicpu(hp1).oper[0]^.ref^.scalefactor := taicpu(p).oper[0]^.ref^.scalefactor * 2;
  4102. end;
  4103. taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.ref^.index;
  4104. RemoveCurrentP(p);
  4105. result:=true;
  4106. exit;
  4107. end
  4108. { changes
  4109. lea offset1(regX), reg1
  4110. lea offset2(reg1), reg1
  4111. to
  4112. lea offset1+offset2(regX), reg1 }
  4113. else if
  4114. (
  4115. (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) and
  4116. (taicpu(p).oper[0]^.ref^.index = NR_NO)
  4117. ) or (
  4118. (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) and
  4119. (taicpu(hp1).oper[0]^.ref^.scalefactor <= 1) and
  4120. (
  4121. (
  4122. (taicpu(p).oper[0]^.ref^.index = NR_NO) or
  4123. (taicpu(p).oper[0]^.ref^.base = NR_NO)
  4124. ) or (
  4125. (taicpu(p).oper[0]^.ref^.scalefactor <= 1) and
  4126. (
  4127. (taicpu(p).oper[0]^.ref^.index = NR_NO) or
  4128. (
  4129. (taicpu(p).oper[0]^.ref^.index = taicpu(p).oper[0]^.ref^.base) and
  4130. (
  4131. (taicpu(hp1).oper[0]^.ref^.index = NR_NO) or
  4132. (taicpu(hp1).oper[0]^.ref^.base = NR_NO)
  4133. )
  4134. )
  4135. )
  4136. )
  4137. )
  4138. ) then
  4139. begin
  4140. DebugMsg(SPeepholeOptimization + 'LeaLea2Lea 1 done',p);
  4141. if taicpu(hp1).oper[0]^.ref^.index=taicpu(p).oper[1]^.reg then
  4142. begin
  4143. taicpu(hp1).oper[0]^.ref^.index:=taicpu(p).oper[0]^.ref^.base;
  4144. inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset*max(taicpu(hp1).oper[0]^.ref^.scalefactor,1));
  4145. { if the register is used as index and base, we have to increase for base as well
  4146. and adapt base }
  4147. if taicpu(hp1).oper[0]^.ref^.base=taicpu(p).oper[1]^.reg then
  4148. begin
  4149. taicpu(hp1).oper[0]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
  4150. inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
  4151. end;
  4152. end
  4153. else
  4154. begin
  4155. inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
  4156. taicpu(hp1).oper[0]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
  4157. end;
  4158. if taicpu(p).oper[0]^.ref^.index<>NR_NO then
  4159. begin
  4160. taicpu(hp1).oper[0]^.ref^.base:=taicpu(hp1).oper[0]^.ref^.index;
  4161. taicpu(hp1).oper[0]^.ref^.index:=taicpu(p).oper[0]^.ref^.index;
  4162. taicpu(hp1).oper[0]^.ref^.scalefactor:=taicpu(p).oper[0]^.ref^.scalefactor;
  4163. end;
  4164. RemoveCurrentP(p);
  4165. result:=true;
  4166. exit;
  4167. end;
  4168. end;
  4169. { Change:
  4170. leal/q $x(%reg1),%reg2
  4171. ...
  4172. shll/q $y,%reg2
  4173. To:
  4174. leal/q $(x+2^y)(%reg1,2^y),%reg2 (if y <= 3)
  4175. }
  4176. if MatchInstruction(hp1, A_SHL, [taicpu(p).opsize]) and
  4177. MatchOpType(taicpu(hp1), top_const, top_reg) and
  4178. (taicpu(hp1).oper[0]^.val <= 3) then
  4179. begin
  4180. Multiple := 1 shl taicpu(hp1).oper[0]^.val;
  4181. TransferUsedRegs(TmpUsedRegs);
  4182. UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
  4183. TempReg := taicpu(hp1).oper[1]^.reg; { Store locally to reduce the number of dereferences }
  4184. if
  4185. { This allows the optimisation in some circumstances even if the lea instruction already has a scale factor
  4186. (this works even if scalefactor is zero) }
  4187. ((Multiple * taicpu(p).oper[0]^.ref^.scalefactor) <= 8) and
  4188. { Ensure offset doesn't go out of bounds }
  4189. (abs(taicpu(p).oper[0]^.ref^.offset * Multiple) <= $7FFFFFFF) and
  4190. not (RegInUsedRegs(NR_DEFAULTFLAGS,TmpUsedRegs)) and
  4191. MatchOperand(taicpu(p).oper[1]^, TempReg) and
  4192. (
  4193. (
  4194. not SuperRegistersEqual(taicpu(p).oper[0]^.ref^.base, TempReg) and
  4195. (
  4196. (taicpu(p).oper[0]^.ref^.index = NR_NO) or
  4197. (taicpu(p).oper[0]^.ref^.index = NR_INVALID) or
  4198. (
  4199. { Check for lea $x(%reg1,%reg1),%reg2 and treat as it it were lea $x(%reg1,2),%reg2 }
  4200. (taicpu(p).oper[0]^.ref^.index = taicpu(p).oper[0]^.ref^.base) and
  4201. (taicpu(p).oper[0]^.ref^.scalefactor <= 1)
  4202. )
  4203. )
  4204. ) or (
  4205. (
  4206. (taicpu(p).oper[0]^.ref^.base = NR_NO) or
  4207. (taicpu(p).oper[0]^.ref^.base = NR_INVALID)
  4208. ) and
  4209. not SuperRegistersEqual(taicpu(p).oper[0]^.ref^.index, TempReg)
  4210. )
  4211. ) then
  4212. begin
  4213. repeat
  4214. with taicpu(p).oper[0]^.ref^ do
  4215. begin
  4216. { Convert lea $x(%reg1,%reg1),%reg2 to lea $x(%reg1,2),%reg2 }
  4217. if index = base then
  4218. begin
  4219. if Multiple > 4 then
  4220. { Optimisation will no longer work because resultant
  4221. scale factor will exceed 8 }
  4222. Break;
  4223. base := NR_NO;
  4224. scalefactor := 2;
  4225. DebugMsg(SPeepholeOptimization + 'lea $x(%reg1,%reg1),%reg2 -> lea $x(%reg1,2),%reg2 for following optimisation', p);
  4226. end
  4227. else if (base <> NR_NO) and (base <> NR_INVALID) then
  4228. begin
  4229. { Scale factor only works on the index register }
  4230. index := base;
  4231. base := NR_NO;
  4232. end;
  4233. { For safety }
  4234. if scalefactor <= 1 then
  4235. begin
  4236. DebugMsg(SPeepholeOptimization + 'LeaShl2Lea 1', p);
  4237. scalefactor := Multiple;
  4238. end
  4239. else
  4240. begin
  4241. DebugMsg(SPeepholeOptimization + 'LeaShl2Lea 2', p);
  4242. scalefactor := scalefactor * Multiple;
  4243. end;
  4244. offset := offset * Multiple;
  4245. end;
  4246. RemoveInstruction(hp1);
  4247. Result := True;
  4248. Exit;
  4249. { This repeat..until loop exists for the benefit of Break }
  4250. until True;
  4251. end;
  4252. end;
  4253. end;
  4254. end;
  4255. function TX86AsmOptimizer.DoSubAddOpt(var p: tai): Boolean;
  4256. var
  4257. hp1 : tai;
  4258. begin
  4259. DoSubAddOpt := False;
  4260. if GetLastInstruction(p, hp1) and
  4261. (hp1.typ = ait_instruction) and
  4262. (taicpu(hp1).opsize = taicpu(p).opsize) then
  4263. case taicpu(hp1).opcode Of
  4264. A_DEC:
  4265. if (taicpu(hp1).oper[0]^.typ = top_reg) and
  4266. MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  4267. begin
  4268. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+1);
  4269. RemoveInstruction(hp1);
  4270. end;
  4271. A_SUB:
  4272. if MatchOpType(taicpu(hp1),top_const,top_reg) and
  4273. MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) then
  4274. begin
  4275. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+taicpu(hp1).oper[0]^.val);
  4276. RemoveInstruction(hp1);
  4277. end;
  4278. A_ADD:
  4279. begin
  4280. if MatchOpType(taicpu(hp1),top_const,top_reg) and
  4281. MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) then
  4282. begin
  4283. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
  4284. RemoveInstruction(hp1);
  4285. if (taicpu(p).oper[0]^.val = 0) then
  4286. begin
  4287. hp1 := tai(p.next);
  4288. RemoveInstruction(p); { Note, the choice to not use RemoveCurrentp is deliberate }
  4289. if not GetLastInstruction(hp1, p) then
  4290. p := hp1;
  4291. DoSubAddOpt := True;
  4292. end
  4293. end;
  4294. end;
  4295. else
  4296. ;
  4297. end;
  4298. end;
  4299. function TX86AsmOptimizer.OptPass1Sub(var p : tai) : boolean;
  4300. {$ifdef i386}
  4301. var
  4302. hp1 : tai;
  4303. {$endif i386}
  4304. begin
  4305. Result:=false;
  4306. { * change "subl $2, %esp; pushw x" to "pushl x"}
  4307. { * change "sub/add const1, reg" or "dec reg" followed by
  4308. "sub const2, reg" to one "sub ..., reg" }
  4309. if MatchOpType(taicpu(p),top_const,top_reg) then
  4310. begin
  4311. {$ifdef i386}
  4312. if (taicpu(p).oper[0]^.val = 2) and
  4313. (taicpu(p).oper[1]^.reg = NR_ESP) and
  4314. { Don't do the sub/push optimization if the sub }
  4315. { comes from setting up the stack frame (JM) }
  4316. (not(GetLastInstruction(p,hp1)) or
  4317. not(MatchInstruction(hp1,A_MOV,[S_L]) and
  4318. MatchOperand(taicpu(hp1).oper[0]^,NR_ESP) and
  4319. MatchOperand(taicpu(hp1).oper[0]^,NR_EBP))) then
  4320. begin
  4321. hp1 := tai(p.next);
  4322. while Assigned(hp1) and
  4323. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  4324. not RegReadByInstruction(NR_ESP,hp1) and
  4325. not RegModifiedByInstruction(NR_ESP,hp1) do
  4326. hp1 := tai(hp1.next);
  4327. if Assigned(hp1) and
  4328. MatchInstruction(hp1,A_PUSH,[S_W]) then
  4329. begin
  4330. taicpu(hp1).changeopsize(S_L);
  4331. if taicpu(hp1).oper[0]^.typ=top_reg then
  4332. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  4333. hp1 := tai(p.next);
  4334. RemoveCurrentp(p, hp1);
  4335. Result:=true;
  4336. exit;
  4337. end;
  4338. end;
  4339. {$endif i386}
  4340. if DoSubAddOpt(p) then
  4341. Result:=true;
  4342. end;
  4343. end;
    { Pass-1 optimisations for "shl/sal $const,%reg":
      * shl $1..3 followed by add/sub/inc/dec/lea on the same register
        -> folded into a single lea (ShlAddLeaSubIncDec2Lea)
      * pre-Pentium2 i386 only: shl $1 -> "add reg,reg"; shl $2/$3 -> lea
      * shl $x followed by an "and" (directly, or via "mov const,reg2;
        and reg2,reg1" on x86_64) whose mask only clears bits the shift
        already zeroed -> the and/mov pair is removed, or the mask shrunk
      * shl $1..3 followed by a mov/lea that uses the shifted register as
        an unscaled index -> the shift is folded into the scale factor. }
    function TX86AsmOptimizer.OptPass1SHLSAL(var p : tai) : boolean;
      var
        TmpBool1,TmpBool2 : Boolean;
        tmpref : treference;
        hp1,hp2: tai;
        mask: tcgint;
      begin
        Result:=false;
        { All these optimisations work on "shl/sal const,%reg" }
        if not MatchOpType(taicpu(p),top_const,top_reg) then
          Exit;
        if (taicpu(p).opsize in [S_L{$ifdef x86_64},S_Q{$endif x86_64}]) and
           (taicpu(p).oper[0]^.val <= 3) then
          { Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement }
          begin
            { should we check the next instruction? }
            TmpBool1 := True;
            { have we found an add/sub which could be
              integrated in the lea? }
            TmpBool2 := False;
            { Seed the reference with "(,%reg,2^const)" }
            reference_reset(tmpref,2,[]);
            TmpRef.index := taicpu(p).oper[1]^.reg;
            TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
            { Greedily absorb following add/sub/inc/dec/lea instructions on
              the same register; the loop only continues while the
              instruction *after* the absorbed one does not read the flags
              (which the folded arithmetic would otherwise have set) }
            while TmpBool1 and
                  GetNextInstruction(p, hp1) and
                  (tai(hp1).typ = ait_instruction) and
                  ((((taicpu(hp1).opcode = A_ADD) or
                     (taicpu(hp1).opcode = A_SUB)) and
                    (taicpu(hp1).oper[1]^.typ = Top_Reg) and
                    (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
                   (((taicpu(hp1).opcode = A_INC) or
                     (taicpu(hp1).opcode = A_DEC)) and
                    (taicpu(hp1).oper[0]^.typ = Top_Reg) and
                    (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg)) or
                   ((taicpu(hp1).opcode = A_LEA) and
                    (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) and
                    (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg))) and
                  (not GetNextInstruction(hp1,hp2) or
                   not instrReadsFlags(hp2)) Do
              begin
                TmpBool1 := False;
                if taicpu(hp1).opcode=A_LEA then
                  begin
                    { A lea can be merged as long as the accumulated
                      reference has no base yet, the lea carries no symbols
                      or segment, and the combined scale stays <= 8 }
                    if (TmpRef.base = NR_NO) and
                       (taicpu(hp1).oper[0]^.ref^.symbol=nil) and
                       (taicpu(hp1).oper[0]^.ref^.relsymbol=nil) and
                       (taicpu(hp1).oper[0]^.ref^.segment=NR_NO) and
                       ((taicpu(hp1).oper[0]^.ref^.scalefactor=0) or
                        (taicpu(hp1).oper[0]^.ref^.scalefactor*tmpref.scalefactor<=8)) then
                      begin
                        TmpBool1 := True;
                        TmpBool2 := True;
                        inc(TmpRef.offset, taicpu(hp1).oper[0]^.ref^.offset);
                        if taicpu(hp1).oper[0]^.ref^.scalefactor<>0 then
                          tmpref.scalefactor:=tmpref.scalefactor*taicpu(hp1).oper[0]^.ref^.scalefactor;
                        TmpRef.base := taicpu(hp1).oper[0]^.ref^.base;
                        RemoveInstruction(hp1);
                      end
                  end
                else if (taicpu(hp1).oper[0]^.typ = Top_Const) then
                  begin
                    { add/sub with a constant: fold into the offset }
                    TmpBool1 := True;
                    TmpBool2 := True;
                    case taicpu(hp1).opcode of
                      A_ADD:
                        inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
                      A_SUB:
                        dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
                      else
                        internalerror(2019050536);
                    end;
                    RemoveInstruction(hp1);
                  end
                else
                  { add with a register becomes the base (only if no base
                    is taken yet); inc/dec adjust the offset by one }
                  if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
                     (((taicpu(hp1).opcode = A_ADD) and
                       (TmpRef.base = NR_NO)) or
                      (taicpu(hp1).opcode = A_INC) or
                      (taicpu(hp1).opcode = A_DEC)) then
                    begin
                      TmpBool1 := True;
                      TmpBool2 := True;
                      case taicpu(hp1).opcode of
                        A_ADD:
                          TmpRef.base := taicpu(hp1).oper[0]^.reg;
                        A_INC:
                          inc(TmpRef.offset);
                        A_DEC:
                          dec(TmpRef.offset);
                        else
                          internalerror(2019050535);
                      end;
                      RemoveInstruction(hp1);
                    end;
              end;
            if TmpBool2
{$ifndef x86_64}
               or
               { On older 32-bit CPUs a lone shl is also worth rewriting as
                 lea/add unless optimising for size }
               ((current_settings.optimizecputype < cpu_Pentium2) and
                (taicpu(p).oper[0]^.val <= 3) and
                not(cs_opt_size in current_settings.optimizerswitches))
{$endif x86_64}
              then
              begin
                { "shl $1" with nothing folded in is cheaper as "add reg,reg"
                  (reachable only through the ifndef branch above) }
                if not(TmpBool2) and
                   (taicpu(p).oper[0]^.val=1) then
                  begin
                    hp1:=taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
                      taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
                  end
                else
                  hp1:=taicpu.op_ref_reg(A_LEA, taicpu(p).opsize, TmpRef,
                    taicpu(p).oper[1]^.reg);
                DebugMsg(SPeepholeOptimization + 'ShlAddLeaSubIncDec2Lea',p);
                { Replace p in the list with the freshly built instruction }
                InsertLLItem(p.previous, p.next, hp1);
                p.free;
                p := hp1;
              end;
          end
{$ifndef x86_64}
        else if (current_settings.optimizecputype < cpu_Pentium2) then
          begin
            { changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
              but faster on a 486, and Tairable in both U and V pipes on the Pentium
              (unlike shl, which is only Tairable in the U pipe) }
            if taicpu(p).oper[0]^.val=1 then
              begin
                hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
                  taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
                InsertLLItem(p.previous, p.next, hp1);
                p.free;
                p := hp1;
              end
            { changes "shl $2, %reg" to "lea (,%reg,4), %reg"
              "shl $3, %reg" to "lea (,%reg,8), %reg }
            else if (taicpu(p).opsize = S_L) and
                    (taicpu(p).oper[0]^.val<= 3) then
              begin
                reference_reset(tmpref,2,[]);
                TmpRef.index := taicpu(p).oper[1]^.reg;
                TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
                hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
                InsertLLItem(p.previous, p.next, hp1);
                p.free;
                p := hp1;
              end;
          end
{$endif x86_64}
        else if
           { Either "shl; and" directly, or (x86_64) "shl; mov const,reg2;
             and reg2,reg1" - afterwards hp2 is the and instruction }
           GetNextInstruction(p, hp1) and (hp1.typ = ait_instruction) and MatchOpType(taicpu(hp1), top_const, top_reg) and
           (
             (
               MatchInstruction(hp1, A_AND, [taicpu(p).opsize]) and
               SetAndTest(hp1, hp2)
{$ifdef x86_64}
             ) or
             (
               MatchInstruction(hp1, A_MOV, [taicpu(p).opsize]) and
               GetNextInstruction(hp1, hp2) and
               MatchInstruction(hp2, A_AND, [taicpu(p).opsize]) and
               MatchOpType(taicpu(hp2), top_reg, top_reg) and
               (taicpu(hp1).oper[1]^.reg = taicpu(hp2).oper[0]^.reg)
{$endif x86_64}
             )
           ) and
           (taicpu(p).oper[1]^.reg = taicpu(hp2).oper[1]^.reg) then
          begin
            { Change:
                shl x, %reg1
                mov -(1<<x), %reg2
                and %reg2, %reg1
              Or:
                shl x, %reg1
                and -(1<<x), %reg1
              To just:
                shl x, %reg1
              Since the and operation only zeroes bits that are already zero from the shl operation
            }
            { Build the mask of bits a shift by val leaves possibly set;
              special cases avoid Int64 shift overflow }
            case taicpu(p).oper[0]^.val of
              8:
                mask:=$FFFFFFFFFFFFFF00;
              16:
                mask:=$FFFFFFFFFFFF0000;
              32:
                mask:=$FFFFFFFF00000000;
              63:
                { Constant pre-calculated to prevent overflow errors with Int64 }
                mask:=$8000000000000000;
              else
                begin
                  if taicpu(p).oper[0]^.val >= 64 then
                    { Shouldn't happen realistically, since the register
                      is guaranteed to be set to zero at this point }
                    mask := 0
                  else
                    mask := -(Int64(1 shl taicpu(p).oper[0]^.val));
                end;
            end;
            if taicpu(hp1).oper[0]^.val = mask then
              begin
                { Everything checks out, perform the optimisation, as long as
                  the FLAGS register isn't being used}
                TransferUsedRegs(TmpUsedRegs);
                UpdateUsedRegs(TmpUsedRegs, tai(p.next));
{$ifdef x86_64}
                if (hp1 <> hp2) then
                  begin
                    { "shl/mov/and" version }
                    UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                    { Don't do the optimisation if the FLAGS register is in use }
                    if not(RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp2, TmpUsedRegs)) then
                      begin
                        DebugMsg(SPeepholeOptimization + 'ShlMovAnd2Shl', p);
                        { Don't remove the 'mov' instruction if its register is used elsewhere }
                        if not(RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg, hp2, TmpUsedRegs)) then
                          begin
                            RemoveInstruction(hp1);
                            Result := True;
                          end;
                        { Only set Result to True if the 'mov' instruction was removed }
                        RemoveInstruction(hp2);
                      end;
                  end
                else
{$endif x86_64}
                  begin
                    { "shl/and" version }
                    { Don't do the optimisation if the FLAGS register is in use }
                    if not(RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp1, TmpUsedRegs)) then
                      begin
                        DebugMsg(SPeepholeOptimization + 'ShlAnd2Shl', p);
                        RemoveInstruction(hp1);
                        Result := True;
                      end;
                  end;
                Exit;
              end
            else {$ifdef x86_64}if (hp1 = hp2) then{$endif x86_64}
              begin
                { Even if the mask doesn't allow for its removal, we might be
                  able to optimise the mask for the "shl/and" version, which
                  may permit other peephole optimisations }
{$ifdef DEBUG_AOPTCPU}
                mask := taicpu(hp1).oper[0]^.val and mask;
                if taicpu(hp1).oper[0]^.val <> mask then
                  begin
                    DebugMsg(
                      SPeepholeOptimization +
                      'Changed mask from $' + debug_tostr(taicpu(hp1).oper[0]^.val) +
                      ' to $' + debug_tostr(mask) +
                      'based on previous instruction (ShlAnd2ShlAnd)', hp1);
                    taicpu(hp1).oper[0]^.val := mask;
                  end;
{$else DEBUG_AOPTCPU}
                { If debugging is off, just set the operand even if it's the same }
                taicpu(hp1).oper[0]^.val := taicpu(hp1).oper[0]^.val and mask;
{$endif DEBUG_AOPTCPU}
              end;
          end;
        {
          change
                shl/sal const,reg
                <op> ...(...,reg,1),...
          into
                <op> ...(...,reg,1 shl const),...
          if const in 1..3
        }
        if MatchOpType(taicpu(p), top_const, top_reg) and
           (taicpu(p).oper[0]^.val in [1..3]) and
           GetNextInstruction(p, hp1) and
           MatchInstruction(hp1,A_MOV,A_LEA,[]) and
           MatchOpType(taicpu(hp1), top_ref, top_reg) and
           (taicpu(p).oper[1]^.reg=taicpu(hp1).oper[0]^.ref^.index) and
           (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^.ref^.base) and
           (taicpu(hp1).oper[0]^.ref^.scalefactor in [0,1]) then
          begin
            TransferUsedRegs(TmpUsedRegs);
            UpdateUsedRegs(TmpUsedRegs, tai(p.next));
            { Only valid if the shifted register dies after hp1, since the
              shift itself is removed }
            if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
              begin
                taicpu(hp1).oper[0]^.ref^.scalefactor:=1 shl taicpu(p).oper[0]^.val;
                DebugMsg(SPeepholeOptimization + 'ShlOp2Op', p);
                RemoveCurrentP(p);
                Result:=true;
              end;
          end;
      end;
    { Tries to merge a run of byte-sized stores to consecutive, 4-byte-aligned
      memory locations into a single dword store:
        mov (reg|const),0(x); mov $0,1(x); mov $0,2(x); mov $0,3(x)
          -> movzx %regb,%regd + movl %regd,0(x)   (register source)
          -> movl $const,0(x)                      (constant source)
      and likewise for the byte/byte/word-of-zero pattern.  first_mov and
      second_mov are the first two byte stores; both second operands must
      already be known to be references (see the comment below).
      Returns True if the merge was performed. }
    function TX86AsmOptimizer.CheckMemoryWrite(var first_mov, second_mov: taicpu): Boolean;
      var
        CurrentRef: TReference;
        FullReg: TRegister;
        hp1, hp2: tai;
      begin
        Result := False;
        if (first_mov.opsize <> S_B) or (second_mov.opsize <> S_B) then
          Exit;
        { We assume you've checked if the operand is actually a reference by
          this point. If it isn't, you'll most likely get an access violation }
        CurrentRef := first_mov.oper[1]^.ref^;
        { Memory must be aligned }
        if (CurrentRef.offset mod 4) <> 0 then
          Exit;
        { CurrentRef walks forward one byte at a time to match each
          subsequent store }
        Inc(CurrentRef.offset);
        CurrentRef.alignment := 1; { Otherwise references_equal will return False }
        if MatchOperand(second_mov.oper[0]^, 0) and
           references_equal(second_mov.oper[1]^.ref^, CurrentRef) and
           GetNextInstruction(second_mov, hp1) and
           (hp1.typ = ait_instruction) and
           (taicpu(hp1).opcode = A_MOV) and
           MatchOpType(taicpu(hp1), top_const, top_ref) and
           (taicpu(hp1).oper[0]^.val = 0) then
          begin
            Inc(CurrentRef.offset);
            CurrentRef.alignment := taicpu(hp1).oper[1]^.ref^.alignment; { Otherwise references_equal might return False }
            { Dword-sized counterpart of first_mov's destination register;
              only meaningful when first_mov's source is actually a register }
            FullReg := newreg(R_INTREGISTER,getsupreg(first_mov.oper[0]^.reg), R_SUBD);
            if references_equal(taicpu(hp1).oper[1]^.ref^, CurrentRef) then
              begin
                case taicpu(hp1).opsize of
                  S_B:
                    { Four byte stores: need one more zero byte store at
                      offset+3 to complete the dword }
                    if GetNextInstruction(hp1, hp2) and
                       MatchInstruction(taicpu(hp2), A_MOV, [S_B]) and
                       MatchOpType(taicpu(hp2), top_const, top_ref) and
                       (taicpu(hp2).oper[0]^.val = 0) then
                      begin
                        Inc(CurrentRef.offset);
                        CurrentRef.alignment := 1; { Otherwise references_equal will return False }
                        if references_equal(taicpu(hp2).oper[1]^.ref^, CurrentRef) and
                           (taicpu(hp2).opsize = S_B) then
                          begin
                            RemoveInstruction(hp1);
                            RemoveInstruction(hp2);
                            first_mov.opsize := S_L;
                            if first_mov.oper[0]^.typ = top_reg then
                              begin
                                DebugMsg(SPeepholeOptimization + 'MOVb/MOVb/MOVb/MOVb -> MOVZX/MOVl', first_mov);
                                { Reuse second_mov as a MOVZX instruction }
                                second_mov.opcode := A_MOVZX;
                                second_mov.opsize := S_BL;
                                second_mov.loadreg(0, first_mov.oper[0]^.reg);
                                second_mov.loadreg(1, FullReg);
                                first_mov.oper[0]^.reg := FullReg;
                                { The movzx must execute before the widened
                                  store, so move it in front of first_mov }
                                asml.Remove(second_mov);
                                asml.InsertBefore(second_mov, first_mov);
                              end
                            else
                              { It's a value }
                              begin
                                DebugMsg(SPeepholeOptimization + 'MOVb/MOVb/MOVb/MOVb -> MOVl', first_mov);
                                RemoveInstruction(second_mov);
                              end;
                            Result := True;
                            Exit;
                          end;
                      end;
                  S_W:
                    { byte store + byte store + word store of zero already
                      covers the full dword }
                    begin
                      RemoveInstruction(hp1);
                      first_mov.opsize := S_L;
                      if first_mov.oper[0]^.typ = top_reg then
                        begin
                          DebugMsg(SPeepholeOptimization + 'MOVb/MOVb/MOVw -> MOVZX/MOVl', first_mov);
                          { Reuse second_mov as a MOVZX instruction }
                          second_mov.opcode := A_MOVZX;
                          second_mov.opsize := S_BL;
                          second_mov.loadreg(0, first_mov.oper[0]^.reg);
                          second_mov.loadreg(1, FullReg);
                          first_mov.oper[0]^.reg := FullReg;
                          asml.Remove(second_mov);
                          asml.InsertBefore(second_mov, first_mov);
                        end
                      else
                        { It's a value }
                        begin
                          DebugMsg(SPeepholeOptimization + 'MOVb/MOVb/MOVw -> MOVl', first_mov);
                          RemoveInstruction(second_mov);
                        end;
                      Result := True;
                      Exit;
                    end;
                  else
                    ;
                end;
              end;
          end;
      end;
    { Pass-1 optimisation of fstp/fistp followed by an fld/fild of the same
      memory location and size:
      * if the pair directly precedes the function's exit code and writes
        the function result slot on the frame pointer, both instructions
        are removed (the value simply stays on the FPU stack);
      * otherwise, where rounding permits (extended precision, or another
        identical fstp follows), the pair collapses to fst/fist. }
    function TX86AsmOptimizer.OptPass1FSTP(var p: tai): boolean;
      { returns true if a "continue" should be done after this optimization }
      var
        hp1, hp2: tai;
      begin
        Result := false;
        if MatchOpType(taicpu(p),top_ref) and
           GetNextInstruction(p, hp1) and
           (hp1.typ = ait_instruction) and
           (((taicpu(hp1).opcode = A_FLD) and
             (taicpu(p).opcode = A_FSTP)) or
            ((taicpu(p).opcode = A_FISTP) and
             (taicpu(hp1).opcode = A_FILD))) and
           MatchOpType(taicpu(hp1),top_ref) and
           (taicpu(hp1).opsize = taicpu(p).opsize) and
           RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
          begin
            { replacing fstp f;fld f by fst f is only valid for extended because of rounding or if fastmath is on }
            if ((taicpu(p).opsize=S_FX) or (cs_opt_fastmath in current_settings.optimizerswitches)) and
               GetNextInstruction(hp1, hp2) and
               (hp2.typ = ait_instruction) and
               IsExitCode(hp2) and
               (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
               { Must not remove a store that lies below the function result
                 variable's own location on the frame }
               not(assigned(current_procinfo.procdef.funcretsym) and
                   (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
               (taicpu(p).oper[0]^.ref^.index = NR_NO) then
              begin
                { Drop the store/reload pair entirely; the result remains in
                  st(0) on exit }
                RemoveInstruction(hp1);
                RemoveCurrentP(p, hp2);
                RemoveLastDeallocForFuncRes(p);
                Result := true;
              end
            else
              { we can do this only in fast math mode as fstp is rounding ...
                ... still disabled as it breaks the compiler and/or rtl }
              if ({ (cs_opt_fastmath in current_settings.optimizerswitches) or }
                  { ... or if another fstp equal to the first one follows }
                  (GetNextInstruction(hp1,hp2) and
                   (hp2.typ = ait_instruction) and
                   (taicpu(p).opcode=taicpu(hp2).opcode) and
                   (taicpu(p).opsize=taicpu(hp2).opsize))
                 ) and
                 { fst can't store an extended/comp value }
                 (taicpu(p).opsize <> S_FX) and
                 (taicpu(p).opsize <> S_IQ) then
                begin
                  { fstp+fld -> fst (keeps the value on the stack);
                    fistp+fild -> fist likewise }
                  if (taicpu(p).opcode = A_FSTP) then
                    taicpu(p).opcode := A_FST
                  else
                    taicpu(p).opcode := A_FIST;
                  DebugMsg(SPeepholeOptimization + 'FstpFld2Fst',p);
                  RemoveInstruction(hp1);
                end;
          end;
      end;
    { Pass-1 optimisation of fld instructions:
      * "fld reg; fxxxp st,st1" -> "fxxx reg,st" (non-commutative operations
        are reversed because the implicit push is removed);
      * "fld/fst mem1; fld mem1; faddp/fmulp" -> reuse st(0) instead of
        reloading the same memory operand;
      * "fld/fst mem1; fld mem2; fxxxp" -> fold mem2 directly into the
        arithmetic instruction. }
    function TX86AsmOptimizer.OptPass1FLD(var p : tai) : boolean;
      var
        hp1, hp2: tai;
      begin
        result:=false;
        if MatchOpType(taicpu(p),top_reg) and
           GetNextInstruction(p, hp1) and
           (hp1.typ = Ait_Instruction) and
           MatchOpType(taicpu(hp1),top_reg,top_reg) and
           (taicpu(hp1).oper[0]^.reg = NR_ST) and
           (taicpu(hp1).oper[1]^.reg = NR_ST1) then
          { change to
              fld reg               fxxx reg,st
              fxxxp st, st1 (hp1)
            Remark: non commutative operations must be reversed!
          }
          begin
            case taicpu(hp1).opcode Of
              A_FMULP,A_FADDP,
              A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
                begin
                  { Map each popping opcode to its non-popping counterpart;
                    sub/div swap with their reversed forms because the
                    operand order flips when the fld push disappears }
                  case taicpu(hp1).opcode Of
                    A_FADDP: taicpu(hp1).opcode := A_FADD;
                    A_FMULP: taicpu(hp1).opcode := A_FMUL;
                    A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
                    A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
                    A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
                    A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
                    else
                      internalerror(2019050534);
                  end;
                  taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
                  taicpu(hp1).oper[1]^.reg := NR_ST;
                  RemoveCurrentP(p, hp1);
                  Result:=true;
                  exit;
                end;
              else
                ;
            end;
          end
        else
          if MatchOpType(taicpu(p),top_ref) and
             GetNextInstruction(p, hp2) and
             (hp2.typ = Ait_Instruction) and
             MatchOpType(taicpu(hp2),top_reg,top_reg) and
             (taicpu(p).opsize in [S_FS, S_FL]) and
             (taicpu(hp2).oper[0]^.reg = NR_ST) and
             (taicpu(hp2).oper[1]^.reg = NR_ST1) then
            { The preceding instruction already put mem1's value in st(0)? }
            if GetLastInstruction(p, hp1) and
               MatchInstruction(hp1,A_FLD,A_FST,[taicpu(p).opsize]) and
               MatchOpType(taicpu(hp1),top_ref) and
               RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
              if ((taicpu(hp2).opcode = A_FMULP) or
                  (taicpu(hp2).opcode = A_FADDP)) then
                { change to
                    fld/fst   mem1  (hp1)      fld/fst   mem1
                    fld       mem1  (p)        fadd/
                    faddp/                     fmul     st, st
                    fmulp  st, st1 (hp2) }
                begin
                  { Only the commutative operations qualify here }
                  RemoveCurrentP(p, hp1);
                  if (taicpu(hp2).opcode = A_FADDP) then
                    taicpu(hp2).opcode := A_FADD
                  else
                    taicpu(hp2).opcode := A_FMUL;
                  taicpu(hp2).oper[1]^.reg := NR_ST;
                end
              else
                { change to
                    fld/fst mem1 (hp1)   fld/fst mem1
                    fld     mem1 (p)     fld      st}
                begin
                  { Duplicate st(0) instead of re-reading memory }
                  taicpu(p).changeopsize(S_FL);
                  taicpu(p).loadreg(0,NR_ST);
                end
            else
              begin
                case taicpu(hp2).opcode Of
                  A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
                    { change to
                        fld/fst  mem1 (hp1)      fld/fst  mem1
                        fld      mem2 (p)        fxxx     mem2
                        fxxxp    st, st1 (hp2) }
                    begin
                      { As above, sub/div are replaced by their reversed
                        forms since the push/pop pair is removed }
                      case taicpu(hp2).opcode Of
                        A_FADDP: taicpu(p).opcode := A_FADD;
                        A_FMULP: taicpu(p).opcode := A_FMUL;
                        A_FSUBP: taicpu(p).opcode := A_FSUBR;
                        A_FSUBRP: taicpu(p).opcode := A_FSUB;
                        A_FDIVP: taicpu(p).opcode := A_FDIVR;
                        A_FDIVRP: taicpu(p).opcode := A_FDIV;
                        else
                          internalerror(2019050533);
                      end;
                      RemoveInstruction(hp2);
                    end
                  else
                    ;
                end
              end
      end;
  4886. function IsCmpSubset(cond1, cond2: TAsmCond): Boolean; inline;
  4887. begin
  4888. Result := condition_in(cond1, cond2) or
  4889. { Not strictly subsets due to the actual flags checked, but because we're
  4890. comparing integers, E is a subset of AE and GE and their aliases }
  4891. ((cond1 in [C_E, C_Z]) and (cond2 in [C_AE, C_NB, C_NC, C_GE, C_NL]));
  4892. end;
{ Pass-1 peephole optimisations rooted at a CMP instruction.
  Returns True when the instruction stream was changed (p may have been
  rewritten in place, e.g. into TEST or NEG). }
function TX86AsmOptimizer.OptPass1Cmp(var p: tai): boolean;
  var
    v: TCGInt;
    hp1, hp2, p_dist, p_jump, hp1_dist, p_label, hp1_label: tai;
    FirstMatch: Boolean;
    JumpLabel, JumpLabel_dist, JumpLabel_far: TAsmLabel;
  begin
    Result:=false;
    { All these optimisations need a next instruction }
    if not GetNextInstruction(p, hp1) then
      Exit;
    { Search for:
        cmp   ###,###
        j(c1) @lbl1
        ...
      @lbl:
        cmp   ###.### (same comparison as above)
        j(c2) @lbl2

      If c1 is a subset of c2, change to:
        cmp   ###,###
        j(c2) @lbl2

      (@lbl1 may become a dead label as a result)
    }
    { Also handle cases where there are multiple jumps in a row }
    p_jump := hp1;
    while Assigned(p_jump) and MatchInstruction(p_jump, A_JCC, []) do
      begin
        if IsJumpToLabel(taicpu(p_jump)) then
          begin
            JumpLabel := TAsmLabel(taicpu(p_jump).oper[0]^.ref^.symbol);
            p_label := nil;
            if Assigned(JumpLabel) then
              p_label := getlabelwithsym(JumpLabel);
            { The comparison at the jump target must be identical to the
              current one so the flags are guaranteed to be the same }
            if Assigned(p_label) and
              GetNextInstruction(p_label, p_dist) and
              MatchInstruction(p_dist, A_CMP, []) and
              MatchOperand(taicpu(p_dist).oper[0]^, taicpu(p).oper[0]^) and
              MatchOperand(taicpu(p_dist).oper[1]^, taicpu(p).oper[1]^) and
              GetNextInstruction(p_dist, hp1_dist) and
              MatchInstruction(hp1_dist, A_JCC, []) then { This doesn't have to be an explicit label }
              begin
                JumpLabel_dist := TAsmLabel(taicpu(hp1_dist).oper[0]^.ref^.symbol);
                if JumpLabel = JumpLabel_dist then
                  { This is an infinite loop }
                  Exit;
                { Best optimisation when the first condition is a subset (or equal) of the second }
                if IsCmpSubset(taicpu(p_jump).condition, taicpu(hp1_dist).condition) then
                  begin
                    { Any registers used here will already be allocated }
                    if Assigned(JumpLabel_dist) then
                      JumpLabel_dist.IncRefs;
                    if Assigned(JumpLabel) then
                      JumpLabel.DecRefs;
                    DebugMsg(SPeepholeOptimization + 'CMP/Jcc/@Lbl/CMP/Jcc -> CMP/Jcc, redirecting first jump', p_jump);
                    taicpu(p_jump).condition := taicpu(hp1_dist).condition;
                    taicpu(p_jump).loadref(0, taicpu(hp1_dist).oper[0]^.ref^);
                    Result := True;
                    { Don't exit yet. Since p and p_jump haven't actually been
                      removed, we can check for more on this iteration }
                  end
                else if IsCmpSubset(taicpu(hp1_dist).condition, inverse_cond(taicpu(p_jump).condition)) and
                  GetNextInstruction(hp1_dist, hp1_label) and
                  SkipAligns(hp1_label, hp1_label) and
                  (hp1_label.typ = ait_label) then
                  begin
                    JumpLabel_far := tai_label(hp1_label).labsym;
                    if (JumpLabel_far = JumpLabel_dist) or (JumpLabel_far = JumpLabel) then
                      { This is an infinite loop }
                      Exit;
                    if Assigned(JumpLabel_far) then
                      begin
                        { In this situation, if the first jump branches, the second one will never
                          branch, so change the destination label to after the second jump }
                        DebugMsg(SPeepholeOptimization + 'CMP/Jcc/@Lbl/CMP/Jcc/@Lbl -> CMP/Jcc, redirecting first jump to 2nd label', p_jump);
                        if Assigned(JumpLabel) then
                          JumpLabel.DecRefs;
                        JumpLabel_far.IncRefs;
                        taicpu(p_jump).oper[0]^.ref^.symbol := JumpLabel_far;
                        Result := True;
                        { Don't exit yet. Since p and p_jump haven't actually been
                          removed, we can check for more on this iteration }
                        Continue;
                      end;
                  end;
              end;
          end;
        { Search for:
            cmp   ###,###
            j(c1) @lbl1
            cmp   ###,### (same as first)
          Remove second cmp
        }
        if GetNextInstruction(p_jump, hp2) and
          (
            (
              MatchInstruction(hp2, A_CMP, []) and
              (
                (
                  { const/reg comparisons only need matching super-registers }
                  MatchOpType(taicpu(p), top_const, top_reg) and
                  (taicpu(hp2).oper[0]^.val = taicpu(p).oper[0]^.val) and
                  SuperRegistersEqual(taicpu(p).oper[1]^.reg, taicpu(hp2).oper[1]^.reg)
                ) or (
                  MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^) and
                  MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^)
                )
              )
            ) or (
              { Also match cmp $0,%reg; jcc @lbl; test %reg,%reg }
              MatchOperand(taicpu(p).oper[0]^, 0) and
              (taicpu(p).oper[1]^.typ = top_reg) and
              MatchInstruction(hp2, A_TEST, []) and
              MatchOpType(taicpu(hp2), top_reg, top_reg) and
              (taicpu(hp2).oper[0]^.reg = taicpu(hp2).oper[1]^.reg) and
              SuperRegistersEqual(taicpu(p).oper[1]^.reg, taicpu(hp2).oper[1]^.reg)
            )
          ) then
          begin
            DebugMsg(SPeepholeOptimization + 'CMP/Jcc/CMP; removed superfluous CMP', hp2);
            RemoveInstruction(hp2);
            Result := True;
            { Continue the while loop in case "Jcc/CMP" follows the second CMP that was just removed }
          end;
        GetNextInstruction(p_jump, p_jump);
      end;
    if taicpu(p).oper[0]^.typ = top_const then
      begin
        if (taicpu(p).oper[0]^.val = 0) and
          (taicpu(p).oper[1]^.typ = top_reg) and
          MatchInstruction(hp1,A_Jcc,A_SETcc,[]) then
          begin
            hp2 := p;
            FirstMatch := True;
            { When dealing with "cmp $0,%reg", only ZF and SF contain
              anything meaningful once it's converted to "test %reg,%reg";
              additionally, some jumps will always (or never) branch, so
              evaluate every jump immediately following the
              comparison, optimising the conditions if possible.
              Similarly with SETcc... those that are always set to 0 or 1
              are changed to MOV instructions }
            while FirstMatch or { Saves calling GetNextInstruction unnecessarily }
              (
                GetNextInstruction(hp2, hp1) and
                MatchInstruction(hp1,A_Jcc,A_SETcc,[])
              ) do
              begin
                FirstMatch := False;
                case taicpu(hp1).condition of
                  C_B, C_C, C_NAE, C_O:
                    { For B/NAE:
                        Will never branch since an unsigned integer can never be below zero
                      For C/O:
                        Result cannot overflow because 0 is being subtracted
                    }
                    begin
                      if taicpu(hp1).opcode = A_Jcc then
                        begin
                          DebugMsg(SPeepholeOptimization + 'Cmpcc2Testcc - condition B/C/NAE/O --> Never (jump removed)', hp1);
                          TAsmLabel(taicpu(hp1).oper[0]^.ref^.symbol).decrefs;
                          RemoveInstruction(hp1);
                          { Since hp1 was deleted, hp2 must not be updated }
                          Continue;
                        end
                      else
                        begin
                          DebugMsg(SPeepholeOptimization + 'Cmpcc2Testcc - condition B/C/NAE/O --> Never (set -> mov 0)', hp1);
                          { Convert "set(c) %reg" instruction to "movb 0,%reg" }
                          taicpu(hp1).opcode := A_MOV;
                          taicpu(hp1).ops := 2;
                          taicpu(hp1).condition := C_None;
                          taicpu(hp1).opsize := S_B;
                          taicpu(hp1).loadreg(1,taicpu(hp1).oper[0]^.reg);
                          taicpu(hp1).loadconst(0, 0);
                        end;
                    end;
                  C_BE, C_NA:
                    begin
                      { Will only branch if equal to zero }
                      DebugMsg(SPeepholeOptimization + 'Cmpcc2Testcc - condition BE/NA --> E', hp1);
                      taicpu(hp1).condition := C_E;
                    end;
                  C_A, C_NBE:
                    begin
                      { Will only branch if not equal to zero }
                      DebugMsg(SPeepholeOptimization + 'Cmpcc2Testcc - condition A/NBE --> NE', hp1);
                      taicpu(hp1).condition := C_NE;
                    end;
                  C_AE, C_NB, C_NC, C_NO:
                    begin
                      { Will always branch }
                      DebugMsg(SPeepholeOptimization + 'Cmpcc2Testcc - condition AE/NB/NC/NO --> Always', hp1);
                      if taicpu(hp1).opcode = A_Jcc then
                        begin
                          MakeUnconditional(taicpu(hp1));
                          { Any jumps/set that follow will now be dead code }
                          RemoveDeadCodeAfterJump(taicpu(hp1));
                          Break;
                        end
                      else
                        begin
                          { Convert "set(c) %reg" instruction to "movb 1,%reg" }
                          taicpu(hp1).opcode := A_MOV;
                          taicpu(hp1).ops := 2;
                          taicpu(hp1).condition := C_None;
                          taicpu(hp1).opsize := S_B;
                          taicpu(hp1).loadreg(1,taicpu(hp1).oper[0]^.reg);
                          taicpu(hp1).loadconst(0, 1);
                        end;
                    end;
                  C_None:
                    InternalError(2020012201);
                  C_P, C_PE, C_NP, C_PO:
                    { We can't handle parity checks and they should never be generated
                      after a general-purpose CMP (it's used in some floating-point
                      comparisons that don't use CMP) }
                    InternalError(2020012202);
                  else
                    { Zero/Equality, Sign, their complements and all of the
                      signed comparisons do not need to be converted };
                end;
                hp2 := hp1;
              end;
            { Convert the instruction to a TEST }
            taicpu(p).opcode := A_TEST;
            taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
            Result := True;
            Exit;
          end
        else if (taicpu(p).oper[0]^.val = 1) and
          MatchInstruction(hp1,A_Jcc,A_SETcc,[]) and
          (taicpu(hp1).condition in [C_L, C_NGE]) then
          begin
            { Convert;       To:
                cmp $1,r/m     cmp $0,r/m
                jl  @lbl       jle @lbl
            }
            DebugMsg(SPeepholeOptimization + 'Cmp1Jl2Cmp0Jle', p);
            taicpu(p).oper[0]^.val := 0;
            taicpu(hp1).condition := C_LE;
            { If the instruction is now "cmp $0,%reg", convert it to a
              TEST (and effectively do the work of the "cmp $0,%reg" in
              the block above)

              If it's a reference, we can get away with not setting
              Result to True because we haven't evaluated the jump
              in this pass yet.
            }
            if (taicpu(p).oper[1]^.typ = top_reg) then
              begin
                taicpu(p).opcode := A_TEST;
                taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                Result := True;
              end;
            Exit;
          end
        else if (taicpu(p).oper[1]^.typ = top_reg) then
          begin
            { cmp register,$8000              neg register
              je target                 -->   jo target

              .... only if register is deallocated before jump.}
            case Taicpu(p).opsize of
              S_B: v:=$80;
              S_W: v:=$8000;
              S_L: v:=qword($80000000);
              { S_Q will never happen: cmp with 64 bit constants is not possible }
              S_Q:
                Exit;
              else
                internalerror(2013112905);
            end;
            if (taicpu(p).oper[0]^.val=v) and
              MatchInstruction(hp1,A_Jcc,A_SETcc,[]) and
              (Taicpu(hp1).condition in [C_E,C_NE]) then
              begin
                { Only valid when the register dies here, since NEG
                  destroys its operand }
                TransferUsedRegs(TmpUsedRegs);
                UpdateUsedRegs(TmpUsedRegs,tai(p.next));
                if not(RegInUsedRegs(Taicpu(p).oper[1]^.reg, TmpUsedRegs)) then
                  begin
                    DebugMsg(SPeepholeOptimization + 'CmpJe2NegJo done',p);
                    Taicpu(p).opcode:=A_NEG;
                    Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
                    Taicpu(p).clearop(1);
                    Taicpu(p).ops:=1;
                    if Taicpu(hp1).condition=C_E then
                      Taicpu(hp1).condition:=C_O
                    else
                      Taicpu(hp1).condition:=C_NO;
                    Result:=true;
                    exit;
                  end;
              end;
          end;
      end;
    if (taicpu(p).oper[1]^.typ = top_reg) and
      MatchInstruction(hp1,A_MOV,[]) and
      not RegInInstruction(taicpu(p).oper[1]^.reg, hp1) and
      (
        (taicpu(p).oper[0]^.typ <> top_reg) or
        not RegInInstruction(taicpu(p).oper[0]^.reg, hp1)
      ) then
      begin
        { If we have something like:
            cmp ###,%reg1
            mov 0,%reg2
          And no registers are shared, move the MOV command to before the
          comparison as this means it can be optimised without worrying
          about the FLAGS register. (This combination is generated by
          "J(c)Mov1JmpMov0 -> Set(~c)", among other things).
        }
        SwapMovCmp(p, hp1);
        Result := True;
        Exit;
      end;
  end;
{ Pass-1 peephole optimisations rooted at a PXOR instruction.
  Returns True when the instruction stream was changed. }
function TX86AsmOptimizer.OptPass1PXor(var p: tai): boolean;
  var
    hp1: tai;
  begin
    {
      remove the second (v)pxor from

        pxor reg,reg
        ...
        pxor reg,reg

      (where the second pxor is the next instruction that references the
      register at all, per GetNextInstructionUsingReg, so the value is
      already zero when it executes)
    }
    Result:=false;
    if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) and
      MatchOpType(taicpu(p),top_reg,top_reg) and
      GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
      MatchInstruction(hp1,taicpu(p).opcode,[taicpu(p).opsize]) and
      MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[0]^) and
      MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^) then
      begin
        DebugMsg(SPeepholeOptimization + 'PXorPXor2PXor done',hp1);
        RemoveInstruction(hp1);
        Result:=true;
        Exit;
      end
    {
      replace

        pxor reg1,reg1
        movapd/s reg1,reg2
        dealloc reg1
      by

        pxor reg2,reg2

      (zeroing reg2 directly instead of zeroing reg1 and copying it)
    }
    else if GetNextInstruction(p,hp1) and
      { we mix single and double operations here because we assume that the compiler
        generates vmovapd only after double operations and vmovaps only after single operations }
      MatchInstruction(hp1,A_MOVAPD,A_MOVAPS,[S_NO]) and
      MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) and
      MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
      (taicpu(p).oper[0]^.typ=top_reg) then
      begin
        { Only valid when reg1 is not live after the move }
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.next));
        if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
          begin
            taicpu(p).loadoper(0,taicpu(hp1).oper[1]^);
            taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
            DebugMsg(SPeepholeOptimization + 'PXorMovapd2PXor done',p);
            RemoveInstruction(hp1);
            result:=true;
          end;
      end;
  end;
{ Pass-1 peephole optimisations rooted at a VPXOR instruction
  (three-operand AVX form). Falls back to the generic vector-op
  optimiser when the self-zeroing pattern does not match.
  Returns True when the instruction stream was changed. }
function TX86AsmOptimizer.OptPass1VPXor(var p: tai): boolean;
  var
    hp1: tai;
  begin
    {
      remove the second (v)pxor from

        (v)pxor reg,reg
        ...
        (v)pxor reg,reg

      (the second vpxor is the next instruction referencing the register,
      so it recomputes a value that is already zero)
    }
    Result:=false;
    if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^,taicpu(p).oper[2]^) and
      MatchOpType(taicpu(p),top_reg,top_reg,top_reg) and
      GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
      MatchInstruction(hp1,taicpu(p).opcode,[taicpu(p).opsize]) and
      MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[0]^) and
      MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^,taicpu(hp1).oper[2]^) then
      begin
        DebugMsg(SPeepholeOptimization + 'VPXorVPXor2PXor done',hp1);
        RemoveInstruction(hp1);
        Result:=true;
        Exit;
      end
    else
      Result:=OptPass1VOP(p);
  end;
{ Pass-1 peephole optimisation for the three-operand IMUL form.
  Returns True when the instruction stream was changed. }
function TX86AsmOptimizer.OptPass1Imul(var p: tai): boolean;
  var
    hp1 : tai;
  begin
    result:=false;
    { replace
        IMul   const,%mreg1,%mreg2
        Mov    %mreg2,%mreg3
        dealloc %mreg2
      by
        IMul   const,%mreg1,%mreg3
      (write the product straight into the final destination register) }
    if (taicpu(p).ops=3) and
      GetNextInstruction(p,hp1) and
      MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) and
      MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
      (taicpu(hp1).oper[1]^.typ=top_reg) then
      begin
        { Only valid when the intermediate register dies at the MOV }
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.next));
        if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)) then
          begin
            taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
            DebugMsg(SPeepholeOptimization + 'ImulMov2Imul done',p);
            RemoveInstruction(hp1);
            result:=true;
          end;
      end;
  end;
{ Pass-1 peephole optimisation for three-operand BMI2 shift instructions
  (SHLX/SHRX/SARX family). Returns True when the instruction stream was
  changed. }
function TX86AsmOptimizer.OptPass1SHXX(var p: tai): boolean;
  var
    hp1 : tai;
  begin
    result:=false;
    { replace
        SHXX   %reg0,%reg1,%reg2
        Mov    %reg2,%reg3
        dealloc %reg2
      by
        SHXX   %reg0,%reg1,%reg3
      (write the shift result straight into the final destination register;
      same transformation as ImulMov2Imul above) }
    if GetNextInstruction(p,hp1) and
      MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) and
      MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
      (taicpu(hp1).oper[1]^.typ=top_reg) then
      begin
        { Only valid when the intermediate register dies at the MOV }
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.next));
        if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)) then
          begin
            taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
            DebugMsg(SPeepholeOptimization + 'SHXXMov2SHXX done',p);
            RemoveInstruction(hp1);
            result:=true;
          end;
      end;
  end;
{ Pass-1 peephole optimisations rooted at a conditional jump.
  Returns True when the instruction stream was changed (p may be turned
  into a SETcc instruction). }
function TX86AsmOptimizer.OptPass1Jcc(var p : tai) : boolean;
  var
    hp1, hp2, hp3, hp4, hp5: tai;
    ThisReg: TRegister;
  begin
    Result := False;
    if not GetNextInstruction(p,hp1) or (hp1.typ <> ait_instruction) then
      Exit;

    {
      convert
        j<c>  .L1
        mov   1,reg
        jmp   .L2
      .L1
        mov   0,reg
      .L2

      into
        mov   0,reg
        set<not(c)> reg

      take care of alignment and that the mov 0,reg is not converted into a xor as this
      would destroy the flag contents

      Use MOVZX if size is preferred, since while mov 0,reg is bigger, it can be
      executed at the same time as a previous comparison.
        set<not(c)> reg
        movzx reg, reg
    }
    if MatchInstruction(hp1,A_MOV,[]) and
      (taicpu(hp1).oper[0]^.typ = top_const) and
      (
        (
          (taicpu(hp1).oper[1]^.typ = top_reg)
          {$ifdef i386}
          { Under i386, ESI, EDI, EBP and ESP
            don't have an 8-bit representation }
          and not (getsupreg(taicpu(hp1).oper[1]^.reg) in [RS_ESI, RS_EDI, RS_EBP, RS_ESP])
          {$endif i386}
        ) or (
          {$ifdef i386}
          (taicpu(hp1).oper[1]^.typ <> top_reg) and
          {$endif i386}
          (taicpu(hp1).opsize = S_B)
        )
      ) and
      GetNextInstruction(hp1,hp2) and
      MatchInstruction(hp2,A_JMP,[]) and (taicpu(hp2).oper[0]^.ref^.refaddr=addr_full) and
      GetNextInstruction(hp2,hp3) and
      SkipAligns(hp3, hp3) and
      (hp3.typ=ait_label) and
      (tasmlabel(taicpu(p).oper[0]^.ref^.symbol)=tai_label(hp3).labsym) and
      GetNextInstruction(hp3,hp4) and
      MatchInstruction(hp4,A_MOV,[taicpu(hp1).opsize]) and
      (taicpu(hp4).oper[0]^.typ = top_const) and
      (
        { the two MOVs must load complementary 0/1 constants }
        ((taicpu(hp1).oper[0]^.val = 0) and (taicpu(hp4).oper[0]^.val = 1)) or
        ((taicpu(hp1).oper[0]^.val = 1) and (taicpu(hp4).oper[0]^.val = 0))
      ) and
      MatchOperand(taicpu(hp1).oper[1]^,taicpu(hp4).oper[1]^) and
      GetNextInstruction(hp4,hp5) and
      SkipAligns(hp5, hp5) and
      (hp5.typ=ait_label) and
      (tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol)=tai_label(hp5).labsym) then
      begin
        { If the first MOV loads 1, the SETcc must use the inverted condition }
        if (taicpu(hp1).oper[0]^.val = 1) and (taicpu(hp4).oper[0]^.val = 0) then
          taicpu(p).condition := inverse_cond(taicpu(p).condition);
        tai_label(hp3).labsym.DecRefs;
        { If this isn't the only reference to the middle label, we can
          still make a saving - only that the first jump and everything
          that follows will remain. }
        if (tai_label(hp3).labsym.getrefs = 0) then
          begin
            if (taicpu(hp1).oper[0]^.val = 1) and (taicpu(hp4).oper[0]^.val = 0) then
              DebugMsg(SPeepholeOptimization + 'J(c)Mov1JmpMov0 -> Set(~c)',p)
            else
              DebugMsg(SPeepholeOptimization + 'J(c)Mov0JmpMov1 -> Set(c)',p);
            { remove jump, first label and second MOV (also catching any aligns) }
            repeat
              if not GetNextInstruction(hp2, hp3) then
                InternalError(2021040810);
              RemoveInstruction(hp2);
              hp2 := hp3;
            until hp2 = hp5;
            { Don't decrement reference count before the removal loop
              above, otherwise GetNextInstruction won't stop on the
              the label }
            tai_label(hp5).labsym.DecRefs;
          end
        else
          begin
            if (taicpu(hp1).oper[0]^.val = 1) and (taicpu(hp4).oper[0]^.val = 0) then
              DebugMsg(SPeepholeOptimization + 'J(c)Mov1JmpMov0 -> Set(~c) (partial)',p)
            else
              DebugMsg(SPeepholeOptimization + 'J(c)Mov0JmpMov1 -> Set(c) (partial)',p);
          end;
        { Turn the jump into the SETcc itself }
        taicpu(p).opcode:=A_SETcc;
        taicpu(p).opsize:=S_B;
        taicpu(p).is_jmp:=False;
        if taicpu(hp1).opsize=S_B then
          begin
            { Byte-sized destination: SETcc can write it directly }
            taicpu(p).loadoper(0, taicpu(hp1).oper[1]^);
            RemoveInstruction(hp1);
          end
        else
          begin
            { Will be a register because the size can't be S_B otherwise }
            ThisReg := newreg(R_INTREGISTER,getsupreg(taicpu(hp1).oper[1]^.reg), R_SUBL);
            taicpu(p).loadreg(0, ThisReg);
            if (cs_opt_size in current_settings.optimizerswitches) and IsMOVZXAcceptable then
              begin
                { Smaller encoding: SETcc low byte, then zero-extend it }
                case taicpu(hp1).opsize of
                  S_W:
                    taicpu(hp1).opsize := S_BW;
                  S_L:
                    taicpu(hp1).opsize := S_BL;
                  {$ifdef x86_64}
                  S_Q:
                    begin
                      { Writing a 32-bit register implicitly zeroes the upper half }
                      taicpu(hp1).opsize := S_BL;
                      { Change the destination register to 32-bit }
                      taicpu(hp1).loadreg(1, newreg(R_INTREGISTER,getsupreg(ThisReg), R_SUBD));
                    end;
                  {$endif x86_64}
                  else
                    InternalError(2021040820);
                end;
                taicpu(hp1).opcode := A_MOVZX;
                taicpu(hp1).loadreg(0, ThisReg);
              end
            else
              begin
                { Keep "mov 0,reg" (placed before the SETcc) so the flags
                  survive; it must not be turned into XOR }
                AllocRegBetween(NR_FLAGS,p,hp1,UsedRegs);
                { hp1 is already a MOV instruction with the correct register }
                taicpu(hp1).loadconst(0, 0);
                { Inserting it right before p will guarantee that the flags are also tracked }
                asml.Remove(hp1);
                asml.InsertBefore(hp1, p);
              end;
          end;
        Result:=true;
        exit;
      end
  end;
{ Attempts the "jump threading through MOVs" optimisation:

    jmp .L1          mov ##, ##
    ...        -->   jmp/ret
  .L1:
    mov ##, ## ( multiple movs possible )
    jmp/ret

  The MOV-style instructions at the jump target are duplicated in front of
  the original jump, which is then redirected (or converted to RET).

  p         - the unconditional JMP under consideration (updated to the
              first duplicated assignment on success)
  hp1       - the jump target instruction, or nil to look it up from the
              jump's label
  LoopCount - recursion depth guard (gives up at 20)
  Count     - out: number of assignments duplicated

  Returns True when the stream was changed. }
function TX86AsmOptimizer.CheckJumpMovTransferOpt(var p: tai; hp1: tai; LoopCount: Integer; out Count: Integer): Boolean;
  var
    hp2, hp3, first_assignment: tai;
    IncCount, OperIdx: Integer;
    OrigLabel: TAsmLabel;
  begin
    Count := 0;
    Result := False;
    first_assignment := nil;
    if (LoopCount >= 20) then
      begin
        { Guard against infinite loops }
        Exit;
      end;
    { Only plain "jmp @label" instructions qualify }
    if (taicpu(p).oper[0]^.typ <> top_ref) or
      (taicpu(p).oper[0]^.ref^.refaddr <> addr_full) or
      (taicpu(p).oper[0]^.ref^.base <> NR_NO) or
      (taicpu(p).oper[0]^.ref^.index <> NR_NO) or
      not (taicpu(p).oper[0]^.ref^.symbol is TAsmLabel) then
      Exit;
    OrigLabel := TAsmLabel(taicpu(p).oper[0]^.ref^.symbol);
    {
      change
              jmp .L1
              ...
          .L1:
              mov ##, ## ( multiple movs possible )
              jmp/ret
      into
              mov ##, ##
              jmp/ret
    }
    if not Assigned(hp1) then
      begin
        hp1 := GetLabelWithSym(OrigLabel);
        if not Assigned(hp1) or not SkipLabels(hp1, hp1) then
          Exit;
      end;
    { First scan: verify the target consists only of MOV-class
      instructions followed by a JMP or RET, counting as we go }
    hp2 := hp1;
    while Assigned(hp2) do
      begin
        if Assigned(hp2) and (hp2.typ in [ait_label, ait_align]) then
          SkipLabels(hp2,hp2);
        if not Assigned(hp2) or (hp2.typ <> ait_instruction) then
          Break;
        case taicpu(hp2).opcode of
          A_MOVSS:
            begin
              if taicpu(hp2).ops = 0 then
                { Wrong MOVSS (the zero-operand string instruction,
                  not the SSE move) }
                Break;
              Inc(Count);
              if Count >= 5 then
                { Too many to be worthwhile }
                Break;
              GetNextInstruction(hp2, hp2);
              Continue;
            end;
          A_MOV,
          A_MOVD,
          A_MOVQ,
          A_MOVSX,
          {$ifdef x86_64}
          A_MOVSXD,
          {$endif x86_64}
          A_MOVZX,
          A_MOVAPS,
          A_MOVUPS,
          A_MOVSD,
          A_MOVAPD,
          A_MOVUPD,
          A_MOVDQA,
          A_MOVDQU,
          A_VMOVSS,
          A_VMOVAPS,
          A_VMOVUPS,
          A_VMOVSD,
          A_VMOVAPD,
          A_VMOVUPD,
          A_VMOVDQA,
          A_VMOVDQU:
            begin
              Inc(Count);
              if Count >= 5 then
                { Too many to be worthwhile }
                Break;
              GetNextInstruction(hp2, hp2);
              Continue;
            end;
          A_JMP:
            begin
              { Guard against infinite loops }
              if taicpu(hp2).oper[0]^.ref^.symbol = OrigLabel then
                Exit;
              { Analyse this jump first in case it also duplicates assignments }
              if CheckJumpMovTransferOpt(hp2, nil, LoopCount + 1, IncCount) then
                begin
                  { Something did change! }
                  Result := True;
                  Inc(Count, IncCount);
                  if Count >= 5 then
                    begin
                      { Too many to be worthwhile }
                      Exit;
                    end;
                  if MatchInstruction(hp2, [A_JMP, A_RET], []) then
                    Break;
                end;
              Result := True;
              Break;
            end;
          A_RET:
            begin
              Result := True;
              Break;
            end;
          else
            Break;
        end;
      end;
    if Result then
      begin
        { A count of zero can happen when CheckJumpMovTransferOpt is called recursively }
        if Count = 0 then
          begin
            Result := False;
            Exit;
          end;
        hp3 := p;
        DebugMsg(SPeepholeOptimization + 'Duplicated ' + debug_tostr(Count) + ' assignment(s) and redirected jump', p);
        { Second scan: copy each assignment in front of p, then redirect
          the jump (or turn it into a RET) }
        while True do
          begin
            if Assigned(hp1) and (hp1.typ in [ait_label, ait_align]) then
              SkipLabels(hp1,hp1);
            if (hp1.typ <> ait_instruction) then
              InternalError(2021040720);
            case taicpu(hp1).opcode of
              A_JMP:
                begin
                  { Change the original jump to the new destination }
                  OrigLabel.decrefs;
                  taicpu(hp1).oper[0]^.ref^.symbol.increfs;
                  taicpu(p).loadref(0, taicpu(hp1).oper[0]^.ref^);
                  { Set p to the first duplicated assignment so it can get optimised if needs be }
                  if not Assigned(first_assignment) then
                    InternalError(2021040810)
                  else
                    p := first_assignment;
                  Exit;
                end;
              A_RET:
                begin
                  { Now change the jump into a RET instruction }
                  ConvertJumpToRET(p, hp1);
                  { Set p to the first duplicated assignment so it can get optimised if needs be }
                  if not Assigned(first_assignment) then
                    InternalError(2021040811)
                  else
                    p := first_assignment;
                  Exit;
                end;
              else
                begin
                  { Duplicate the MOV instruction }
                  hp3:=tai(hp1.getcopy);
                  if first_assignment = nil then
                    first_assignment := hp3;
                  asml.InsertBefore(hp3, p);
                  { Make sure the compiler knows about any final registers written here }
                  for OperIdx := 0 to taicpu(hp3).ops - 1 do
                    with taicpu(hp3).oper[OperIdx]^ do
                      begin
                        case typ of
                          top_ref:
                            begin
                              if (ref^.base <> NR_NO) and
                                (getsupreg(ref^.base) <> RS_ESP) and
                                (getsupreg(ref^.base) <> RS_EBP)
                                {$ifdef x86_64} and (ref^.base <> NR_RIP) {$endif x86_64}
                                then
                                AllocRegBetween(ref^.base, hp3, tai(p.Next), UsedRegs);
                              if (ref^.index <> NR_NO) and
                                (getsupreg(ref^.index) <> RS_ESP) and
                                (getsupreg(ref^.index) <> RS_EBP)
                                {$ifdef x86_64} and (ref^.index <> NR_RIP) {$endif x86_64} and
                                (ref^.index <> ref^.base) then
                                AllocRegBetween(ref^.index, hp3, tai(p.Next), UsedRegs);
                            end;
                          top_reg:
                            AllocRegBetween(reg, hp3, tai(p.Next), UsedRegs);
                          else
                            ;
                        end;
                      end;
                end;
            end;
            if not GetNextInstruction(hp1, hp1) then
              { Should have dropped out earlier }
              InternalError(2021040710);
          end;
      end;
  end;
  5682. procedure TX86AsmOptimizer.SwapMovCmp(var p, hp1: tai);
  5683. var
  5684. hp2: tai;
  5685. X: Integer;
  5686. begin
  5687. asml.Remove(hp1);
  5688. { Try to insert after the last instructions where the FLAGS register is not yet in use }
  5689. if not GetLastInstruction(p, hp2) then
  5690. asml.InsertBefore(hp1, p)
  5691. else
  5692. asml.InsertAfter(hp1, hp2);
  5693. DebugMsg(SPeepholeOptimization + 'Swapped ' + debug_op2str(taicpu(p).opcode) + ' and mov instructions to improve optimisation potential', hp1);
  5694. for X := 0 to 1 do
  5695. case taicpu(hp1).oper[X]^.typ of
  5696. top_reg:
  5697. AllocRegBetween(taicpu(hp1).oper[X]^.reg, hp1, p, UsedRegs);
  5698. top_ref:
  5699. begin
  5700. if taicpu(hp1).oper[X]^.ref^.base <> NR_NO then
  5701. AllocRegBetween(taicpu(hp1).oper[X]^.ref^.base, hp1, p, UsedRegs);
  5702. if taicpu(hp1).oper[X]^.ref^.index <> NR_NO then
  5703. AllocRegBetween(taicpu(hp1).oper[X]^.ref^.index, hp1, p, UsedRegs);
  5704. end;
  5705. else
  5706. ;
  5707. end;
  5708. end;
  5709. function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
  5710. function IsXCHGAcceptable: Boolean; inline;
  5711. begin
  5712. { Always accept if optimising for size }
  5713. Result := (cs_opt_size in current_settings.optimizerswitches) or
  5714. (
  5715. {$ifdef x86_64}
  5716. { XCHG takes 3 cycles on AMD Athlon64 }
  5717. (current_settings.optimizecputype >= cpu_core_i)
  5718. {$else x86_64}
  5719. { From the Pentium M onwards, XCHG only has a latency of 2 rather
  5720. than 3, so it becomes a saving compared to three MOVs with two of
  5721. them able to execute simultaneously. [Kit] }
  5722. (current_settings.optimizecputype >= cpu_PentiumM)
  5723. {$endif x86_64}
  5724. );
  5725. end;
  5726. var
  5727. NewRef: TReference;
  5728. hp1, hp2, hp3, hp4: Tai;
  5729. {$ifndef x86_64}
  5730. OperIdx: Integer;
  5731. {$endif x86_64}
  5732. NewInstr : Taicpu;
  5733. NewAligh : Tai_align;
  5734. DestLabel: TAsmLabel;
  5735. begin
  5736. Result:=false;
  5737. { This optimisation adds an instruction, so only do it for speed }
  5738. if not (cs_opt_size in current_settings.optimizerswitches) and
  5739. MatchOpType(taicpu(p), top_const, top_reg) and
  5740. (taicpu(p).oper[0]^.val = 0) then
  5741. begin
  5742. { To avoid compiler warning }
  5743. DestLabel := nil;
  5744. if (p.typ <> ait_instruction) or (taicpu(p).oper[1]^.typ <> top_reg) then
  5745. InternalError(2021040750);
  5746. if not GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[1]^.reg) then
  5747. Exit;
  5748. case hp1.typ of
  5749. ait_label:
  5750. begin
  5751. { Change:
  5752. mov $0,%reg mov $0,%reg
  5753. @Lbl1: @Lbl1:
  5754. test %reg,%reg / cmp $0,%reg test %reg,%reg / mov $0,%reg
  5755. je @Lbl2 jne @Lbl2
  5756. To: To:
  5757. mov $0,%reg mov $0,%reg
  5758. jmp @Lbl2 jmp @Lbl3
  5759. (align) (align)
  5760. @Lbl1: @Lbl1:
  5761. test %reg,%reg / cmp $0,%reg test %reg,%reg / cmp $0,%reg
  5762. je @Lbl2 je @Lbl2
  5763. @Lbl3: <-- Only if label exists
  5764. (Not if it's optimised for size)
  5765. }
  5766. if not GetNextInstruction(hp1, hp2) then
  5767. Exit;
  5768. if not (cs_opt_size in current_settings.optimizerswitches) and
  5769. (hp2.typ = ait_instruction) and
  5770. (
  5771. { Register sizes must exactly match }
  5772. (
  5773. (taicpu(hp2).opcode = A_CMP) and
  5774. MatchOperand(taicpu(hp2).oper[0]^, 0) and
  5775. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^.reg)
  5776. ) or (
  5777. (taicpu(hp2).opcode = A_TEST) and
  5778. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[1]^.reg) and
  5779. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^.reg)
  5780. )
  5781. ) and GetNextInstruction(hp2, hp3) and
  5782. (hp3.typ = ait_instruction) and
  5783. (taicpu(hp3).opcode = A_JCC) and
  5784. (taicpu(hp3).oper[0]^.typ=top_ref) and (taicpu(hp3).oper[0]^.ref^.refaddr=addr_full) and (taicpu(hp3).oper[0]^.ref^.base=NR_NO) and
  5785. (taicpu(hp3).oper[0]^.ref^.index=NR_NO) and (taicpu(hp3).oper[0]^.ref^.symbol is tasmlabel) then
  5786. begin
  5787. { Check condition of jump }
  5788. { Always true? }
  5789. if condition_in(C_E, taicpu(hp3).condition) then
  5790. begin
  5791. { Copy the label symbol and obtain the matching label entry for the
  5792. conditional jump, as this will be our destination }
  5793. DestLabel := tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol);
  5794. DebugMsg(SPeepholeOptimization + 'Mov0LblCmp0Je -> Mov0JmpLblCmp0Je', p);
  5795. Result := True;
  5796. end
  5797. { Always false? }
  5798. else if condition_in(C_NE, taicpu(hp3).condition) and GetNextInstruction(hp3, hp2) then
  5799. begin
  5800. { This is only worth it if there's a jump to take }
  5801. case hp2.typ of
  5802. ait_instruction:
  5803. begin
  5804. if taicpu(hp2).opcode = A_JMP then
  5805. begin
  5806. DestLabel := tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol);
  5807. { An unconditional jump follows the conditional jump which will always be false,
  5808. so use this jump's destination for the new jump }
  5809. DebugMsg(SPeepholeOptimization + 'Mov0LblCmp0Jne -> Mov0JmpLblCmp0Jne (with JMP)', p);
  5810. Result := True;
  5811. end
  5812. else if taicpu(hp2).opcode = A_JCC then
  5813. begin
  5814. DestLabel := tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol);
  5815. if condition_in(C_E, taicpu(hp2).condition) then
  5816. begin
  5817. { A second conditional jump follows the conditional jump which will always be false,
  5818. while the second jump is always True, so use this jump's destination for the new jump }
  5819. DebugMsg(SPeepholeOptimization + 'Mov0LblCmp0Jne -> Mov0JmpLblCmp0Jne (with second Jcc)', p);
  5820. Result := True;
  5821. end;
  5822. { Don't risk it if the jump isn't always true (Result remains False) }
  5823. end;
  5824. end;
  5825. else
  5826. { If anything else don't optimise };
  5827. end;
  5828. end;
  5829. if Result then
  5830. begin
  5831. { Just so we have something to insert as a parameter }
  5832. reference_reset(NewRef, 1, []);
  5833. NewInstr := taicpu.op_ref(A_JMP, S_NO, NewRef);
  5834. { Now actually load the correct parameter }
  5835. NewInstr.loadsymbol(0, DestLabel, 0);
  5836. { Get instruction before original label (may not be p under -O3) }
  5837. if not GetLastInstruction(hp1, hp2) then
  5838. { Shouldn't fail here }
  5839. InternalError(2021040701);
  5840. DestLabel.increfs;
  5841. AsmL.InsertAfter(NewInstr, hp2);
  5842. { Add new alignment field }
  5843. (* AsmL.InsertAfter(
  5844. cai_align.create_max(
  5845. current_settings.alignment.jumpalign,
  5846. current_settings.alignment.jumpalignskipmax
  5847. ),
  5848. NewInstr
  5849. ); *)
  5850. end;
  5851. Exit;
  5852. end;
  5853. end;
  5854. else
  5855. ;
  5856. end;
  5857. end;
  5858. if not GetNextInstruction(p, hp1) then
  5859. Exit;
  5860. if MatchInstruction(hp1, A_JMP, [S_NO]) then
  5861. begin
  5862. { Sometimes the MOVs that OptPass2JMP produces can be improved
  5863. further, but we can't just put this jump optimisation in pass 1
  5864. because it tends to perform worse when conditional jumps are
  5865. nearby (e.g. when converting CMOV instructions). [Kit] }
  5866. if OptPass2JMP(hp1) then
  5867. { call OptPass1MOV once to potentially merge any MOVs that were created }
  5868. Result := OptPass1MOV(p)
  5869. { OptPass2MOV will now exit but will be called again if OptPass1MOV
  5870. returned True and the instruction is still a MOV, thus checking
  5871. the optimisations below }
  5872. { If OptPass2JMP returned False, no optimisations were done to
  5873. the jump and there are no further optimisations that can be done
  5874. to the MOV instruction on this pass }
  5875. end
  5876. else if MatchOpType(taicpu(p),top_reg,top_reg) and
  5877. (taicpu(p).opsize in [S_L{$ifdef x86_64}, S_Q{$endif x86_64}]) and
  5878. MatchInstruction(hp1,A_ADD,A_SUB,[taicpu(p).opsize]) and
  5879. MatchOpType(taicpu(hp1),top_const,top_reg) and
  5880. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) and
  5881. { be lazy, checking separately for sub would be slightly better }
  5882. (abs(taicpu(hp1).oper[0]^.val)<=$7fffffff) then
  5883. begin
  5884. { Change:
  5885. movl/q %reg1,%reg2 movl/q %reg1,%reg2
  5886. addl/q $x,%reg2 subl/q $x,%reg2
  5887. To:
  5888. leal/q x(%reg1),%reg2 leal/q -x(%reg1),%reg2
  5889. }
  5890. TransferUsedRegs(TmpUsedRegs);
  5891. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  5892. UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
  5893. if not GetNextInstruction(hp1, hp2) or
  5894. (
  5895. { The FLAGS register isn't always tracked properly, so do not
  5896. perform this optimisation if a conditional statement follows }
  5897. not RegReadByInstruction(NR_DEFAULTFLAGS, hp2) and
  5898. not RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp2, TmpUsedRegs)
  5899. ) then
  5900. begin
  5901. reference_reset(NewRef, 1, []);
  5902. NewRef.base := taicpu(p).oper[0]^.reg;
  5903. NewRef.scalefactor := 1;
  5904. if taicpu(hp1).opcode = A_ADD then
  5905. begin
  5906. DebugMsg(SPeepholeOptimization + 'MovAdd2Lea', p);
  5907. NewRef.offset := taicpu(hp1).oper[0]^.val;
  5908. end
  5909. else
  5910. begin
  5911. DebugMsg(SPeepholeOptimization + 'MovSub2Lea', p);
  5912. NewRef.offset := -taicpu(hp1).oper[0]^.val;
  5913. end;
  5914. taicpu(p).opcode := A_LEA;
  5915. taicpu(p).loadref(0, NewRef);
  5916. RemoveInstruction(hp1);
  5917. Result := True;
  5918. Exit;
  5919. end;
  5920. end
  5921. else if MatchOpType(taicpu(p),top_reg,top_reg) and
  5922. {$ifdef x86_64}
  5923. MatchInstruction(hp1,A_MOVZX,A_MOVSX,A_MOVSXD,[]) and
  5924. {$else x86_64}
  5925. MatchInstruction(hp1,A_MOVZX,A_MOVSX,[]) and
  5926. {$endif x86_64}
  5927. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  5928. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  5929. { mov reg1, reg2 mov reg1, reg2
  5930. movzx/sx reg2, reg3 to movzx/sx reg1, reg3}
  5931. begin
  5932. taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
  5933. DebugMsg(SPeepholeOptimization + 'mov %reg1,%reg2; movzx/sx %reg2,%reg3 -> mov %reg1,%reg2;movzx/sx %reg1,%reg3',p);
  5934. { Don't remove the MOV command without first checking that reg2 isn't used afterwards,
  5935. unless supreg(reg3) = supreg(reg2). [Kit] }
  5936. TransferUsedRegs(TmpUsedRegs);
  5937. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  5938. if (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) or
  5939. not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)
  5940. then
  5941. begin
  5942. RemoveCurrentP(p, hp1);
  5943. Result:=true;
  5944. end;
  5945. exit;
  5946. end
  5947. else if MatchOpType(taicpu(p),top_reg,top_reg) and
  5948. IsXCHGAcceptable and
  5949. { XCHG doesn't support 8-byte registers }
  5950. (taicpu(p).opsize <> S_B) and
  5951. MatchInstruction(hp1, A_MOV, []) and
  5952. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  5953. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
  5954. GetNextInstruction(hp1, hp2) and
  5955. MatchInstruction(hp2, A_MOV, []) and
  5956. { Don't need to call MatchOpType for hp2 because the operand matches below cover for it }
  5957. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[1]^.reg) and
  5958. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) then
  5959. begin
  5960. { mov %reg1,%reg2
  5961. mov %reg3,%reg1 -> xchg %reg3,%reg1
  5962. mov %reg2,%reg3
  5963. (%reg2 not used afterwards)
  5964. Note that xchg takes 3 cycles to execute, and generally mov's take
  5965. only one cycle apiece, but the first two mov's can be executed in
  5966. parallel, only taking 2 cycles overall. Older processors should
  5967. therefore only optimise for size. [Kit]
  5968. }
  5969. TransferUsedRegs(TmpUsedRegs);
  5970. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  5971. UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
  5972. if not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp2, TmpUsedRegs) then
  5973. begin
  5974. DebugMsg(SPeepholeOptimization + 'MovMovMov2XChg', p);
  5975. AllocRegBetween(taicpu(hp2).oper[1]^.reg, p, hp1, UsedRegs);
  5976. taicpu(hp1).opcode := A_XCHG;
  5977. RemoveCurrentP(p, hp1);
  5978. RemoveInstruction(hp2);
  5979. Result := True;
  5980. Exit;
  5981. end;
  5982. end
  5983. else if MatchOpType(taicpu(p),top_reg,top_reg) and
  5984. MatchInstruction(hp1, A_SAR, []) then
  5985. begin
  5986. if MatchOperand(taicpu(hp1).oper[0]^, 31) then
  5987. begin
  5988. { the use of %edx also covers the opsize being S_L }
  5989. if MatchOperand(taicpu(hp1).oper[1]^, NR_EDX) then
  5990. begin
  5991. { Note it has to be specifically "movl %eax,%edx", and those specific sub-registers }
  5992. if (taicpu(p).oper[0]^.reg = NR_EAX) and
  5993. (taicpu(p).oper[1]^.reg = NR_EDX) then
  5994. begin
  5995. { Change:
  5996. movl %eax,%edx
  5997. sarl $31,%edx
  5998. To:
  5999. cltd
  6000. }
  6001. DebugMsg(SPeepholeOptimization + 'MovSar2Cltd', p);
  6002. RemoveInstruction(hp1);
  6003. taicpu(p).opcode := A_CDQ;
  6004. taicpu(p).opsize := S_NO;
  6005. taicpu(p).clearop(1);
  6006. taicpu(p).clearop(0);
  6007. taicpu(p).ops:=0;
  6008. Result := True;
  6009. end
  6010. else if (cs_opt_size in current_settings.optimizerswitches) and
  6011. (taicpu(p).oper[0]^.reg = NR_EDX) and
  6012. (taicpu(p).oper[1]^.reg = NR_EAX) then
  6013. begin
  6014. { Change:
  6015. movl %edx,%eax
  6016. sarl $31,%edx
  6017. To:
  6018. movl %edx,%eax
  6019. cltd
  6020. Note that this creates a dependency between the two instructions,
  6021. so only perform if optimising for size.
  6022. }
  6023. DebugMsg(SPeepholeOptimization + 'MovSar2MovCltd', p);
  6024. taicpu(hp1).opcode := A_CDQ;
  6025. taicpu(hp1).opsize := S_NO;
  6026. taicpu(hp1).clearop(1);
  6027. taicpu(hp1).clearop(0);
  6028. taicpu(hp1).ops:=0;
  6029. end;
  6030. {$ifndef x86_64}
  6031. end
  6032. { Don't bother if CMOV is supported, because a more optimal
  6033. sequence would have been generated for the Abs() intrinsic }
  6034. else if not(CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) and
  6035. { the use of %eax also covers the opsize being S_L }
  6036. MatchOperand(taicpu(hp1).oper[1]^, NR_EAX) and
  6037. (taicpu(p).oper[0]^.reg = NR_EAX) and
  6038. (taicpu(p).oper[1]^.reg = NR_EDX) and
  6039. GetNextInstruction(hp1, hp2) and
  6040. MatchInstruction(hp2, A_XOR, [S_L]) and
  6041. MatchOperand(taicpu(hp2).oper[0]^, NR_EAX) and
  6042. MatchOperand(taicpu(hp2).oper[1]^, NR_EDX) and
  6043. GetNextInstruction(hp2, hp3) and
  6044. MatchInstruction(hp3, A_SUB, [S_L]) and
  6045. MatchOperand(taicpu(hp3).oper[0]^, NR_EAX) and
  6046. MatchOperand(taicpu(hp3).oper[1]^, NR_EDX) then
  6047. begin
  6048. { Change:
  6049. movl %eax,%edx
  6050. sarl $31,%eax
  6051. xorl %eax,%edx
  6052. subl %eax,%edx
  6053. (Instruction that uses %edx)
  6054. (%eax deallocated)
  6055. (%edx deallocated)
  6056. To:
  6057. cltd
  6058. xorl %edx,%eax <-- Note the registers have swapped
  6059. subl %edx,%eax
  6060. (Instruction that uses %eax) <-- %eax rather than %edx
  6061. }
  6062. TransferUsedRegs(TmpUsedRegs);
  6063. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  6064. UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
  6065. UpdateUsedRegs(TmpUsedRegs, tai(hp2.Next));
  6066. if not RegUsedAfterInstruction(NR_EAX, hp3, TmpUsedRegs) then
  6067. begin
  6068. if GetNextInstruction(hp3, hp4) and
  6069. not RegModifiedByInstruction(NR_EDX, hp4) and
  6070. not RegUsedAfterInstruction(NR_EDX, hp4, TmpUsedRegs) then
  6071. begin
  6072. DebugMsg(SPeepholeOptimization + 'abs() intrinsic optimisation', p);
  6073. taicpu(p).opcode := A_CDQ;
  6074. taicpu(p).clearop(1);
  6075. taicpu(p).clearop(0);
  6076. taicpu(p).ops:=0;
  6077. RemoveInstruction(hp1);
  6078. taicpu(hp2).loadreg(0, NR_EDX);
  6079. taicpu(hp2).loadreg(1, NR_EAX);
  6080. taicpu(hp3).loadreg(0, NR_EDX);
  6081. taicpu(hp3).loadreg(1, NR_EAX);
  6082. AllocRegBetween(NR_EAX, hp3, hp4, TmpUsedRegs);
  6083. { Convert references in the following instruction (hp4) from %edx to %eax }
  6084. for OperIdx := 0 to taicpu(hp4).ops - 1 do
  6085. with taicpu(hp4).oper[OperIdx]^ do
  6086. case typ of
  6087. top_reg:
  6088. if getsupreg(reg) = RS_EDX then
  6089. reg := newreg(R_INTREGISTER,RS_EAX,getsubreg(reg));
  6090. top_ref:
  6091. begin
  6092. if getsupreg(reg) = RS_EDX then
  6093. ref^.base := newreg(R_INTREGISTER,RS_EAX,getsubreg(reg));
  6094. if getsupreg(reg) = RS_EDX then
  6095. ref^.index := newreg(R_INTREGISTER,RS_EAX,getsubreg(reg));
  6096. end;
  6097. else
  6098. ;
  6099. end;
  6100. end;
  6101. end;
  6102. {$else x86_64}
  6103. end;
  6104. end
  6105. else if MatchOperand(taicpu(hp1).oper[0]^, 63) and
  6106. { the use of %rdx also covers the opsize being S_Q }
  6107. MatchOperand(taicpu(hp1).oper[1]^, NR_RDX) then
  6108. begin
  6109. { Note it has to be specifically "movq %rax,%rdx", and those specific sub-registers }
  6110. if (taicpu(p).oper[0]^.reg = NR_RAX) and
  6111. (taicpu(p).oper[1]^.reg = NR_RDX) then
  6112. begin
  6113. { Change:
  6114. movq %rax,%rdx
  6115. sarq $63,%rdx
  6116. To:
  6117. cqto
  6118. }
  6119. DebugMsg(SPeepholeOptimization + 'MovSar2Cqto', p);
  6120. RemoveInstruction(hp1);
  6121. taicpu(p).opcode := A_CQO;
  6122. taicpu(p).opsize := S_NO;
  6123. taicpu(p).clearop(1);
  6124. taicpu(p).clearop(0);
  6125. taicpu(p).ops:=0;
  6126. Result := True;
  6127. end
  6128. else if (cs_opt_size in current_settings.optimizerswitches) and
  6129. (taicpu(p).oper[0]^.reg = NR_RDX) and
  6130. (taicpu(p).oper[1]^.reg = NR_RAX) then
  6131. begin
  6132. { Change:
  6133. movq %rdx,%rax
  6134. sarq $63,%rdx
  6135. To:
  6136. movq %rdx,%rax
  6137. cqto
  6138. Note that this creates a dependency between the two instructions,
  6139. so only perform if optimising for size.
  6140. }
  6141. DebugMsg(SPeepholeOptimization + 'MovSar2MovCqto', p);
  6142. taicpu(hp1).opcode := A_CQO;
  6143. taicpu(hp1).opsize := S_NO;
  6144. taicpu(hp1).clearop(1);
  6145. taicpu(hp1).clearop(0);
  6146. taicpu(hp1).ops:=0;
  6147. {$endif x86_64}
  6148. end;
  6149. end;
  6150. end
  6151. else if MatchInstruction(hp1, A_MOV, []) and
  6152. (taicpu(hp1).oper[1]^.typ = top_reg) then
  6153. { Though "GetNextInstruction" could be factored out, along with
  6154. the instructions that depend on hp2, it is an expensive call that
  6155. should be delayed for as long as possible, hence we do cheaper
  6156. checks first that are likely to be False. [Kit] }
  6157. begin
  6158. if (
  6159. (
  6160. MatchOperand(taicpu(p).oper[1]^, NR_EDX) and
  6161. (taicpu(hp1).oper[1]^.reg = NR_EAX) and
  6162. (
  6163. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^) or
  6164. MatchOperand(taicpu(hp1).oper[0]^, NR_EDX)
  6165. )
  6166. ) or
  6167. (
  6168. MatchOperand(taicpu(p).oper[1]^, NR_EAX) and
  6169. (taicpu(hp1).oper[1]^.reg = NR_EDX) and
  6170. (
  6171. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^) or
  6172. MatchOperand(taicpu(hp1).oper[0]^, NR_EAX)
  6173. )
  6174. )
  6175. ) and
  6176. GetNextInstruction(hp1, hp2) and
  6177. MatchInstruction(hp2, A_SAR, []) and
  6178. MatchOperand(taicpu(hp2).oper[0]^, 31) then
  6179. begin
  6180. if MatchOperand(taicpu(hp2).oper[1]^, NR_EDX) then
  6181. begin
  6182. { Change:
  6183. movl r/m,%edx movl r/m,%eax movl r/m,%edx movl r/m,%eax
  6184. movl %edx,%eax or movl %eax,%edx or movl r/m,%eax or movl r/m,%edx
  6185. sarl $31,%edx sarl $31,%edx sarl $31,%edx sarl $31,%edx
  6186. To:
  6187. movl r/m,%eax <- Note the change in register
  6188. cltd
  6189. }
  6190. DebugMsg(SPeepholeOptimization + 'MovMovSar2MovCltd', p);
  6191. AllocRegBetween(NR_EAX, p, hp1, UsedRegs);
  6192. taicpu(p).loadreg(1, NR_EAX);
  6193. taicpu(hp1).opcode := A_CDQ;
  6194. taicpu(hp1).clearop(1);
  6195. taicpu(hp1).clearop(0);
  6196. taicpu(hp1).ops:=0;
  6197. RemoveInstruction(hp2);
  6198. (*
  6199. {$ifdef x86_64}
  6200. end
  6201. else if MatchOperand(taicpu(hp2).oper[1]^, NR_RDX) and
  6202. { This code sequence does not get generated - however it might become useful
  6203. if and when 128-bit signed integer types make an appearance, so the code
  6204. is kept here for when it is eventually needed. [Kit] }
  6205. (
  6206. (
  6207. (taicpu(hp1).oper[1]^.reg = NR_RAX) and
  6208. (
  6209. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^) or
  6210. MatchOperand(taicpu(hp1).oper[0]^, NR_RDX)
  6211. )
  6212. ) or
  6213. (
  6214. (taicpu(hp1).oper[1]^.reg = NR_RDX) and
  6215. (
  6216. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^) or
  6217. MatchOperand(taicpu(hp1).oper[0]^, NR_RAX)
  6218. )
  6219. )
  6220. ) and
  6221. GetNextInstruction(hp1, hp2) and
  6222. MatchInstruction(hp2, A_SAR, [S_Q]) and
  6223. MatchOperand(taicpu(hp2).oper[0]^, 63) and
  6224. MatchOperand(taicpu(hp2).oper[1]^, NR_RDX) then
  6225. begin
  6226. { Change:
  6227. movq r/m,%rdx movq r/m,%rax movq r/m,%rdx movq r/m,%rax
  6228. movq %rdx,%rax or movq %rax,%rdx or movq r/m,%rax or movq r/m,%rdx
  6229. sarq $63,%rdx sarq $63,%rdx sarq $63,%rdx sarq $63,%rdx
  6230. To:
  6231. movq r/m,%rax <- Note the change in register
  6232. cqto
  6233. }
  6234. DebugMsg(SPeepholeOptimization + 'MovMovSar2MovCqto', p);
  6235. AllocRegBetween(NR_RAX, p, hp1, UsedRegs);
  6236. taicpu(p).loadreg(1, NR_RAX);
  6237. taicpu(hp1).opcode := A_CQO;
  6238. taicpu(hp1).clearop(1);
  6239. taicpu(hp1).clearop(0);
  6240. taicpu(hp1).ops:=0;
  6241. RemoveInstruction(hp2);
  6242. {$endif x86_64}
  6243. *)
  6244. end;
  6245. end;
  6246. {$ifdef x86_64}
  6247. end
  6248. else if (taicpu(p).opsize = S_L) and
  6249. (taicpu(p).oper[1]^.typ = top_reg) and
  6250. (
  6251. MatchInstruction(hp1, A_MOV,[]) and
  6252. (taicpu(hp1).opsize = S_L) and
  6253. (taicpu(hp1).oper[1]^.typ = top_reg)
  6254. ) and (
  6255. GetNextInstruction(hp1, hp2) and
  6256. (tai(hp2).typ=ait_instruction) and
  6257. (taicpu(hp2).opsize = S_Q) and
  6258. (
  6259. (
  6260. MatchInstruction(hp2, A_ADD,[]) and
  6261. (taicpu(hp2).opsize = S_Q) and
  6262. (taicpu(hp2).oper[0]^.typ = top_reg) and (taicpu(hp2).oper[1]^.typ = top_reg) and
  6263. (
  6264. (
  6265. (getsupreg(taicpu(hp2).oper[0]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) and
  6266. (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))
  6267. ) or (
  6268. (getsupreg(taicpu(hp2).oper[0]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
  6269. (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg))
  6270. )
  6271. )
  6272. ) or (
  6273. MatchInstruction(hp2, A_LEA,[]) and
  6274. (taicpu(hp2).oper[0]^.ref^.offset = 0) and
  6275. (taicpu(hp2).oper[0]^.ref^.scalefactor <= 1) and
  6276. (
  6277. (
  6278. (getsupreg(taicpu(hp2).oper[0]^.ref^.base) = getsupreg(taicpu(p).oper[1]^.reg)) and
  6279. (getsupreg(taicpu(hp2).oper[0]^.ref^.index) = getsupreg(taicpu(hp1).oper[1]^.reg))
  6280. ) or (
  6281. (getsupreg(taicpu(hp2).oper[0]^.ref^.base) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
  6282. (getsupreg(taicpu(hp2).oper[0]^.ref^.index) = getsupreg(taicpu(p).oper[1]^.reg))
  6283. )
  6284. ) and (
  6285. (
  6286. (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))
  6287. ) or (
  6288. (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg))
  6289. )
  6290. )
  6291. )
  6292. )
  6293. ) and (
  6294. GetNextInstruction(hp2, hp3) and
  6295. MatchInstruction(hp3, A_SHR,[]) and
  6296. (taicpu(hp3).opsize = S_Q) and
  6297. (taicpu(hp3).oper[0]^.typ = top_const) and (taicpu(hp2).oper[1]^.typ = top_reg) and
  6298. (taicpu(hp3).oper[0]^.val = 1) and
  6299. (taicpu(hp3).oper[1]^.reg = taicpu(hp2).oper[1]^.reg)
  6300. ) then
  6301. begin
  6302. { Change movl x, reg1d movl x, reg1d
  6303. movl y, reg2d movl y, reg2d
  6304. addq reg2q,reg1q or leaq (reg1q,reg2q),reg1q
  6305. shrq $1, reg1q shrq $1, reg1q
  6306. ( reg1d and reg2d can be switched around in the first two instructions )
  6307. To movl x, reg1d
  6308. addl y, reg1d
  6309. rcrl $1, reg1d
  6310. This corresponds to the common expression (x + y) shr 1, where
  6311. x and y are Cardinals (replacing "shr 1" with "div 2" produces
  6312. smaller code, but won't account for x + y causing an overflow). [Kit]
  6313. }
  6314. if (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) then
  6315. { Change first MOV command to have the same register as the final output }
  6316. taicpu(p).oper[1]^.reg := taicpu(hp1).oper[1]^.reg
  6317. else
  6318. taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  6319. { Change second MOV command to an ADD command. This is easier than
  6320. converting the existing command because it means we don't have to
  6321. touch 'y', which might be a complicated reference, and also the
  6322. fact that the third command might either be ADD or LEA. [Kit] }
  6323. taicpu(hp1).opcode := A_ADD;
  6324. { Delete old ADD/LEA instruction }
  6325. RemoveInstruction(hp2);
  6326. { Convert "shrq $1, reg1q" to "rcr $1, reg1d" }
  6327. taicpu(hp3).opcode := A_RCR;
  6328. taicpu(hp3).changeopsize(S_L);
  6329. setsubreg(taicpu(hp3).oper[1]^.reg, R_SUBD);
  6330. {$endif x86_64}
  6331. end;
  6332. end;
  6333. function TX86AsmOptimizer.OptPass2Movx(var p : tai) : boolean;
  6334. var
  6335. ThisReg: TRegister;
  6336. MinSize, MaxSize, TrySmaller, TargetSize: TOpSize;
  6337. TargetSubReg: TSubRegister;
  6338. hp1, hp2: tai;
  6339. RegInUse, RegChanged, p_removed: Boolean;
  6340. { Store list of found instructions so we don't have to call
  6341. GetNextInstructionUsingReg multiple times }
  6342. InstrList: array of taicpu;
  6343. InstrMax, Index: Integer;
  6344. UpperLimit, TrySmallerLimit: TCgInt;
  6345. PreMessage: string;
  6346. { Data flow analysis }
  6347. TestValMin, TestValMax: TCgInt;
  6348. SmallerOverflow: Boolean;
  6349. begin
  6350. Result := False;
  6351. p_removed := False;
  6352. { This is anything but quick! }
  6353. if not(cs_opt_level2 in current_settings.optimizerswitches) then
  6354. Exit;
  6355. SetLength(InstrList, 0);
  6356. InstrMax := -1;
  6357. ThisReg := taicpu(p).oper[1]^.reg;
  6358. case taicpu(p).opsize of
  6359. S_BW, S_BL:
  6360. begin
  6361. {$if defined(i386) or defined(i8086)}
  6362. { If the target size is 8-bit, make sure we can actually encode it }
  6363. if not (GetSupReg(ThisReg) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX]) then
  6364. Exit;
  6365. {$endif i386 or i8086}
  6366. UpperLimit := $FF;
  6367. MinSize := S_B;
  6368. if taicpu(p).opsize = S_BW then
  6369. MaxSize := S_W
  6370. else
  6371. MaxSize := S_L;
  6372. end;
  6373. S_WL:
  6374. begin
  6375. UpperLimit := $FFFF;
  6376. MinSize := S_W;
  6377. MaxSize := S_L;
  6378. end
  6379. else
  6380. InternalError(2020112301);
  6381. end;
  6382. TestValMin := 0;
  6383. TestValMax := UpperLimit;
  6384. TrySmallerLimit := UpperLimit;
  6385. TrySmaller := S_NO;
  6386. SmallerOverflow := False;
  6387. RegChanged := False;
  6388. hp1 := p;
  6389. while GetNextInstructionUsingReg(hp1, hp1, ThisReg) and
  6390. (hp1.typ = ait_instruction) and
  6391. (
  6392. { Under -O1 and -O2, GetNextInstructionUsingReg may return an
  6393. instruction that doesn't actually contain ThisReg }
  6394. (cs_opt_level3 in current_settings.optimizerswitches) or
  6395. RegInInstruction(ThisReg, hp1)
  6396. ) do
  6397. begin
  6398. case taicpu(hp1).opcode of
  6399. A_INC,A_DEC:
  6400. begin
  6401. { Has to be an exact match on the register }
  6402. if not MatchOperand(taicpu(hp1).oper[0]^, ThisReg) then
  6403. Break;
  6404. if taicpu(hp1).opcode = A_INC then
  6405. begin
  6406. Inc(TestValMin);
  6407. Inc(TestValMax);
  6408. end
  6409. else
  6410. begin
  6411. Dec(TestValMin);
  6412. Dec(TestValMax);
  6413. end;
  6414. end;
  6415. A_CMP:
  6416. begin
  6417. if (taicpu(hp1).oper[1]^.typ <> top_reg) or
  6418. { Has to be an exact match on the register }
  6419. (taicpu(hp1).oper[1]^.reg <> ThisReg) or
  6420. (taicpu(hp1).oper[0]^.typ <> top_const) or
  6421. { Make sure the comparison value is not smaller than the
  6422. smallest allowed signed value for the minimum size (e.g.
  6423. -128 for 8-bit) }
  6424. not (
  6425. ((taicpu(hp1).oper[0]^.val and UpperLimit) = taicpu(hp1).oper[0]^.val) or
  6426. { Is it in the negative range? }
  6427. (((not taicpu(hp1).oper[0]^.val) and (UpperLimit shr 1)) = (not taicpu(hp1).oper[0]^.val))
  6428. ) then
  6429. Break;
  6430. TestValMin := TestValMin - taicpu(hp1).oper[0]^.val;
  6431. TestValMax := TestValMax - taicpu(hp1).oper[0]^.val;
  6432. if (TestValMin < TrySmallerLimit) or (TestValMax < TrySmallerLimit) or
  6433. (TestValMin > UpperLimit) or (TestValMax > UpperLimit) then
  6434. { Overflow }
  6435. Break;
  6436. { Check to see if the active register is used afterwards }
  6437. TransferUsedRegs(TmpUsedRegs);
  6438. IncludeRegInUsedRegs(ThisReg, TmpUsedRegs);
  6439. if not RegUsedAfterInstruction(ThisReg, hp1, TmpUsedRegs) then
  6440. begin
  6441. case MinSize of
  6442. S_B:
  6443. TargetSubReg := R_SUBL;
  6444. S_W:
  6445. TargetSubReg := R_SUBW;
  6446. else
  6447. InternalError(2021051002);
  6448. end;
  6449. { Update the register to its new size }
  6450. setsubreg(ThisReg, TargetSubReg);
  6451. taicpu(hp1).oper[1]^.reg := ThisReg;
  6452. taicpu(hp1).opsize := MinSize;
  6453. { Convert the input MOVZX to a MOV }
  6454. if (taicpu(p).oper[0]^.typ = top_reg) and
  6455. SuperRegistersEqual(taicpu(p).oper[0]^.reg, ThisReg) then
  6456. begin
  6457. { Or remove it completely! }
  6458. DebugMsg(SPeepholeOptimization + 'Movzx2Nop 1a', p);
  6459. RemoveCurrentP(p);
  6460. p_removed := True;
  6461. end
  6462. else
  6463. begin
  6464. DebugMsg(SPeepholeOptimization + 'Movzx2Mov 1a', p);
  6465. taicpu(p).opcode := A_MOV;
  6466. taicpu(p).oper[1]^.reg := ThisReg;
  6467. taicpu(p).opsize := MinSize;
  6468. end;
  6469. if (InstrMax >= 0) then
  6470. begin
  6471. for Index := 0 to InstrMax do
  6472. begin
  6473. { If p_removed is true, then the original MOV/Z was removed
  6474. and removing the AND instruction may not be safe if it
  6475. appears first }
  6476. if (InstrList[Index].oper[InstrList[Index].ops - 1]^.typ <> top_reg) then
  6477. InternalError(2020112311);
  6478. if InstrList[Index].oper[0]^.typ = top_reg then
  6479. InstrList[Index].oper[0]^.reg := ThisReg;
  6480. InstrList[Index].oper[InstrList[Index].ops - 1]^.reg := ThisReg;
  6481. InstrList[Index].opsize := MinSize;
  6482. end;
  6483. end;
  6484. Result := True;
  6485. Exit;
  6486. end;
  6487. end;
  6488. { OR and XOR are not included because they can too easily fool
  6489. the data flow analysis (they can cause non-linear behaviour) }
  6490. A_ADD,A_SUB,A_AND,A_SHL,A_SHR:
  6491. begin
  6492. if
  6493. (taicpu(hp1).oper[1]^.typ <> top_reg) or
  6494. { Has to be an exact match on the register }
  6495. (taicpu(hp1).oper[1]^.reg <> ThisReg) or not
  6496. (
  6497. (
  6498. (taicpu(hp1).oper[0]^.typ = top_const) and
  6499. (
  6500. (
  6501. (taicpu(hp1).opcode = A_SHL) and
  6502. (
  6503. ((MinSize = S_B) and (taicpu(hp1).oper[0]^.val < 8)) or
  6504. ((MinSize = S_W) and (taicpu(hp1).oper[0]^.val < 16)) or
  6505. ((MinSize = S_L) and (taicpu(hp1).oper[0]^.val < 32))
  6506. )
  6507. ) or (
  6508. (taicpu(hp1).opcode <> A_SHL) and
  6509. (
  6510. ((taicpu(hp1).oper[0]^.val and UpperLimit) = taicpu(hp1).oper[0]^.val) or
  6511. { Is it in the negative range? }
  6512. (((not taicpu(hp1).oper[0]^.val) and (UpperLimit shr 1)) = (not taicpu(hp1).oper[0]^.val))
  6513. )
  6514. )
  6515. )
  6516. ) or (
  6517. MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^.reg) and
  6518. ((taicpu(hp1).opcode = A_ADD) or (taicpu(hp1).opcode = A_AND) or (taicpu(hp1).opcode = A_SUB))
  6519. )
  6520. ) then
  6521. Break;
  6522. case taicpu(hp1).opcode of
  6523. A_ADD:
  6524. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  6525. begin
  6526. TestValMin := TestValMin * 2;
  6527. TestValMax := TestValMax * 2;
  6528. end
  6529. else
  6530. begin
  6531. TestValMin := TestValMin + taicpu(hp1).oper[0]^.val;
  6532. TestValMax := TestValMax + taicpu(hp1).oper[0]^.val;
  6533. end;
  6534. A_SUB:
  6535. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  6536. begin
  6537. TestValMin := 0;
  6538. TestValMax := 0;
  6539. end
  6540. else
  6541. begin
  6542. TestValMin := TestValMin - taicpu(hp1).oper[0]^.val;
  6543. TestValMax := TestValMax - taicpu(hp1).oper[0]^.val;
  6544. end;
  6545. A_AND:
  6546. if (taicpu(hp1).oper[0]^.typ = top_const) then
  6547. begin
  6548. { we might be able to go smaller if AND appears first }
  6549. if InstrMax = -1 then
  6550. case MinSize of
  6551. S_B:
  6552. ;
  6553. S_W:
  6554. if ((taicpu(hp1).oper[0]^.val and $FF) = taicpu(hp1).oper[0]^.val) or
  6555. ((not(taicpu(hp1).oper[0]^.val) and $7F) = (not taicpu(hp1).oper[0]^.val)) then
  6556. begin
  6557. TrySmaller := S_B;
  6558. TrySmallerLimit := $FF;
  6559. end;
  6560. S_L:
  6561. if ((taicpu(hp1).oper[0]^.val and $FF) = taicpu(hp1).oper[0]^.val) or
  6562. ((not(taicpu(hp1).oper[0]^.val) and $7F) = (not taicpu(hp1).oper[0]^.val)) then
  6563. begin
  6564. TrySmaller := S_B;
  6565. TrySmallerLimit := $FF;
  6566. end
  6567. else if ((taicpu(hp1).oper[0]^.val and $FFFF) = taicpu(hp1).oper[0]^.val) or
  6568. ((not(taicpu(hp1).oper[0]^.val) and $7FFF) = (not taicpu(hp1).oper[0]^.val)) then
  6569. begin
  6570. TrySmaller := S_W;
  6571. TrySmallerLimit := $FFFF;
  6572. end;
  6573. else
  6574. InternalError(2020112320);
  6575. end;
  6576. TestValMin := TestValMin and taicpu(hp1).oper[0]^.val;
  6577. TestValMax := TestValMax and taicpu(hp1).oper[0]^.val;
  6578. end;
  6579. A_SHL:
  6580. begin
  6581. TestValMin := TestValMin shl taicpu(hp1).oper[0]^.val;
  6582. TestValMax := TestValMax shl taicpu(hp1).oper[0]^.val;
  6583. end;
  6584. A_SHR:
  6585. begin
  6586. { we might be able to go smaller if SHR appears first }
  6587. if InstrMax = -1 then
  6588. case MinSize of
  6589. S_B:
  6590. ;
  6591. S_W:
  6592. if (taicpu(hp1).oper[0]^.val >= 8) then
  6593. begin
  6594. TrySmaller := S_B;
  6595. TrySmallerLimit := $FF;
  6596. end;
  6597. S_L:
  6598. if (taicpu(hp1).oper[0]^.val >= 24) then
  6599. begin
  6600. TrySmaller := S_B;
  6601. TrySmallerLimit := $FF;
  6602. end
  6603. else if (taicpu(hp1).oper[0]^.val >= 16) then
  6604. begin
  6605. TrySmaller := S_W;
  6606. TrySmallerLimit := $FFFF;
  6607. end;
  6608. else
  6609. InternalError(2020112321);
  6610. end;
  6611. TestValMin := TestValMin shr taicpu(hp1).oper[0]^.val;
  6612. TestValMax := TestValMax shr taicpu(hp1).oper[0]^.val;
  6613. end;
  6614. else
  6615. InternalError(2020112303);
  6616. end;
  6617. end;
  6618. (*
  6619. A_IMUL:
  6620. case taicpu(hp1).ops of
  6621. 2:
  6622. begin
  6623. if not MatchOpType(hp1, top_reg, top_reg) or
  6624. { Has to be an exact match on the register }
  6625. (taicpu(hp1).oper[0]^.reg <> ThisReg) or
  6626. (taicpu(hp1).oper[1]^.reg <> ThisReg) then
  6627. Break;
  6628. TestValMin := TestValMin * TestValMin;
  6629. TestValMax := TestValMax * TestValMax;
  6630. end;
  6631. 3:
  6632. begin
  6633. if not MatchOpType(hp1, top_const, top_reg, top_reg) or
  6634. { Has to be an exact match on the register }
  6635. (taicpu(hp1).oper[1]^.reg <> ThisReg) or
  6636. (taicpu(hp1).oper[2]^.reg <> ThisReg) or
  6637. ((taicpu(hp1).oper[0]^.val and UpperLimit) = taicpu(hp1).oper[0]^.val) or
  6638. { Is it in the negative range? }
  6639. (((not taicpu(hp1).oper[0]^.val) and (UpperLimit shr 1)) = (not taicpu(hp1).oper[0]^.val)) then
  6640. Break;
  6641. TestValMin := TestValMin * taicpu(hp1).oper[0]^.val;
  6642. TestValMax := TestValMax * taicpu(hp1).oper[0]^.val;
  6643. end;
  6644. else
  6645. Break;
  6646. end;
  6647. A_IDIV:
  6648. case taicpu(hp1).ops of
  6649. 3:
  6650. begin
  6651. if not MatchOpType(hp1, top_const, top_reg, top_reg) or
  6652. { Has to be an exact match on the register }
  6653. (taicpu(hp1).oper[1]^.reg <> ThisReg) or
  6654. (taicpu(hp1).oper[2]^.reg <> ThisReg) or
  6655. ((taicpu(hp1).oper[0]^.val and UpperLimit) = taicpu(hp1).oper[0]^.val) or
  6656. { Is it in the negative range? }
  6657. (((not taicpu(hp1).oper[0]^.val) and (UpperLimit shr 1)) = (not taicpu(hp1).oper[0]^.val)) then
  6658. Break;
  6659. TestValMin := TestValMin div taicpu(hp1).oper[0]^.val;
  6660. TestValMax := TestValMax div taicpu(hp1).oper[0]^.val;
  6661. end;
  6662. else
  6663. Break;
  6664. end;
  6665. *)
  6666. A_MOVZX:
  6667. begin
  6668. if not MatchOpType(taicpu(hp1), top_reg, top_reg) then
  6669. Break;
  6670. if not SuperRegistersEqual(taicpu(hp1).oper[0]^.reg, ThisReg) then
  6671. begin
  6672. { Because hp1 was obtained via GetNextInstructionUsingReg
  6673. and ThisReg doesn't appear in the first operand, it
  6674. must appear in the second operand and hence gets
  6675. overwritten }
  6676. if (InstrMax = -1) and
  6677. Reg1WriteOverwritesReg2Entirely(taicpu(hp1).oper[1]^.reg, ThisReg) then
  6678. begin
  6679. { The two MOVZX instructions are adjacent, so remove the first one }
  6680. DebugMsg(SPeepholeOptimization + 'Movzx2Nop 5', p);
  6681. RemoveCurrentP(p);
  6682. Result := True;
  6683. Exit;
  6684. end;
  6685. Break;
  6686. end;
  6687. { The objective here is to try to find a combination that
  6688. removes one of the MOV/Z instructions. }
  6689. case taicpu(hp1).opsize of
  6690. S_WL:
  6691. if (MinSize in [S_B, S_W]) then
  6692. begin
  6693. TargetSize := S_L;
  6694. TargetSubReg := R_SUBD;
  6695. end
  6696. else if ((TrySmaller in [S_B, S_W]) and not SmallerOverflow) then
  6697. begin
  6698. TargetSize := TrySmaller;
  6699. if TrySmaller = S_B then
  6700. TargetSubReg := R_SUBL
  6701. else
  6702. TargetSubReg := R_SUBW;
  6703. end
  6704. else
  6705. Break;
  6706. S_BW:
  6707. if (MinSize in [S_B, S_W]) then
  6708. begin
  6709. TargetSize := S_W;
  6710. TargetSubReg := R_SUBW;
  6711. end
  6712. else if ((TrySmaller = S_B) and not SmallerOverflow) then
  6713. begin
  6714. TargetSize := S_B;
  6715. TargetSubReg := R_SUBL;
  6716. end
  6717. else
  6718. Break;
  6719. S_BL:
  6720. if (MinSize in [S_B, S_W]) then
  6721. begin
  6722. TargetSize := S_L;
  6723. TargetSubReg := R_SUBD;
  6724. end
  6725. else if ((TrySmaller = S_B) and not SmallerOverflow) then
  6726. begin
  6727. TargetSize := S_B;
  6728. TargetSubReg := R_SUBL;
  6729. end
  6730. else
  6731. Break;
  6732. else
  6733. InternalError(2020112302);
  6734. end;
  6735. { Update the register to its new size }
  6736. setsubreg(ThisReg, TargetSubReg);
  6737. if TargetSize = MinSize then
  6738. begin
  6739. { Convert the input MOVZX to a MOV }
  6740. if (taicpu(p).oper[0]^.typ = top_reg) and
  6741. SuperRegistersEqual(taicpu(p).oper[0]^.reg, ThisReg) then
  6742. begin
  6743. { Or remove it completely! }
  6744. DebugMsg(SPeepholeOptimization + 'Movzx2Nop 1', p);
  6745. RemoveCurrentP(p);
  6746. p_removed := True;
  6747. end
  6748. else
  6749. begin
  6750. DebugMsg(SPeepholeOptimization + 'Movzx2Mov 1', p);
  6751. taicpu(p).opcode := A_MOV;
  6752. taicpu(p).oper[1]^.reg := ThisReg;
  6753. taicpu(p).opsize := TargetSize;
  6754. end;
  6755. Result := True;
  6756. end
  6757. else if TargetSize <> MaxSize then
  6758. begin
  6759. case MaxSize of
  6760. S_L:
  6761. if TargetSize = S_W then
  6762. begin
  6763. DebugMsg(SPeepholeOptimization + 'movzbl2movzbw', p);
  6764. taicpu(p).opsize := S_BW;
  6765. taicpu(p).oper[1]^.reg := ThisReg;
  6766. Result := True;
  6767. end
  6768. else
  6769. InternalError(2020112341);
  6770. S_W:
  6771. if TargetSize = S_L then
  6772. begin
  6773. DebugMsg(SPeepholeOptimization + 'movzbw2movzbl', p);
  6774. taicpu(p).opsize := S_BL;
  6775. taicpu(p).oper[1]^.reg := ThisReg;
  6776. Result := True;
  6777. end
  6778. else
  6779. InternalError(2020112342);
  6780. else
  6781. ;
  6782. end;
  6783. end;
  6784. if (MaxSize = TargetSize) or
  6785. ((TargetSize = S_L) and (taicpu(hp1).opsize in [S_L, S_BL, S_WL])) or
  6786. ((TargetSize = S_W) and (taicpu(hp1).opsize in [S_W, S_BW])) then
  6787. begin
  6788. { Convert the output MOVZX to a MOV }
  6789. if SuperRegistersEqual(taicpu(hp1).oper[1]^.reg, ThisReg) then
  6790. begin
  6791. { Or remove it completely! }
  6792. DebugMsg(SPeepholeOptimization + 'Movzx2Nop 2', hp1);
  6793. { Be careful; if p = hp1 and p was also removed, p
  6794. will become a dangling pointer }
  6795. if p = hp1 then
  6796. RemoveCurrentp(p) { p = hp1 and will then become the next instruction }
  6797. else
  6798. RemoveInstruction(hp1);
  6799. end
  6800. else
  6801. begin
  6802. taicpu(hp1).opcode := A_MOV;
  6803. taicpu(hp1).oper[0]^.reg := ThisReg;
  6804. taicpu(hp1).opsize := TargetSize;
  6805. { Check to see if the active register is used afterwards;
  6806. if not, we can change it and make a saving. }
  6807. RegInUse := False;
  6808. TransferUsedRegs(TmpUsedRegs);
  6809. { The target register may be marked as in use to cross
  6810. a jump to a distant label, so exclude it }
  6811. ExcludeRegFromUsedRegs(taicpu(hp1).oper[1]^.reg, TmpUsedRegs);
  6812. hp2 := p;
  6813. repeat
  6814. UpdateUsedRegs(TmpUsedRegs, tai(hp2.next));
  6815. { Explicitly check for the excluded register (don't include the first
  6816. instruction as it may be reading from here }
  6817. if ((p <> hp2) and (RegInInstruction(taicpu(hp1).oper[1]^.reg, hp2))) or
  6818. RegInUsedRegs(taicpu(hp1).oper[1]^.reg, TmpUsedRegs) then
  6819. begin
  6820. RegInUse := True;
  6821. Break;
  6822. end;
  6823. if not GetNextInstruction(hp2, hp2) then
  6824. InternalError(2020112340);
  6825. until (hp2 = hp1);
  6826. if not RegInUse and not RegUsedAfterInstruction(ThisReg, hp1, TmpUsedRegs) then
  6827. begin
  6828. DebugMsg(SPeepholeOptimization + 'Simplified register usage so ' + debug_regname(taicpu(hp1).oper[1]^.reg) + ' = ' + debug_regname(taicpu(p).oper[1]^.reg), p);
  6829. ThisReg := taicpu(hp1).oper[1]^.reg;
  6830. RegChanged := True;
  6831. TransferUsedRegs(TmpUsedRegs);
  6832. AllocRegBetween(ThisReg, p, hp1, TmpUsedRegs);
  6833. DebugMsg(SPeepholeOptimization + 'Movzx2Nop 3', hp1);
  6834. if p = hp1 then
  6835. RemoveCurrentp(p) { p = hp1 and will then become the next instruction }
  6836. else
  6837. RemoveInstruction(hp1);
  6838. { Instruction will become "mov %reg,%reg" }
  6839. if not p_removed and (taicpu(p).opcode = A_MOV) and
  6840. MatchOperand(taicpu(p).oper[0]^, ThisReg) then
  6841. begin
  6842. DebugMsg(SPeepholeOptimization + 'Movzx2Nop 6', p);
  6843. RemoveCurrentP(p);
  6844. p_removed := True;
  6845. end
  6846. else
  6847. taicpu(p).oper[1]^.reg := ThisReg;
  6848. Result := True;
  6849. end
  6850. else
  6851. DebugMsg(SPeepholeOptimization + 'Movzx2Mov 2', hp1);
  6852. end;
  6853. end
  6854. else
  6855. InternalError(2020112330);
  6856. { Now go through every instruction we found and change the
  6857. size. If TargetSize = MaxSize, then almost no changes are
  6858. needed and Result can remain False if it hasn't been set
  6859. yet.
  6860. If RegChanged is True, then the register requires changing
  6861. and so the point about TargetSize = MaxSize doesn't apply. }
  6862. if ((TargetSize <> MaxSize) or RegChanged) and (InstrMax >= 0) then
  6863. begin
  6864. for Index := 0 to InstrMax do
  6865. begin
  6866. { If p_removed is true, then the original MOV/Z was removed
  6867. and removing the AND instruction may not be safe if it
  6868. appears first }
  6869. if (InstrList[Index].oper[InstrList[Index].ops - 1]^.typ <> top_reg) then
  6870. InternalError(2020112310);
  6871. if InstrList[Index].oper[0]^.typ = top_reg then
  6872. InstrList[Index].oper[0]^.reg := ThisReg;
  6873. InstrList[Index].oper[InstrList[Index].ops - 1]^.reg := ThisReg;
  6874. InstrList[Index].opsize := TargetSize;
  6875. end;
  6876. Result := True;
  6877. end;
  6878. Exit;
  6879. end;
  6880. else
  6881. { This includes ADC, SBB, IDIV and SAR }
  6882. Break;
  6883. end;
  6884. if (TestValMin < 0) or (TestValMax < 0) or
  6885. (TestValMin > UpperLimit) or (TestValMax > UpperLimit) then
  6886. { Overflow }
  6887. Break
  6888. else if not SmallerOverflow and (TrySmaller <> S_NO) and
  6889. ((TestValMin > TrySmallerLimit) or (TestValMax > TrySmallerLimit)) then
  6890. SmallerOverflow := True;
  6891. { Contains highest index (so instruction count - 1) }
  6892. Inc(InstrMax);
  6893. if InstrMax > High(InstrList) then
  6894. SetLength(InstrList, InstrMax + LIST_STEP_SIZE);
  6895. InstrList[InstrMax] := taicpu(hp1);
  6896. end;
  6897. end;
  6898. function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
  6899. var
  6900. hp1 : tai;
  6901. begin
  6902. Result:=false;
  6903. if (taicpu(p).ops >= 2) and
  6904. ((taicpu(p).oper[0]^.typ = top_const) or
  6905. ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
  6906. (taicpu(p).oper[1]^.typ = top_reg) and
  6907. ((taicpu(p).ops = 2) or
  6908. ((taicpu(p).oper[2]^.typ = top_reg) and
  6909. (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
  6910. GetLastInstruction(p,hp1) and
  6911. MatchInstruction(hp1,A_MOV,[]) and
  6912. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  6913. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  6914. begin
  6915. TransferUsedRegs(TmpUsedRegs);
  6916. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,p,TmpUsedRegs)) or
  6917. ((taicpu(p).ops = 3) and (taicpu(p).oper[1]^.reg=taicpu(p).oper[2]^.reg)) then
  6918. { change
  6919. mov reg1,reg2
  6920. imul y,reg2 to imul y,reg1,reg2 }
  6921. begin
  6922. taicpu(p).ops := 3;
  6923. taicpu(p).loadreg(2,taicpu(p).oper[1]^.reg);
  6924. taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
  6925. DebugMsg(SPeepholeOptimization + 'MovImul2Imul done',p);
  6926. RemoveInstruction(hp1);
  6927. result:=true;
  6928. end;
  6929. end;
  6930. end;
  6931. procedure TX86AsmOptimizer.ConvertJumpToRET(const p: tai; const ret_p: tai);
  6932. var
  6933. ThisLabel: TAsmLabel;
  6934. begin
  6935. ThisLabel := tasmlabel(taicpu(p).oper[0]^.ref^.symbol);
  6936. ThisLabel.decrefs;
  6937. taicpu(p).opcode := A_RET;
  6938. taicpu(p).is_jmp := false;
  6939. taicpu(p).ops := taicpu(ret_p).ops;
  6940. case taicpu(ret_p).ops of
  6941. 0:
  6942. taicpu(p).clearop(0);
  6943. 1:
  6944. taicpu(p).loadconst(0,taicpu(ret_p).oper[0]^.val);
  6945. else
  6946. internalerror(2016041301);
  6947. end;
  6948. { If the original label is now dead, it might turn out that the label
  6949. immediately follows p. As a result, everything beyond it, which will
  6950. be just some final register configuration and a RET instruction, is
  6951. now dead code. [Kit] }
  6952. { NOTE: This is much faster than introducing a OptPass2RET routine and
  6953. running RemoveDeadCodeAfterJump for each RET instruction, because
  6954. this optimisation rarely happens and most RETs appear at the end of
  6955. routines where there is nothing that can be stripped. [Kit] }
  6956. if not ThisLabel.is_used then
  6957. RemoveDeadCodeAfterJump(p);
  6958. end;
  6959. function TX86AsmOptimizer.OptPass2SETcc(var p: tai): boolean;
  6960. var
  6961. hp1,hp2,next: tai; SetC, JumpC: TAsmCond;
  6962. Unconditional, PotentialModified: Boolean;
  6963. OperPtr: POper;
  6964. NewRef: TReference;
  6965. InstrList: array of taicpu;
  6966. InstrMax, Index: Integer;
  6967. const
  6968. {$ifdef DEBUG_AOPTCPU}
  6969. SNoFlags: shortstring = ' so the flags aren''t modified';
  6970. {$else DEBUG_AOPTCPU}
  6971. SNoFlags = '';
  6972. {$endif DEBUG_AOPTCPU}
  6973. begin
  6974. Result:=false;
  6975. if MatchOpType(taicpu(p),top_reg) and GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  6976. begin
  6977. if MatchInstruction(hp1, A_TEST, [S_B]) and
  6978. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  6979. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and
  6980. (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and
  6981. GetNextInstruction(hp1, hp2) and
  6982. MatchInstruction(hp2, A_Jcc, A_SETcc, []) then
  6983. { Change from: To:
  6984. set(C) %reg j(~C) label
  6985. test %reg,%reg/cmp $0,%reg
  6986. je label
  6987. set(C) %reg j(C) label
  6988. test %reg,%reg/cmp $0,%reg
  6989. jne label
  6990. (Also do something similar with sete/setne instead of je/jne)
  6991. }
  6992. begin
  6993. { Before we do anything else, we need to check the instructions
  6994. in between SETcc and TEST to make sure they don't modify the
  6995. FLAGS register - if -O2 or under, there won't be any
  6996. instructions between SET and TEST }
  6997. TransferUsedRegs(TmpUsedRegs);
  6998. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  6999. if (cs_opt_level3 in current_settings.optimizerswitches) then
  7000. begin
  7001. next := p;
  7002. SetLength(InstrList, 0);
  7003. InstrMax := -1;
  7004. PotentialModified := False;
  7005. { Make a note of every instruction that modifies the FLAGS
  7006. register }
  7007. while GetNextInstruction(next, next) and (next <> hp1) do
  7008. begin
  7009. if next.typ <> ait_instruction then
  7010. { GetNextInstructionUsingReg should have returned False }
  7011. InternalError(2021051701);
  7012. if RegModifiedByInstruction(NR_DEFAULTFLAGS, next) then
  7013. begin
  7014. case taicpu(next).opcode of
  7015. A_SETcc,
  7016. A_CMOVcc,
  7017. A_Jcc:
  7018. begin
  7019. if PotentialModified then
  7020. { Not safe because the flags were modified earlier }
  7021. Exit
  7022. else
  7023. { Condition is the same as the initial SETcc, so this is safe
  7024. (don't add to instruction list though) }
  7025. Continue;
  7026. end;
  7027. A_ADD:
  7028. begin
  7029. if (taicpu(next).opsize = S_B) or
  7030. { LEA doesn't support 8-bit operands }
  7031. (taicpu(next).oper[1]^.typ <> top_reg) or
  7032. { Must write to a register }
  7033. (taicpu(next).oper[0]^.typ = top_ref) then
  7034. { Require a constant or a register }
  7035. Exit;
  7036. PotentialModified := True;
  7037. end;
  7038. A_SUB:
  7039. begin
  7040. if (taicpu(next).opsize = S_B) or
  7041. { LEA doesn't support 8-bit operands }
  7042. (taicpu(next).oper[1]^.typ <> top_reg) or
  7043. { Must write to a register }
  7044. (taicpu(next).oper[0]^.typ <> top_const) or
  7045. (taicpu(next).oper[0]^.val = $80000000) then
  7046. { Can't subtract a register with LEA - also
  7047. check that the value isn't -2^31, as this
  7048. can't be negated }
  7049. Exit;
  7050. PotentialModified := True;
  7051. end;
  7052. A_SAL,
  7053. A_SHL:
  7054. begin
  7055. if (taicpu(next).opsize = S_B) or
  7056. { LEA doesn't support 8-bit operands }
  7057. (taicpu(next).oper[1]^.typ <> top_reg) or
  7058. { Must write to a register }
  7059. (taicpu(next).oper[0]^.typ <> top_const) or
  7060. (taicpu(next).oper[0]^.val < 0) or
  7061. (taicpu(next).oper[0]^.val > 3) then
  7062. Exit;
  7063. PotentialModified := True;
  7064. end;
  7065. A_IMUL:
  7066. begin
  7067. if (taicpu(next).ops <> 3) or
  7068. (taicpu(next).oper[1]^.typ <> top_reg) or
  7069. { Must write to a register }
  7070. (taicpu(next).oper[2]^.val in [2,3,4,5,8,9]) then
  7071. { We can convert "imul x,%reg1,%reg2" (where x = 2, 4 or 8)
  7072. to "lea (%reg1,x),%reg2". If x = 3, 5 or 9, we can
  7073. change this to "lea (%reg1,%reg1,(x-1)),%reg2" }
  7074. Exit
  7075. else
  7076. PotentialModified := True;
  7077. end;
  7078. else
  7079. { Don't know how to change this, so abort }
  7080. Exit;
  7081. end;
  7082. { Contains highest index (so instruction count - 1) }
  7083. Inc(InstrMax);
  7084. if InstrMax > High(InstrList) then
  7085. SetLength(InstrList, InstrMax + LIST_STEP_SIZE);
  7086. InstrList[InstrMax] := taicpu(next);
  7087. end;
  7088. UpdateUsedRegs(TmpUsedRegs, tai(next.next));
  7089. end;
  7090. if not Assigned(next) or (next <> hp1) then
  7091. { It should be equal to hp1 }
  7092. InternalError(2021051702);
  7093. { Cycle through each instruction and check to see if we can
  7094. change them to versions that don't modify the flags }
  7095. if (InstrMax >= 0) then
  7096. begin
  7097. for Index := 0 to InstrMax do
  7098. case InstrList[Index].opcode of
  7099. A_ADD:
  7100. begin
  7101. DebugMsg(SPeepholeOptimization + 'ADD -> LEA' + SNoFlags, InstrList[Index]);
  7102. InstrList[Index].opcode := A_LEA;
  7103. reference_reset(NewRef, 1, []);
  7104. NewRef.base := InstrList[Index].oper[1]^.reg;
  7105. if InstrList[Index].oper[0]^.typ = top_reg then
  7106. begin
  7107. NewRef.index := InstrList[Index].oper[0]^.reg;
  7108. NewRef.scalefactor := 1;
  7109. end
  7110. else
  7111. NewRef.offset := InstrList[Index].oper[0]^.val;
  7112. InstrList[Index].loadref(0, NewRef);
  7113. end;
  7114. A_SUB:
  7115. begin
  7116. DebugMsg(SPeepholeOptimization + 'SUB -> LEA' + SNoFlags, InstrList[Index]);
  7117. InstrList[Index].opcode := A_LEA;
  7118. reference_reset(NewRef, 1, []);
  7119. NewRef.base := InstrList[Index].oper[1]^.reg;
  7120. NewRef.offset := -InstrList[Index].oper[0]^.val;
  7121. InstrList[Index].loadref(0, NewRef);
  7122. end;
  7123. A_SHL,
  7124. A_SAL:
  7125. begin
  7126. DebugMsg(SPeepholeOptimization + 'SHL -> LEA' + SNoFlags, InstrList[Index]);
  7127. InstrList[Index].opcode := A_LEA;
  7128. reference_reset(NewRef, 1, []);
  7129. NewRef.index := InstrList[Index].oper[1]^.reg;
  7130. NewRef.scalefactor := 1 shl (InstrList[Index].oper[0]^.val);
  7131. InstrList[Index].loadref(0, NewRef);
  7132. end;
  7133. A_IMUL:
  7134. begin
  7135. DebugMsg(SPeepholeOptimization + 'IMUL -> LEA' + SNoFlags, InstrList[Index]);
  7136. InstrList[Index].opcode := A_LEA;
  7137. reference_reset(NewRef, 1, []);
  7138. NewRef.index := InstrList[Index].oper[1]^.reg;
  7139. case InstrList[Index].oper[0]^.val of
  7140. 2, 4, 8:
  7141. NewRef.scalefactor := InstrList[Index].oper[0]^.val;
  7142. else {3, 5 and 9}
  7143. begin
  7144. NewRef.scalefactor := InstrList[Index].oper[0]^.val - 1;
  7145. NewRef.base := InstrList[Index].oper[1]^.reg;
  7146. end;
  7147. end;
  7148. InstrList[Index].loadref(0, NewRef);
  7149. end;
  7150. else
  7151. InternalError(2021051710);
  7152. end;
  7153. end;
  7154. { Mark the FLAGS register as used across this whole block }
  7155. AllocRegBetween(NR_DEFAULTFLAGS, p, hp1, UsedRegs);
  7156. end;
  7157. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  7158. JumpC := taicpu(hp2).condition;
  7159. Unconditional := False;
  7160. if conditions_equal(JumpC, C_E) then
  7161. SetC := inverse_cond(taicpu(p).condition)
  7162. else if conditions_equal(JumpC, C_NE) then
  7163. SetC := taicpu(p).condition
  7164. else
  7165. { We've got something weird here (and inefficent) }
  7166. begin
  7167. DebugMsg('DEBUG: Inefficient jump - check code generation', p);
  7168. SetC := C_NONE;
  7169. { JAE/JNB will always branch (use 'condition_in', since C_AE <> C_NB normally) }
  7170. if condition_in(C_AE, JumpC) then
  7171. Unconditional := True
  7172. else
  7173. { Not sure what to do with this jump - drop out }
  7174. Exit;
  7175. end;
  7176. RemoveInstruction(hp1);
  7177. if Unconditional then
  7178. MakeUnconditional(taicpu(hp2))
  7179. else
  7180. begin
  7181. if SetC = C_NONE then
  7182. InternalError(2018061402);
  7183. taicpu(hp2).SetCondition(SetC);
  7184. end;
  7185. { as hp2 is a jump, we cannot use RegUsedAfterInstruction but we have to check if it is included in
  7186. TmpUsedRegs }
  7187. if not TmpUsedRegs[getregtype(taicpu(p).oper[0]^.reg)].IsUsed(taicpu(p).oper[0]^.reg) then
  7188. begin
  7189. RemoveCurrentp(p, hp2);
  7190. if taicpu(hp2).opcode = A_SETcc then
  7191. DebugMsg(SPeepholeOptimization + 'SETcc/TEST/SETcc -> SETcc',p)
  7192. else
  7193. DebugMsg(SPeepholeOptimization + 'SETcc/TEST/Jcc -> Jcc',p);
  7194. end
  7195. else
  7196. if taicpu(hp2).opcode = A_SETcc then
  7197. DebugMsg(SPeepholeOptimization + 'SETcc/TEST/SETcc -> SETcc/SETcc',p)
  7198. else
  7199. DebugMsg(SPeepholeOptimization + 'SETcc/TEST/Jcc -> SETcc/Jcc',p);
  7200. Result := True;
  7201. end
  7202. else if
  7203. { Make sure the instructions are adjacent }
  7204. (
  7205. not (cs_opt_level3 in current_settings.optimizerswitches) or
  7206. GetNextInstruction(p, hp1)
  7207. ) and
  7208. MatchInstruction(hp1, A_MOV, [S_B]) and
  7209. { Writing to memory is allowed }
  7210. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) then
  7211. begin
  7212. {
  7213. Watch out for sequences such as:
  7214. set(c)b %regb
  7215. movb %regb,(ref)
  7216. movb $0,1(ref)
  7217. movb $0,2(ref)
  7218. movb $0,3(ref)
  7219. Much more efficient to turn it into:
  7220. movl $0,%regl
  7221. set(c)b %regb
  7222. movl %regl,(ref)
  7223. Or:
  7224. set(c)b %regb
  7225. movzbl %regb,%regl
  7226. movl %regl,(ref)
  7227. }
  7228. if (taicpu(hp1).oper[1]^.typ = top_ref) and
  7229. GetNextInstruction(hp1, hp2) and
  7230. MatchInstruction(hp2, A_MOV, [S_B]) and
  7231. (taicpu(hp2).oper[1]^.typ = top_ref) and
  7232. CheckMemoryWrite(taicpu(hp1), taicpu(hp2)) then
  7233. begin
  7234. { Don't do anything else except set Result to True }
  7235. end
  7236. else
  7237. begin
  7238. if taicpu(p).oper[0]^.typ = top_reg then
  7239. begin
  7240. TransferUsedRegs(TmpUsedRegs);
  7241. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  7242. end;
  7243. { If it's not a register, it's a memory address }
  7244. if (taicpu(p).oper[0]^.typ <> top_reg) or RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp1, TmpUsedRegs) then
  7245. begin
  7246. { Even if the register is still in use, we can minimise the
  7247. pipeline stall by changing the MOV into another SETcc. }
  7248. taicpu(hp1).opcode := A_SETcc;
  7249. taicpu(hp1).condition := taicpu(p).condition;
  7250. if taicpu(hp1).oper[1]^.typ = top_ref then
  7251. begin
  7252. { Swapping the operand pointers like this is probably a
  7253. bit naughty, but it is far faster than using loadoper
  7254. to transfer the reference from oper[1] to oper[0] if
  7255. you take into account the extra procedure calls and
  7256. the memory allocation and deallocation required }
  7257. OperPtr := taicpu(hp1).oper[1];
  7258. taicpu(hp1).oper[1] := taicpu(hp1).oper[0];
  7259. taicpu(hp1).oper[0] := OperPtr;
  7260. end
  7261. else
  7262. taicpu(hp1).oper[0]^.reg := taicpu(hp1).oper[1]^.reg;
  7263. taicpu(hp1).clearop(1);
  7264. taicpu(hp1).ops := 1;
  7265. DebugMsg(SPeepholeOptimization + 'SETcc/Mov -> SETcc/SETcc',p);
  7266. end
  7267. else
  7268. begin
  7269. if taicpu(hp1).oper[1]^.typ = top_reg then
  7270. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,UsedRegs);
  7271. taicpu(p).loadoper(0, taicpu(hp1).oper[1]^);
  7272. RemoveInstruction(hp1);
  7273. DebugMsg(SPeepholeOptimization + 'SETcc/Mov -> SETcc',p);
  7274. end
  7275. end;
  7276. Result := True;
  7277. end;
  7278. end;
  7279. end;
  7280. function TX86AsmOptimizer.OptPass2Jmp(var p : tai) : boolean;
  7281. var
  7282. hp1: tai;
  7283. Count: Integer;
  7284. OrigLabel: TAsmLabel;
  7285. begin
  7286. result := False;
  7287. { Sometimes, the optimisations below can permit this }
  7288. RemoveDeadCodeAfterJump(p);
  7289. if (taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full) and (taicpu(p).oper[0]^.ref^.base=NR_NO) and
  7290. (taicpu(p).oper[0]^.ref^.index=NR_NO) and (taicpu(p).oper[0]^.ref^.symbol is tasmlabel) then
  7291. begin
  7292. OrigLabel := TAsmLabel(taicpu(p).oper[0]^.ref^.symbol);
  7293. { Also a side-effect of optimisations }
  7294. if CollapseZeroDistJump(p, OrigLabel) then
  7295. begin
  7296. Result := True;
  7297. Exit;
  7298. end;
  7299. hp1 := GetLabelWithSym(OrigLabel);
  7300. if (taicpu(p).condition=C_None) and assigned(hp1) and SkipLabels(hp1,hp1) and (hp1.typ = ait_instruction) then
  7301. begin
  7302. case taicpu(hp1).opcode of
  7303. A_RET:
  7304. {
  7305. change
  7306. jmp .L1
  7307. ...
  7308. .L1:
  7309. ret
  7310. into
  7311. ret
  7312. }
  7313. begin
  7314. ConvertJumpToRET(p, hp1);
  7315. result:=true;
  7316. end;
  7317. { Check any kind of direct assignment instruction }
  7318. A_MOV,
  7319. A_MOVD,
  7320. A_MOVQ,
  7321. A_MOVSX,
  7322. {$ifdef x86_64}
  7323. A_MOVSXD,
  7324. {$endif x86_64}
  7325. A_MOVZX,
  7326. A_MOVAPS,
  7327. A_MOVUPS,
  7328. A_MOVSD,
  7329. A_MOVAPD,
  7330. A_MOVUPD,
  7331. A_MOVDQA,
  7332. A_MOVDQU,
  7333. A_VMOVSS,
  7334. A_VMOVAPS,
  7335. A_VMOVUPS,
  7336. A_VMOVSD,
  7337. A_VMOVAPD,
  7338. A_VMOVUPD,
  7339. A_VMOVDQA,
  7340. A_VMOVDQU:
  7341. if ((current_settings.optimizerswitches * [cs_opt_level3, cs_opt_size]) <> [cs_opt_size]) and
  7342. CheckJumpMovTransferOpt(p, hp1, 0, Count) then
  7343. begin
  7344. Result := True;
  7345. Exit;
  7346. end;
  7347. else
  7348. ;
  7349. end;
  7350. end;
  7351. end;
  7352. end;
  7353. class function TX86AsmOptimizer.CanBeCMOV(p : tai) : boolean;
  7354. begin
  7355. CanBeCMOV:=assigned(p) and
  7356. MatchInstruction(p,A_MOV,[S_W,S_L,S_Q]) and
  7357. { we can't use cmov ref,reg because
  7358. ref could be nil and cmov still throws an exception
  7359. if ref=nil but the mov isn't done (FK)
  7360. or ((taicpu(p).oper[0]^.typ = top_ref) and
  7361. (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
  7362. }
  7363. (taicpu(p).oper[1]^.typ = top_reg) and
  7364. (
  7365. (taicpu(p).oper[0]^.typ = top_reg) or
  7366. { allow references, but only pure symbols or got rel. addressing with RIP as based,
  7367. it is not expected that this can cause a seg. violation }
  7368. (
  7369. (taicpu(p).oper[0]^.typ = top_ref) and
  7370. IsRefSafe(taicpu(p).oper[0]^.ref)
  7371. )
  7372. );
  7373. end;
  7374. function TX86AsmOptimizer.OptPass2Jcc(var p : tai) : boolean;
  7375. var
  7376. hp1,hp2: tai;
  7377. {$ifndef i8086}
  7378. hp3,hp4,hpmov2, hp5: tai;
  7379. l : Longint;
  7380. condition : TAsmCond;
  7381. {$endif i8086}
  7382. carryadd_opcode : TAsmOp;
  7383. symbol: TAsmSymbol;
  7384. reg: tsuperregister;
  7385. increg, tmpreg: TRegister;
  7386. begin
  7387. result:=false;
  7388. if GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) then
  7389. begin
  7390. symbol := TAsmLabel(taicpu(p).oper[0]^.ref^.symbol);
  7391. if (
  7392. (
  7393. ((Taicpu(hp1).opcode=A_ADD) or (Taicpu(hp1).opcode=A_SUB)) and
  7394. MatchOptype(Taicpu(hp1),top_const,top_reg) and
  7395. (Taicpu(hp1).oper[0]^.val=1)
  7396. ) or
  7397. ((Taicpu(hp1).opcode=A_INC) or (Taicpu(hp1).opcode=A_DEC))
  7398. ) and
  7399. GetNextInstruction(hp1,hp2) and
  7400. SkipAligns(hp2, hp2) and
  7401. (hp2.typ = ait_label) and
  7402. (Tasmlabel(symbol) = Tai_label(hp2).labsym) then
  7403. { jb @@1 cmc
  7404. inc/dec operand --> adc/sbb operand,0
  7405. @@1:
  7406. ... and ...
  7407. jnb @@1
  7408. inc/dec operand --> adc/sbb operand,0
  7409. @@1: }
  7410. begin
  7411. if Taicpu(p).condition in [C_NAE,C_B,C_C] then
  7412. begin
  7413. case taicpu(hp1).opcode of
  7414. A_INC,
  7415. A_ADD:
  7416. carryadd_opcode:=A_ADC;
  7417. A_DEC,
  7418. A_SUB:
  7419. carryadd_opcode:=A_SBB;
  7420. else
  7421. InternalError(2021011001);
  7422. end;
  7423. Taicpu(p).clearop(0);
  7424. Taicpu(p).ops:=0;
  7425. Taicpu(p).is_jmp:=false;
  7426. Taicpu(p).opcode:=A_CMC;
  7427. Taicpu(p).condition:=C_NONE;
  7428. DebugMsg(SPeepholeOptimization+'JccAdd/Inc/Dec2CmcAdc/Sbb',p);
  7429. Taicpu(hp1).ops:=2;
  7430. if (Taicpu(hp1).opcode=A_ADD) or (Taicpu(hp1).opcode=A_SUB) then
  7431. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[1]^)
  7432. else
  7433. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  7434. Taicpu(hp1).loadconst(0,0);
  7435. Taicpu(hp1).opcode:=carryadd_opcode;
  7436. result:=true;
  7437. exit;
  7438. end
  7439. else if Taicpu(p).condition in [C_AE,C_NB,C_NC] then
  7440. begin
  7441. case taicpu(hp1).opcode of
  7442. A_INC,
  7443. A_ADD:
  7444. carryadd_opcode:=A_ADC;
  7445. A_DEC,
  7446. A_SUB:
  7447. carryadd_opcode:=A_SBB;
  7448. else
  7449. InternalError(2021011002);
  7450. end;
  7451. Taicpu(hp1).ops:=2;
  7452. DebugMsg(SPeepholeOptimization+'JccAdd/Inc/Dec2Adc/Sbb',p);
  7453. if (Taicpu(hp1).opcode=A_ADD) or (Taicpu(hp1).opcode=A_SUB) then
  7454. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[1]^)
  7455. else
  7456. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  7457. Taicpu(hp1).loadconst(0,0);
  7458. Taicpu(hp1).opcode:=carryadd_opcode;
  7459. RemoveCurrentP(p, hp1);
  7460. result:=true;
  7461. exit;
  7462. end
  7463. {
  7464. jcc @@1 setcc tmpreg
  7465. inc/dec/add/sub operand -> (movzx tmpreg)
  7466. @@1: add/sub tmpreg,operand
  7467. While this increases code size slightly, it makes the code much faster if the
  7468. jump is unpredictable
  7469. }
  7470. else if not(cs_opt_size in current_settings.optimizerswitches) then
  7471. begin
  7472. { search for an available register which is volatile }
  7473. for reg in tcpuregisterset do
  7474. begin
  7475. if
  7476. {$if defined(i386) or defined(i8086)}
  7477. { Only use registers whose lowest 8-bits can Be accessed }
  7478. (reg in [RS_EAX,RS_EBX,RS_ECX,RS_EDX]) and
  7479. {$endif i386 or i8086}
  7480. (reg in paramanager.get_volatile_registers_int(current_procinfo.procdef.proccalloption)) and
  7481. not(reg in UsedRegs[R_INTREGISTER].GetUsedRegs)
  7482. { We don't need to check if tmpreg is in hp1 or not, because
  7483. it will be marked as in use at p (if not, this is
  7484. indictive of a compiler bug). }
  7485. then
  7486. begin
  7487. TAsmLabel(symbol).decrefs;
  7488. increg := newreg(R_INTREGISTER,reg,R_SUBL);
  7489. Taicpu(p).clearop(0);
  7490. Taicpu(p).ops:=1;
  7491. Taicpu(p).is_jmp:=false;
  7492. Taicpu(p).opcode:=A_SETcc;
  7493. DebugMsg(SPeepholeOptimization+'JccAdd2SetccAdd',p);
  7494. Taicpu(p).condition:=inverse_cond(Taicpu(p).condition);
  7495. Taicpu(p).loadreg(0,increg);
  7496. if getsubreg(Taicpu(hp1).oper[1]^.reg)<>R_SUBL then
  7497. begin
  7498. case getsubreg(Taicpu(hp1).oper[1]^.reg) of
  7499. R_SUBW:
  7500. begin
  7501. tmpreg := newreg(R_INTREGISTER,reg,R_SUBW);
  7502. hp2:=Taicpu.op_reg_reg(A_MOVZX,S_BW,increg,tmpreg);
  7503. end;
  7504. R_SUBD:
  7505. begin
  7506. tmpreg := newreg(R_INTREGISTER,reg,R_SUBD);
  7507. hp2:=Taicpu.op_reg_reg(A_MOVZX,S_BL,increg,tmpreg);
  7508. end;
  7509. {$ifdef x86_64}
  7510. R_SUBQ:
  7511. begin
  7512. { MOVZX doesn't have a 64-bit variant, because
  7513. the 32-bit version implicitly zeroes the
  7514. upper 32-bits of the destination register }
  7515. hp2:=Taicpu.op_reg_reg(A_MOVZX,S_BL,increg,
  7516. newreg(R_INTREGISTER,reg,R_SUBD));
  7517. tmpreg := newreg(R_INTREGISTER,reg,R_SUBQ);
  7518. end;
  7519. {$endif x86_64}
  7520. else
  7521. Internalerror(2020030601);
  7522. end;
  7523. taicpu(hp2).fileinfo:=taicpu(hp1).fileinfo;
  7524. asml.InsertAfter(hp2,p);
  7525. end
  7526. else
  7527. tmpreg := increg;
  7528. if (Taicpu(hp1).opcode=A_INC) or (Taicpu(hp1).opcode=A_DEC) then
  7529. begin
  7530. Taicpu(hp1).ops:=2;
  7531. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^)
  7532. end;
  7533. Taicpu(hp1).loadreg(0,tmpreg);
  7534. AllocRegBetween(tmpreg,p,hp1,UsedRegs);
  7535. Result := True;
  7536. { p is no longer a Jcc instruction, so exit }
  7537. Exit;
  7538. end;
  7539. end;
  7540. end;
  7541. end;
  7542. { Detect the following:
  7543. jmp<cond> @Lbl1
  7544. jmp @Lbl2
  7545. ...
  7546. @Lbl1:
  7547. ret
  7548. Change to:
  7549. jmp<inv_cond> @Lbl2
  7550. ret
  7551. }
  7552. if MatchInstruction(hp1,A_JMP,[]) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full) then
  7553. begin
  7554. hp2:=getlabelwithsym(TAsmLabel(symbol));
  7555. if Assigned(hp2) and SkipLabels(hp2,hp2) and
  7556. MatchInstruction(hp2,A_RET,[S_NO]) then
  7557. begin
  7558. taicpu(p).condition := inverse_cond(taicpu(p).condition);
  7559. { Change label address to that of the unconditional jump }
  7560. taicpu(p).loadoper(0, taicpu(hp1).oper[0]^);
  7561. TAsmLabel(symbol).DecRefs;
  7562. taicpu(hp1).opcode := A_RET;
  7563. taicpu(hp1).is_jmp := false;
  7564. taicpu(hp1).ops := taicpu(hp2).ops;
  7565. DebugMsg(SPeepholeOptimization+'JccJmpRet2J!ccRet',p);
  7566. case taicpu(hp2).ops of
  7567. 0:
  7568. taicpu(hp1).clearop(0);
  7569. 1:
  7570. taicpu(hp1).loadconst(0,taicpu(hp2).oper[0]^.val);
  7571. else
  7572. internalerror(2016041302);
  7573. end;
  7574. end;
  7575. {$ifndef i8086}
  7576. end
  7577. {
  7578. convert
  7579. j<c> .L1
  7580. mov 1,reg
  7581. jmp .L2
  7582. .L1
  7583. mov 0,reg
  7584. .L2
  7585. into
  7586. mov 0,reg
  7587. set<not(c)> reg
  7588. take care of alignment and that the mov 0,reg is not converted into a xor as this
  7589. would destroy the flag contents
  7590. }
  7591. else if MatchInstruction(hp1,A_MOV,[]) and
  7592. MatchOpType(taicpu(hp1),top_const,top_reg) and
  7593. {$ifdef i386}
  7594. (
  7595. { Under i386, ESI, EDI, EBP and ESP
  7596. don't have an 8-bit representation }
  7597. not (getsupreg(taicpu(hp1).oper[1]^.reg) in [RS_ESI, RS_EDI, RS_EBP, RS_ESP])
  7598. ) and
  7599. {$endif i386}
  7600. (taicpu(hp1).oper[0]^.val=1) and
  7601. GetNextInstruction(hp1,hp2) and
  7602. MatchInstruction(hp2,A_JMP,[]) and (taicpu(hp2).oper[0]^.ref^.refaddr=addr_full) and
  7603. GetNextInstruction(hp2,hp3) and
  7604. { skip align }
  7605. ((hp3.typ<>ait_align) or GetNextInstruction(hp3,hp3)) and
  7606. (hp3.typ=ait_label) and
  7607. (tasmlabel(taicpu(p).oper[0]^.ref^.symbol)=tai_label(hp3).labsym) and
  7608. (tai_label(hp3).labsym.getrefs=1) and
  7609. GetNextInstruction(hp3,hp4) and
  7610. MatchInstruction(hp4,A_MOV,[]) and
  7611. MatchOpType(taicpu(hp4),top_const,top_reg) and
  7612. (taicpu(hp4).oper[0]^.val=0) and
  7613. MatchOperand(taicpu(hp1).oper[1]^,taicpu(hp4).oper[1]^) and
  7614. GetNextInstruction(hp4,hp5) and
  7615. (hp5.typ=ait_label) and
  7616. (tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol)=tai_label(hp5).labsym) and
  7617. (tai_label(hp5).labsym.getrefs=1) then
  7618. begin
  7619. AllocRegBetween(NR_FLAGS,p,hp4,UsedRegs);
  7620. DebugMsg(SPeepholeOptimization+'JccMovJmpMov2MovSetcc',p);
  7621. { remove last label }
  7622. RemoveInstruction(hp5);
  7623. { remove second label }
  7624. RemoveInstruction(hp3);
  7625. { if align is present remove it }
  7626. if GetNextInstruction(hp2,hp3) and (hp3.typ=ait_align) then
  7627. RemoveInstruction(hp3);
  7628. { remove jmp }
  7629. RemoveInstruction(hp2);
  7630. if taicpu(hp1).opsize=S_B then
  7631. RemoveInstruction(hp1)
  7632. else
  7633. taicpu(hp1).loadconst(0,0);
  7634. taicpu(hp4).opcode:=A_SETcc;
  7635. taicpu(hp4).opsize:=S_B;
  7636. taicpu(hp4).condition:=inverse_cond(taicpu(p).condition);
  7637. taicpu(hp4).loadreg(0,newreg(R_INTREGISTER,getsupreg(taicpu(hp4).oper[1]^.reg),R_SUBL));
  7638. taicpu(hp4).opercnt:=1;
  7639. taicpu(hp4).ops:=1;
  7640. taicpu(hp4).freeop(1);
  7641. RemoveCurrentP(p);
  7642. Result:=true;
  7643. exit;
  7644. end
  7645. else if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
  7646. begin
  7647. { check for
  7648. jCC xxx
  7649. <several movs>
  7650. xxx:
  7651. }
  7652. l:=0;
  7653. while assigned(hp1) and
  7654. CanBeCMOV(hp1) and
  7655. { stop on labels }
  7656. not(hp1.typ=ait_label) do
  7657. begin
  7658. inc(l);
  7659. GetNextInstruction(hp1,hp1);
  7660. end;
  7661. if assigned(hp1) then
  7662. begin
  7663. if FindLabel(tasmlabel(symbol),hp1) then
  7664. begin
  7665. if (l<=4) and (l>0) then
  7666. begin
  7667. condition:=inverse_cond(taicpu(p).condition);
  7668. GetNextInstruction(p,hp1);
  7669. repeat
  7670. if not Assigned(hp1) then
  7671. InternalError(2018062900);
  7672. taicpu(hp1).opcode:=A_CMOVcc;
  7673. taicpu(hp1).condition:=condition;
  7674. UpdateUsedRegs(hp1);
  7675. GetNextInstruction(hp1,hp1);
  7676. until not(CanBeCMOV(hp1));
  7677. { Remember what hp1 is in case there's multiple aligns to get rid of }
  7678. hp2 := hp1;
  7679. repeat
  7680. if not Assigned(hp2) then
  7681. InternalError(2018062910);
  7682. case hp2.typ of
  7683. ait_label:
  7684. { What we expected - break out of the loop (it won't be a dead label at the top of
  7685. a cluster because that was optimised at an earlier stage) }
  7686. Break;
  7687. ait_align:
  7688. { Go to the next entry until a label is found (may be multiple aligns before it) }
  7689. begin
  7690. hp2 := tai(hp2.Next);
  7691. Continue;
  7692. end;
  7693. else
  7694. begin
  7695. { Might be a comment or temporary allocation entry }
  7696. if not (hp2.typ in SkipInstr) then
  7697. InternalError(2018062911);
  7698. hp2 := tai(hp2.Next);
  7699. Continue;
  7700. end;
  7701. end;
  7702. until False;
  7703. { Now we can safely decrement the reference count }
  7704. tasmlabel(symbol).decrefs;
  7705. DebugMsg(SPeepholeOptimization+'JccMov2CMov',p);
  7706. { Remove the original jump }
  7707. RemoveInstruction(p); { Note, the choice to not use RemoveCurrentp is deliberate }
  7708. GetNextInstruction(hp2, p); { Instruction after the label }
  7709. { Remove the label if this is its final reference }
  7710. if (tasmlabel(symbol).getrefs=0) then
  7711. StripLabelFast(hp1);
  7712. if Assigned(p) then
  7713. begin
  7714. UpdateUsedRegs(p);
  7715. result:=true;
  7716. end;
  7717. exit;
  7718. end;
  7719. end
  7720. else
  7721. begin
  7722. { check further for
  7723. jCC xxx
  7724. <several movs 1>
  7725. jmp yyy
  7726. xxx:
  7727. <several movs 2>
  7728. yyy:
  7729. }
  7730. { hp2 points to jmp yyy }
  7731. hp2:=hp1;
  7732. { skip hp1 to xxx (or an align right before it) }
  7733. GetNextInstruction(hp1, hp1);
  7734. if assigned(hp2) and
  7735. assigned(hp1) and
  7736. (l<=3) and
  7737. (hp2.typ=ait_instruction) and
  7738. (taicpu(hp2).is_jmp) and
  7739. (taicpu(hp2).condition=C_None) and
  7740. { real label and jump, no further references to the
  7741. label are allowed }
  7742. (tasmlabel(symbol).getrefs=1) and
  7743. FindLabel(tasmlabel(symbol),hp1) then
  7744. begin
  7745. l:=0;
  7746. { skip hp1 to <several moves 2> }
  7747. if (hp1.typ = ait_align) then
  7748. GetNextInstruction(hp1, hp1);
  7749. GetNextInstruction(hp1, hpmov2);
  7750. hp1 := hpmov2;
  7751. while assigned(hp1) and
  7752. CanBeCMOV(hp1) do
  7753. begin
  7754. inc(l);
  7755. GetNextInstruction(hp1, hp1);
  7756. end;
  7757. { hp1 points to yyy (or an align right before it) }
  7758. hp3 := hp1;
  7759. if assigned(hp1) and
  7760. FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
  7761. begin
  7762. condition:=inverse_cond(taicpu(p).condition);
  7763. GetNextInstruction(p,hp1);
  7764. repeat
  7765. taicpu(hp1).opcode:=A_CMOVcc;
  7766. taicpu(hp1).condition:=condition;
  7767. UpdateUsedRegs(hp1);
  7768. GetNextInstruction(hp1,hp1);
  7769. until not(assigned(hp1)) or
  7770. not(CanBeCMOV(hp1));
  7771. condition:=inverse_cond(condition);
  7772. hp1 := hpmov2;
  7773. { hp1 is now at <several movs 2> }
  7774. while Assigned(hp1) and CanBeCMOV(hp1) do
  7775. begin
  7776. taicpu(hp1).opcode:=A_CMOVcc;
  7777. taicpu(hp1).condition:=condition;
  7778. UpdateUsedRegs(hp1);
  7779. GetNextInstruction(hp1,hp1);
  7780. end;
  7781. hp1 := p;
  7782. { Get first instruction after label }
  7783. GetNextInstruction(hp3, p);
  7784. if assigned(p) and (hp3.typ = ait_align) then
  7785. GetNextInstruction(p, p);
  7786. { Don't dereference yet, as doing so will cause
  7787. GetNextInstruction to skip the label and
  7788. optional align marker. [Kit] }
  7789. GetNextInstruction(hp2, hp4);
  7790. DebugMsg(SPeepholeOptimization+'JccMovJmpMov2CMovCMov',hp1);
  7791. { remove jCC }
  7792. RemoveInstruction(hp1);
  7793. { Now we can safely decrement it }
  7794. tasmlabel(symbol).decrefs;
  7795. { Remove label xxx (it will have a ref of zero due to the initial check }
  7796. StripLabelFast(hp4);
  7797. { remove jmp }
  7798. symbol := taicpu(hp2).oper[0]^.ref^.symbol;
  7799. RemoveInstruction(hp2);
  7800. { As before, now we can safely decrement it }
  7801. tasmlabel(symbol).decrefs;
  7802. { Remove label yyy (and the optional alignment) if its reference falls to zero }
  7803. if tasmlabel(symbol).getrefs = 0 then
  7804. StripLabelFast(hp3);
  7805. if Assigned(p) then
  7806. begin
  7807. UpdateUsedRegs(p);
  7808. result:=true;
  7809. end;
  7810. exit;
  7811. end;
  7812. end;
  7813. end;
  7814. end;
  7815. {$endif i8086}
  7816. end;
  7817. end;
  7818. end;
    function TX86AsmOptimizer.OptPass1Movx(var p : tai) : boolean;
      { Pass-1 peephole optimisations applied when p is a sign/zero-extension
        instruction (MOVZX/MOVSX, and on x86_64 presumably MOVSXD as well --
        NOTE(review): the exact opcode set accepted here is decided by the
        caller, confirm against the dispatch table).  Transformations handled:
          * movx + foldable arith op + store-back  ->  single smaller arith op
            operating directly on the source ("var3");
          * movx %reg1,%reg2 ; mov %reg2,%reg3  ->  movx %reg1,%reg3 when
            %reg2 dies ("MovxMov2Movx");
          * movx %reg1,%reg2 ; mov %reg2,%reg3  ->  mov %reg1,%reg3 when the
            second mov only reads the bits copied from %reg1 ("MovxMov2Mov");
          * remove the movx when a following SHL/SAL shifts the extended bits
            out anyway ("MovxShl2Shl");
          * hoist a following SHR (after MOVZX) or SAR (after MOVSX) to before
            the movx when the shift stays inside the narrow source size
            ("MovzShr2ShrMovz" / "MovsSar2SarMovs");
          * MOVZX-only rewrites: drop redundant ANDs after the movzx, drop the
            movzx when a following AND makes it superfluous, and replace
            movzx with cheaper AND/MOV forms on CPUs where MOVZX is slow
            ("MovzAnd2Movz*", "MovzAnd2And*", "var7".."var15").
        Returns True when p itself was removed/replaced; note that some
        branches (e.g. "var3", "MovxMov2Movx") rewrite following instructions
        without setting Result. }
      var
        hp1,hp2: tai;
        { True if p writes to a register and is immediately followed by
          another instruction (then available in hp1) -- precondition shared
          by most branches below }
        reg_and_hp1_is_instr: Boolean;
      begin
        result:=false;
        reg_and_hp1_is_instr:=(taicpu(p).oper[1]^.typ = top_reg) and
          GetNextInstruction(p,hp1) and
          (hp1.typ = ait_instruction);
        if reg_and_hp1_is_instr and
          (
            (taicpu(hp1).opcode <> A_LEA) or
            { If the LEA instruction can be converted into an arithmetic instruction,
              it may be possible to then fold it. }
            (
              { If the flags register is in use, don't change the instruction
                to an ADD otherwise this will scramble the flags. [Kit] }
              not RegInUsedRegs(NR_DEFAULTFLAGS, UsedRegs) and
              ConvertLEA(taicpu(hp1))
            )
          ) and
          IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
          GetNextInstruction(hp1,hp2) and
          MatchInstruction(hp2,A_MOV,[]) and
          (taicpu(hp2).oper[0]^.typ = top_reg) and
          OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
          { the store-back must use the movx source size, i.e. the value is
            written back to where it was extended from }
          ((taicpu(p).opsize in [S_BW,S_BL]) and (taicpu(hp2).opsize=S_B) or
           (taicpu(p).opsize in [S_WL]) and (taicpu(hp2).opsize=S_W)) and
    {$ifdef i386}
          { not all registers have byte size sub registers on i386 }
          ((taicpu(hp2).opsize<>S_B) or (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX])) and
    {$endif i386}
          (((taicpu(hp1).ops=2) and
            (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
           ((taicpu(hp1).ops=1) and
            (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
          not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then
          begin
            { change movsX/movzX reg/ref, reg2
                     add/sub/or/... reg3/$const, reg2
                     mov reg2 reg/ref
              to     add/sub/or/... reg3/$const, reg/ref }
            { by example:
                movswl  %si,%eax        movswl  %si,%eax      p
                decl    %eax            addl    %edx,%eax     hp1
                movw    %ax,%si         movw    %ax,%si       hp2
              ->
                movswl  %si,%eax        movswl  %si,%eax      p
                decw    %eax            addw    %edx,%eax     hp1
                movw    %ax,%si         movw    %ax,%si       hp2
            }
            taicpu(hp1).changeopsize(taicpu(hp2).opsize);
            {
              ->
                movswl  %si,%eax        movswl  %si,%eax      p
                decw    %si             addw    %dx,%si       hp1
                movw    %ax,%si         movw    %ax,%si       hp2
            }
            { redirect the arith op's destination (and, for two-operand
              forms, narrow a register source to the new size) }
            case taicpu(hp1).ops of
              1:
                taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
              2:
                begin
                  taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
                  if (taicpu(hp1).oper[0]^.typ = top_reg) then
                    setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
                end;
              else
                internalerror(2008042702);
            end;
            {
              ->
                decw    %si             addw    %dx,%si       p
            }
            DebugMsg(SPeepholeOptimization + 'var3',p);
            RemoveCurrentP(p, hp1);
            RemoveInstruction(hp2);
          end
        else if reg_and_hp1_is_instr and
          (taicpu(hp1).opcode = A_MOV) and
          MatchOpType(taicpu(hp1),top_reg,top_reg) and
          (MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^)
    {$ifdef x86_64}
          { check for implicit extension to 64 bit }
          or
          ((taicpu(p).opsize in [S_BL,S_WL]) and
           (taicpu(hp1).opsize=S_Q) and
           SuperRegistersEqual(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[0]^.reg)
          )
    {$endif x86_64}
          )
          then
          begin
            { change
                movx   %reg1,%reg2
                mov    %reg2,%reg3
                dealloc %reg2
              into
                movx   %reg,%reg3 }
            TransferUsedRegs(TmpUsedRegs);
            UpdateUsedRegs(TmpUsedRegs, tai(p.next));
            if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
              begin
                DebugMsg(SPeepholeOptimization + 'MovxMov2Movx',p);
    {$ifdef x86_64}
                { a 32-bit movx already zeroes the upper half of a 64-bit
                  register, so retarget the movx at the 32-bit sub register }
                if (taicpu(p).opsize in [S_BL,S_WL]) and
                  (taicpu(hp1).opsize=S_Q) then
                  taicpu(p).loadreg(1,newreg(R_INTREGISTER,getsupreg(taicpu(hp1).oper[1]^.reg),R_SUBD))
                else
    {$endif x86_64}
                  taicpu(p).loadreg(1,taicpu(hp1).oper[1]^.reg);
                RemoveInstruction(hp1);
              end;
          end
        else if reg_and_hp1_is_instr and
          (taicpu(hp1).opcode = A_MOV) and
          MatchOpType(taicpu(hp1),top_reg,top_reg) and
          { the second mov must be no wider than the movx source size }
          (((taicpu(p).opsize in [S_BW,S_BL,S_WL{$ifdef x86_64},S_BQ,S_WQ,S_LQ{$endif x86_64}]) and
            (taicpu(hp1).opsize=S_B)) or
           ((taicpu(p).opsize in [S_WL{$ifdef x86_64},S_WQ,S_LQ{$endif x86_64}]) and
            (taicpu(hp1).opsize=S_W))
    {$ifdef x86_64}
           or ((taicpu(p).opsize=S_LQ) and
            (taicpu(hp1).opsize=S_L))
    {$endif x86_64}
          ) and
          SuperRegistersEqual(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[0]^.reg) then
          begin
            { change
                movx   %reg1,%reg2
                mov    %reg2,%reg3
                dealloc %reg2
              into
                mov    %reg1,%reg3
              if the second mov accesses only the bits stored in reg1 }
            TransferUsedRegs(TmpUsedRegs);
            UpdateUsedRegs(TmpUsedRegs, tai(p.next));
            if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
              begin
                DebugMsg(SPeepholeOptimization + 'MovxMov2Mov',p);
                if taicpu(p).oper[0]^.typ=top_reg then
                  begin
                    { replace the mov source with the matching sub register
                      of the original movx source }
                    case taicpu(hp1).opsize of
                      S_B:
                        taicpu(hp1).loadreg(0,newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[0]^.reg),R_SUBL));
                      S_W:
                        taicpu(hp1).loadreg(0,newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[0]^.reg),R_SUBW));
                      S_L:
                        taicpu(hp1).loadreg(0,newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[0]^.reg),R_SUBD));
                      else
                        Internalerror(2020102301);
                    end;
                    AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
                  end
                else
                  taicpu(hp1).loadref(0,taicpu(p).oper[0]^.ref^);
                RemoveCurrentP(p);
                result:=true;
                exit;
              end;
          end
        else if reg_and_hp1_is_instr and
          (taicpu(p).oper[0]^.typ = top_reg) and
          (
            (taicpu(hp1).opcode = A_SHL) or (taicpu(hp1).opcode = A_SAL)
          ) and
          (taicpu(hp1).oper[0]^.typ = top_const) and
          SuperRegistersEqual(taicpu(p).oper[0]^.reg, taicpu(p).oper[1]^.reg) and
          MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^.reg) and
          { Minimum shift value allowed is the bit difference between the sizes }
          (taicpu(hp1).oper[0]^.val >=
            { Multiply by 8 because tcgsize2size returns bytes, not bits }
            8 * (
              tcgsize2size[reg_cgsize(taicpu(p).oper[1]^.reg)] -
              tcgsize2size[reg_cgsize(taicpu(p).oper[0]^.reg)]
            )
          ) then
          begin
            { For:
                movsx/movzx %reg1,%reg1 (same register, just different sizes)
                shl/sal     ##,%reg1
              Remove the movsx/movzx instruction if the shift overwrites the
              extended bits of the register (e.g. movslq %eax,%rax; shlq $32,%rax
            }
            DebugMsg(SPeepholeOptimization + 'MovxShl2Shl',p);
            RemoveCurrentP(p, hp1);
            Result := True;
            Exit;
          end
        else if reg_and_hp1_is_instr and
          (taicpu(p).oper[0]^.typ = top_reg) and
          (
            { SHR pairs with zero extension, SAR with sign extension --
              mixing them would change which bits are shifted in }
            ((taicpu(hp1).opcode = A_SHR) and (taicpu(p).opcode = A_MOVZX)) or
            ((taicpu(hp1).opcode = A_SAR) and (taicpu(p).opcode <> A_MOVZX))
          ) and
          (taicpu(hp1).oper[0]^.typ = top_const) and
          SuperRegistersEqual(taicpu(p).oper[0]^.reg, taicpu(p).oper[1]^.reg) and
          MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^.reg) and
          { Minimum shift value allowed is the bit size of the smallest register - 1 }
          (taicpu(hp1).oper[0]^.val <
            { Multiply by 8 because tcgsize2size returns bytes, not bits }
            8 * (
              tcgsize2size[reg_cgsize(taicpu(p).oper[0]^.reg)]
            )
          ) then
          begin
            { For:
                movsx %reg1,%reg1    movzx %reg1,%reg1 (same register, just different sizes)
                sar   ##,%reg1       shr   ##,%reg1
              Move the shift to before the movx instruction if the shift value
              is not too large.
            }
            asml.Remove(hp1);
            asml.InsertBefore(hp1, p);
            { the hoisted shift now operates on the narrow source register }
            taicpu(hp1).oper[1]^.reg := taicpu(p).oper[0]^.reg;
            case taicpu(p).opsize of
              s_BW, S_BL{$ifdef x86_64}, S_BQ{$endif}:
                taicpu(hp1).opsize := S_B;
              S_WL{$ifdef x86_64}, S_WQ{$endif}:
                taicpu(hp1).opsize := S_W;
    {$ifdef x86_64}
              S_LQ:
                taicpu(hp1).opsize := S_L;
    {$endif}
              else
                InternalError(2020112401);
            end;
            if (taicpu(hp1).opcode = A_SHR) then
              DebugMsg(SPeepholeOptimization + 'MovzShr2ShrMovz', hp1)
            else
              DebugMsg(SPeepholeOptimization + 'MovsSar2SarMovs', hp1);
            Result := True;
          end
        else if taicpu(p).opcode=A_MOVZX then
          begin
            { removes superfluous And's after movzx's }
            if reg_and_hp1_is_instr and
              (taicpu(hp1).opcode = A_AND) and
              MatchOpType(taicpu(hp1),top_const,top_reg) and
              ((taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)
    {$ifdef x86_64}
              { check for implicit extension to 64 bit }
              or
              ((taicpu(p).opsize in [S_BL,S_WL]) and
               (taicpu(hp1).opsize=S_Q) and
               SuperRegistersEqual(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^.reg)
              )
    {$endif x86_64}
              )
              then
              begin
                { the AND is redundant when its mask keeps exactly the bits
                  the movzx already zero-extended }
                case taicpu(p).opsize Of
                  S_BL, S_BW{$ifdef x86_64}, S_BQ{$endif x86_64}:
                    if (taicpu(hp1).oper[0]^.val = $ff) then
                      begin
                        DebugMsg(SPeepholeOptimization + 'MovzAnd2Movz1',p);
                        RemoveInstruction(hp1);
                        Result:=true;
                        exit;
                      end;
                  S_WL{$ifdef x86_64}, S_WQ{$endif x86_64}:
                    if (taicpu(hp1).oper[0]^.val = $ffff) then
                      begin
                        DebugMsg(SPeepholeOptimization + 'MovzAnd2Movz2',p);
                        RemoveInstruction(hp1);
                        Result:=true;
                        exit;
                      end;
    {$ifdef x86_64}
                  S_LQ:
                    if (taicpu(hp1).oper[0]^.val = $ffffffff) then
                      begin
                        DebugMsg(SPeepholeOptimization + 'MovzAnd2Movz3',p);
                        RemoveInstruction(hp1);
                        Result:=true;
                        exit;
                      end;
    {$endif x86_64}
                  else
                    ;
                end;
                { we cannot get rid of the and, but can we get rid of the movz ?}
                { only possible if the movz extends a register into itself,
                  and the AND mask fits in the movz source size (so the AND
                  alone already clears the upper bits) }
                if SuperRegistersEqual(taicpu(p).oper[0]^.reg,taicpu(p).oper[1]^.reg) then
                  begin
                    case taicpu(p).opsize Of
                      S_BL, S_BW{$ifdef x86_64}, S_BQ{$endif x86_64}:
                        if (taicpu(hp1).oper[0]^.val and $ff)=taicpu(hp1).oper[0]^.val then
                          begin
                            DebugMsg(SPeepholeOptimization + 'MovzAnd2And1',p);
                            RemoveCurrentP(p,hp1);
                            Result:=true;
                            exit;
                          end;
                      S_WL{$ifdef x86_64}, S_WQ{$endif x86_64}:
                        if (taicpu(hp1).oper[0]^.val and $ffff)=taicpu(hp1).oper[0]^.val then
                          begin
                            DebugMsg(SPeepholeOptimization + 'MovzAnd2And2',p);
                            RemoveCurrentP(p,hp1);
                            Result:=true;
                            exit;
                          end;
    {$ifdef x86_64}
                      S_LQ:
                        if (taicpu(hp1).oper[0]^.val and $ffffffff)=taicpu(hp1).oper[0]^.val then
                          begin
                            DebugMsg(SPeepholeOptimization + 'MovzAnd2And3',p);
                            RemoveCurrentP(p,hp1);
                            Result:=true;
                            exit;
                          end;
    {$endif x86_64}
                      else
                        ;
                    end;
                  end;
              end;
            { changes some movzx constructs to faster synonyms (all examples
              are given with eax/ax, but are also valid for other registers)}
            if MatchOpType(taicpu(p),top_reg,top_reg) then
              begin
                case taicpu(p).opsize of
                  { Technically, movzbw %al,%ax cannot be encoded in 32/64-bit mode
                    (the machine code is equivalent to movzbl %al,%eax), but the
                    code generator still generates that assembler instruction and
                    it is silently converted. This should probably be checked.
                    [Kit] }
                  S_BW:
                    begin
                      if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
                        (
                          not IsMOVZXAcceptable
                          { and $0xff,%ax has a smaller encoding but risks a partial write penalty }
                          or (
                            (cs_opt_size in current_settings.optimizerswitches) and
                            (taicpu(p).oper[1]^.reg = NR_AX)
                          )
                        ) then
                        {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
                        begin
                          DebugMsg(SPeepholeOptimization + 'var7',p);
                          taicpu(p).opcode := A_AND;
                          taicpu(p).changeopsize(S_W);
                          taicpu(p).loadConst(0,$ff);
                          Result := True;
                        end
                      else if not IsMOVZXAcceptable and
                        GetNextInstruction(p, hp1) and
                        (tai(hp1).typ = ait_instruction) and
                        (taicpu(hp1).opcode = A_AND) and
                        MatchOpType(taicpu(hp1),top_const,top_reg) and
                        (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                        { Change "movzbw %reg1, %reg2; andw $const, %reg2"
                          to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
                        begin
                          DebugMsg(SPeepholeOptimization + 'var8',p);
                          taicpu(p).opcode := A_MOV;
                          taicpu(p).changeopsize(S_W);
                          setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
                          taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                          Result := True;
                        end;
                    end;
    {$ifndef i8086} { movzbl %al,%eax cannot be encoded in 16-bit mode (the machine code is equivalent to movzbw %al,%ax }
                  S_BL:
                    begin
                      if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
                        (
                          not IsMOVZXAcceptable
                          { and $0xff,%eax has a smaller encoding but risks a partial write penalty }
                          or (
                            (cs_opt_size in current_settings.optimizerswitches) and
                            (taicpu(p).oper[1]^.reg = NR_EAX)
                          )
                        ) then
                        { Change "movzbl %al, %eax" to "andl $0x0ffh, %eax" }
                        begin
                          DebugMsg(SPeepholeOptimization + 'var9',p);
                          taicpu(p).opcode := A_AND;
                          taicpu(p).changeopsize(S_L);
                          taicpu(p).loadConst(0,$ff);
                          Result := True;
                        end
                      else if not IsMOVZXAcceptable and
                        GetNextInstruction(p, hp1) and
                        (tai(hp1).typ = ait_instruction) and
                        (taicpu(hp1).opcode = A_AND) and
                        MatchOpType(taicpu(hp1),top_const,top_reg) and
                        (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                        { Change "movzbl %reg1, %reg2; andl $const, %reg2"
                          to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
                        begin
                          DebugMsg(SPeepholeOptimization + 'var10',p);
                          taicpu(p).opcode := A_MOV;
                          taicpu(p).changeopsize(S_L);
                          { do not use R_SUBWHOLE
                            as movl %rdx,%eax
                            is invalid in assembler PM }
                          setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
                          taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                          Result := True;
                        end;
                    end;
    {$endif i8086}
                  S_WL:
                    if not IsMOVZXAcceptable then
                      begin
                        if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) then
                          { Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax" }
                          begin
                            DebugMsg(SPeepholeOptimization + 'var11',p);
                            taicpu(p).opcode := A_AND;
                            taicpu(p).changeopsize(S_L);
                            taicpu(p).loadConst(0,$ffff);
                            Result := True;
                          end
                        else if GetNextInstruction(p, hp1) and
                          (tai(hp1).typ = ait_instruction) and
                          (taicpu(hp1).opcode = A_AND) and
                          (taicpu(hp1).oper[0]^.typ = top_const) and
                          (taicpu(hp1).oper[1]^.typ = top_reg) and
                          (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                          { Change "movzwl %reg1, %reg2; andl $const, %reg2"
                            to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
                          begin
                            DebugMsg(SPeepholeOptimization + 'var12',p);
                            taicpu(p).opcode := A_MOV;
                            taicpu(p).changeopsize(S_L);
                            { do not use R_SUBWHOLE
                              as movl %rdx,%eax
                              is invalid in assembler PM }
                            setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
                            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
                            Result := True;
                          end;
                      end;
                  else
                    InternalError(2017050705);
                end;
              end
            else if not IsMOVZXAcceptable and (taicpu(p).oper[0]^.typ = top_ref) then
              begin
                { movzx from memory followed by AND: mask the AND constant to
                  the loaded width (the movzx itself is kept here) }
                if GetNextInstruction(p, hp1) and
                  (tai(hp1).typ = ait_instruction) and
                  (taicpu(hp1).opcode = A_AND) and
                  MatchOpType(taicpu(hp1),top_const,top_reg) and
                  (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                  begin
                    //taicpu(p).opcode := A_MOV;
                    case taicpu(p).opsize Of
                      S_BL:
                        begin
                          DebugMsg(SPeepholeOptimization + 'var13',p);
                          taicpu(hp1).changeopsize(S_L);
                          taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                        end;
                      S_WL:
                        begin
                          DebugMsg(SPeepholeOptimization + 'var14',p);
                          taicpu(hp1).changeopsize(S_L);
                          taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
                        end;
                      S_BW:
                        begin
                          DebugMsg(SPeepholeOptimization + 'var15',p);
                          taicpu(hp1).changeopsize(S_W);
                          taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                        end;
                      else
                        Internalerror(2017050704)
                    end;
                    Result := True;
                  end;
              end;
          end;
      end;
  8296. function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
  8297. var
  8298. hp1, hp2 : tai;
  8299. MaskLength : Cardinal;
  8300. MaskedBits : TCgInt;
  8301. begin
  8302. Result:=false;
  8303. { There are no optimisations for reference targets }
  8304. if (taicpu(p).oper[1]^.typ <> top_reg) then
  8305. Exit;
  8306. while GetNextInstruction(p, hp1) and
  8307. (hp1.typ = ait_instruction) do
  8308. begin
  8309. if (taicpu(p).oper[0]^.typ = top_const) then
  8310. begin
  8311. case taicpu(hp1).opcode of
  8312. A_AND:
  8313. if MatchOpType(taicpu(hp1),top_const,top_reg) and
  8314. (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
  8315. { the second register must contain the first one, so compare their subreg types }
  8316. (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
  8317. (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
  8318. { change
  8319. and const1, reg
  8320. and const2, reg
  8321. to
  8322. and (const1 and const2), reg
  8323. }
  8324. begin
  8325. taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
  8326. DebugMsg(SPeepholeOptimization + 'AndAnd2And done',hp1);
  8327. RemoveCurrentP(p, hp1);
  8328. Result:=true;
  8329. exit;
  8330. end;
  8331. A_CMP:
  8332. if (PopCnt(DWord(taicpu(p).oper[0]^.val)) = 1) and { Only 1 bit set }
  8333. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.val) and
  8334. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^.reg) and
  8335. { Just check that the condition on the next instruction is compatible }
  8336. GetNextInstruction(hp1, hp2) and
  8337. (hp2.typ = ait_instruction) and
  8338. (taicpu(hp2).condition in [C_Z, C_E, C_NZ, C_NE])
  8339. then
  8340. { change
  8341. and 2^n, reg
  8342. cmp 2^n, reg
  8343. j(c) / set(c) / cmov(c) (c is equal or not equal)
  8344. to
  8345. and 2^n, reg
  8346. test reg, reg
  8347. j(~c) / set(~c) / cmov(~c)
  8348. }
  8349. begin
  8350. { Keep TEST instruction in, rather than remove it, because
  8351. it may trigger other optimisations such as MovAndTest2Test }
  8352. taicpu(hp1).loadreg(0, taicpu(hp1).oper[1]^.reg);
  8353. taicpu(hp1).opcode := A_TEST;
  8354. DebugMsg(SPeepholeOptimization + 'AND/CMP/J(c) -> AND/J(~c) with power of 2 constant', p);
  8355. taicpu(hp2).condition := inverse_cond(taicpu(hp2).condition);
  8356. Result := True;
  8357. Exit;
  8358. end;
  8359. A_MOVZX:
  8360. if MatchOpType(taicpu(hp1),top_reg,top_reg) and
  8361. SuperRegistersEqual(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^.reg) and
  8362. (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
  8363. (
  8364. (
  8365. (taicpu(p).opsize=S_W) and
  8366. (taicpu(hp1).opsize=S_BW)
  8367. ) or
  8368. (
  8369. (taicpu(p).opsize=S_L) and
  8370. (taicpu(hp1).opsize in [S_WL,S_BL{$ifdef x86_64},S_BQ,S_WQ{$endif x86_64}])
  8371. )
  8372. {$ifdef x86_64}
  8373. or
  8374. (
  8375. (taicpu(p).opsize=S_Q) and
  8376. (taicpu(hp1).opsize in [S_BQ,S_WQ,S_BL,S_WL])
  8377. )
  8378. {$endif x86_64}
  8379. ) then
  8380. begin
  8381. if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
  8382. ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
  8383. ) or
  8384. (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
  8385. ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
  8386. then
  8387. begin
  8388. { Unlike MOVSX, MOVZX doesn't actually have a version that zero-extends a
  8389. 32-bit register to a 64-bit register, or even a version called MOVZXD, so
  8390. code that tests for the presence of AND 0xffffffff followed by MOVZX is
  8391. wasted, and is indictive of a compiler bug if it were triggered. [Kit]
  8392. NOTE: To zero-extend from 32 bits to 64 bits, simply use the standard MOV.
  8393. }
  8394. DebugMsg(SPeepholeOptimization + 'AndMovzToAnd done',p);
  8395. RemoveInstruction(hp1);
  8396. { See if there are other optimisations possible }
  8397. Continue;
  8398. end;
  8399. end;
  8400. A_SHL:
  8401. if MatchOpType(taicpu(hp1),top_const,top_reg) and
  8402. (getsupreg(taicpu(p).oper[1]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) then
  8403. begin
  8404. {$ifopt R+}
  8405. {$define RANGE_WAS_ON}
  8406. {$R-}
  8407. {$endif}
  8408. { get length of potential and mask }
  8409. MaskLength:=SizeOf(taicpu(p).oper[0]^.val)*8-BsrQWord(taicpu(p).oper[0]^.val)-1;
  8410. { really a mask? }
  8411. {$ifdef RANGE_WAS_ON}
  8412. {$R+}
  8413. {$endif}
  8414. if (((QWord(1) shl MaskLength)-1)=taicpu(p).oper[0]^.val) and
  8415. { unmasked part shifted out? }
  8416. ((MaskLength+taicpu(hp1).oper[0]^.val)>=topsize2memsize[taicpu(hp1).opsize]) then
  8417. begin
  8418. DebugMsg(SPeepholeOptimization + 'AndShlToShl done',p);
  8419. RemoveCurrentP(p, hp1);
  8420. Result:=true;
  8421. exit;
  8422. end;
  8423. end;
  8424. A_SHR:
  8425. if MatchOpType(taicpu(hp1),top_const,top_reg) and
  8426. (taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg) and
  8427. (taicpu(hp1).oper[0]^.val <= 63) then
  8428. begin
  8429. { Does SHR combined with the AND cover all the bits?
  8430. e.g. for "andb $252,%reg; shrb $2,%reg" - the "and" can be removed }
  8431. MaskedBits := taicpu(p).oper[0]^.val or ((TCgInt(1) shl taicpu(hp1).oper[0]^.val) - 1);
  8432. if ((taicpu(p).opsize = S_B) and ((MaskedBits and $FF) = $FF)) or
  8433. ((taicpu(p).opsize = S_W) and ((MaskedBits and $FFFF) = $FFFF)) or
  8434. ((taicpu(p).opsize = S_L) and ((MaskedBits and $FFFFFFFF) = $FFFFFFFF)) then
  8435. begin
  8436. DebugMsg(SPeepholeOptimization + 'AndShrToShr done', p);
  8437. RemoveCurrentP(p, hp1);
  8438. Result := True;
  8439. Exit;
  8440. end;
  8441. end;
  8442. A_MOVSX{$ifdef x86_64}, A_MOVSXD{$endif x86_64}:
  8443. if (taicpu(hp1).oper[0]^.typ = top_reg) and
  8444. SuperRegistersEqual(taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg) then
  8445. begin
  8446. if SuperRegistersEqual(taicpu(p).oper[1]^.reg, taicpu(hp1).oper[1]^.reg) and
  8447. (
  8448. (
  8449. (taicpu(hp1).opsize in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
  8450. ((taicpu(p).oper[0]^.val and $7F) = taicpu(p).oper[0]^.val)
  8451. ) or (
  8452. (taicpu(hp1).opsize in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
  8453. ((taicpu(p).oper[0]^.val and $7FFF) = taicpu(p).oper[0]^.val)
  8454. {$ifdef x86_64}
  8455. ) or (
  8456. (taicpu(hp1).opsize = S_LQ) and
  8457. ((taicpu(p).oper[0]^.val and $7fffffff) = taicpu(p).oper[0]^.val)
  8458. {$endif x86_64}
  8459. )
  8460. ) then
  8461. begin
  8462. if (taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg){$ifdef x86_64} or (taicpu(hp1).opsize = S_LQ){$endif x86_64} then
  8463. begin
  8464. DebugMsg(SPeepholeOptimization + 'AndMovsxToAnd',p);
  8465. RemoveInstruction(hp1);
  8466. { See if there are other optimisations possible }
  8467. Continue;
  8468. end;
  8469. { The super-registers are the same though.
  8470. Note that this change by itself doesn't improve
  8471. code speed, but it opens up other optimisations. }
  8472. {$ifdef x86_64}
  8473. { Convert 64-bit register to 32-bit }
  8474. case taicpu(hp1).opsize of
  8475. S_BQ:
  8476. begin
  8477. taicpu(hp1).opsize := S_BL;
  8478. taicpu(hp1).oper[1]^.reg := newreg(R_INTREGISTER, getsupreg(taicpu(hp1).oper[1]^.reg), R_SUBD);
  8479. end;
  8480. S_WQ:
  8481. begin
  8482. taicpu(hp1).opsize := S_WL;
  8483. taicpu(hp1).oper[1]^.reg := newreg(R_INTREGISTER, getsupreg(taicpu(hp1).oper[1]^.reg), R_SUBD);
  8484. end
  8485. else
  8486. ;
  8487. end;
  8488. {$endif x86_64}
  8489. DebugMsg(SPeepholeOptimization + 'AndMovsxToAndMovzx', hp1);
  8490. taicpu(hp1).opcode := A_MOVZX;
  8491. { See if there are other optimisations possible }
  8492. Continue;
  8493. end;
  8494. end;
  8495. else
  8496. ;
  8497. end;
  8498. end;
  8499. if (taicpu(hp1).is_jmp) and
  8500. (taicpu(hp1).opcode<>A_JMP) and
  8501. not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
  8502. begin
  8503. { change
  8504. and x, reg
  8505. jxx
  8506. to
  8507. test x, reg
  8508. jxx
  8509. if reg is deallocated before the
  8510. jump, but only if it's a conditional jump (PFV)
  8511. }
  8512. taicpu(p).opcode := A_TEST;
  8513. Exit;
  8514. end;
  8515. Break;
  8516. end;
  8517. { Lone AND tests }
  8518. if (taicpu(p).oper[0]^.typ = top_const) then
  8519. begin
  8520. {
  8521. - Convert and $0xFF,reg to and reg,reg if reg is 8-bit
  8522. - Convert and $0xFFFF,reg to and reg,reg if reg is 16-bit
  8523. - Convert and $0xFFFFFFFF,reg to and reg,reg if reg is 32-bit
  8524. }
  8525. if ((taicpu(p).oper[0]^.val = $FF) and (taicpu(p).opsize = S_B)) or
  8526. ((taicpu(p).oper[0]^.val = $FFFF) and (taicpu(p).opsize = S_W)) or
  8527. ((taicpu(p).oper[0]^.val = $FFFFFFFF) and (taicpu(p).opsize = S_L)) then
  8528. begin
  8529. taicpu(p).loadreg(0, taicpu(p).oper[1]^.reg);
  8530. if taicpu(p).opsize = S_L then
  8531. begin
  8532. Include(OptsToCheck,aoc_MovAnd2Mov_3);
  8533. Result := True;
  8534. end;
  8535. end;
  8536. end;
  8537. { Backward check to determine necessity of and %reg,%reg }
  8538. if (taicpu(p).oper[0]^.typ = top_reg) and
  8539. (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
  8540. not RegInUsedRegs(NR_DEFAULTFLAGS, UsedRegs) and
  8541. GetLastInstruction(p, hp2) and
  8542. RegModifiedByInstruction(taicpu(p).oper[1]^.reg, hp2) and
  8543. { Check size of adjacent instruction to determine if the AND is
  8544. effectively a null operation }
  8545. (
  8546. (taicpu(p).opsize = taicpu(hp2).opsize) or
  8547. { Note: Don't include S_Q }
  8548. ((taicpu(p).opsize = S_L) and (taicpu(hp2).opsize in [S_BL, S_WL])) or
  8549. ((taicpu(p).opsize = S_W) and (taicpu(hp2).opsize in [S_BW, S_BL, S_WL, S_L])) or
  8550. ((taicpu(p).opsize = S_B) and (taicpu(hp2).opsize in [S_BW, S_BL, S_WL, S_W, S_L]))
  8551. ) then
  8552. begin
  8553. DebugMsg(SPeepholeOptimization + 'And2Nop', p);
  8554. { If GetNextInstruction returned False, hp1 will be nil }
  8555. RemoveCurrentP(p, hp1);
  8556. Result := True;
  8557. Exit;
  8558. end;
  8559. end;
  8560. function TX86AsmOptimizer.OptPass2ADD(var p : tai) : boolean;
  8561. var
  8562. hp1: tai; NewRef: TReference;
  8563. { This entire nested function is used in an if-statement below, but we
  8564. want to avoid all the used reg transfers and GetNextInstruction calls
  8565. until we really have to check }
  8566. function MemRegisterNotUsedLater: Boolean; inline;
  8567. var
  8568. hp2: tai;
  8569. begin
  8570. TransferUsedRegs(TmpUsedRegs);
  8571. hp2 := p;
  8572. repeat
  8573. UpdateUsedRegs(TmpUsedRegs, tai(hp2.Next));
  8574. until not GetNextInstruction(hp2, hp2) or (hp2 = hp1);
  8575. Result := not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs);
  8576. end;
  8577. begin
  8578. Result := False;
  8579. if not GetNextInstruction(p, hp1) or (hp1.typ <> ait_instruction) then
  8580. Exit;
  8581. if (taicpu(p).opsize in [S_L{$ifdef x86_64}, S_Q{$endif}]) then
  8582. begin
  8583. { Change:
  8584. add %reg2,%reg1
  8585. mov/s/z #(%reg1),%reg1 (%reg1 superregisters must be the same)
  8586. To:
  8587. mov/s/z #(%reg1,%reg2),%reg1
  8588. }
  8589. if MatchOpType(taicpu(p), top_reg, top_reg) and
  8590. MatchInstruction(hp1, [A_MOV, A_MOVZX, A_MOVSX{$ifdef x86_64}, A_MOVSXD{$endif}], []) and
  8591. MatchOpType(taicpu(hp1), top_ref, top_reg) and
  8592. (taicpu(hp1).oper[0]^.ref^.scalefactor <= 1) and
  8593. (
  8594. (
  8595. (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) and
  8596. (taicpu(hp1).oper[0]^.ref^.index = NR_NO) and
  8597. { r/esp cannot be an index }
  8598. (taicpu(p).oper[0]^.reg<>NR_STACK_POINTER_REG)
  8599. ) or (
  8600. (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) and
  8601. (taicpu(hp1).oper[0]^.ref^.base = NR_NO)
  8602. )
  8603. ) and (
  8604. Reg1WriteOverwritesReg2Entirely(taicpu(p).oper[1]^.reg, taicpu(hp1).oper[1]^.reg) or
  8605. (
  8606. { If the super registers ARE equal, then this MOV/S/Z does a partial write }
  8607. not SuperRegistersEqual(taicpu(p).oper[1]^.reg, taicpu(hp1).oper[1]^.reg) and
  8608. MemRegisterNotUsedLater
  8609. )
  8610. ) then
  8611. begin
  8612. taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[1]^.reg;
  8613. taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
  8614. DebugMsg(SPeepholeOptimization + 'AddMov2Mov done', p);
  8615. RemoveCurrentp(p, hp1);
  8616. Result := True;
  8617. Exit;
  8618. end;
  8619. { Change:
  8620. addl/q $x,%reg1
  8621. movl/q %reg1,%reg2
  8622. To:
  8623. leal/q $x(%reg1),%reg2
  8624. addl/q $x,%reg1 (can be removed if %reg1 or the flags are not used afterwards)
  8625. Breaks the dependency chain.
  8626. }
  8627. if MatchOpType(taicpu(p),top_const,top_reg) and
  8628. MatchInstruction(hp1, A_MOV, [taicpu(p).opsize]) and
  8629. (taicpu(hp1).oper[1]^.typ = top_reg) and
  8630. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^.reg) and
  8631. (
  8632. { Don't do AddMov2LeaAdd under -Os, but do allow AddMov2Lea }
  8633. not (cs_opt_size in current_settings.optimizerswitches) or
  8634. (
  8635. not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs) and
  8636. RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp1, TmpUsedRegs)
  8637. )
  8638. ) then
  8639. begin
  8640. { Change the MOV instruction to a LEA instruction, and update the
  8641. first operand }
  8642. reference_reset(NewRef, 1, []);
  8643. NewRef.base := taicpu(p).oper[1]^.reg;
  8644. NewRef.scalefactor := 1;
  8645. NewRef.offset := taicpu(p).oper[0]^.val;
  8646. taicpu(hp1).opcode := A_LEA;
  8647. taicpu(hp1).loadref(0, NewRef);
  8648. TransferUsedRegs(TmpUsedRegs);
  8649. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  8650. if RegUsedAfterInstruction(NewRef.base, hp1, TmpUsedRegs) or
  8651. RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp1, TmpUsedRegs) then
  8652. begin
  8653. { Move what is now the LEA instruction to before the SUB instruction }
  8654. Asml.Remove(hp1);
  8655. Asml.InsertBefore(hp1, p);
  8656. AllocRegBetween(taicpu(hp1).oper[1]^.reg, hp1, p, UsedRegs);
  8657. DebugMsg(SPeepholeOptimization + 'AddMov2LeaAdd', p);
  8658. p := hp1;
  8659. end
  8660. else
  8661. begin
  8662. { Since %reg1 or the flags aren't used afterwards, we can delete p completely }
  8663. RemoveCurrentP(p, hp1);
  8664. DebugMsg(SPeepholeOptimization + 'AddMov2Lea', p);
  8665. end;
  8666. Result := True;
  8667. end;
  8668. end;
  8669. end;
  8670. function TX86AsmOptimizer.OptPass2Lea(var p : tai) : Boolean;
  8671. begin
  8672. Result:=false;
  8673. if not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
  8674. begin
  8675. if MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
  8676. (taicpu(p).oper[0]^.ref^.index<>NR_NO) then
  8677. begin
  8678. taicpu(p).loadreg(1,taicpu(p).oper[0]^.ref^.base);
  8679. taicpu(p).loadreg(0,taicpu(p).oper[0]^.ref^.index);
  8680. taicpu(p).opcode:=A_ADD;
  8681. DebugMsg(SPeepholeOptimization + 'Lea2AddBase done',p);
  8682. result:=true;
  8683. end
  8684. else if MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) and
  8685. (taicpu(p).oper[0]^.ref^.base<>NR_NO) then
  8686. begin
  8687. taicpu(p).loadreg(1,taicpu(p).oper[0]^.ref^.index);
  8688. taicpu(p).loadreg(0,taicpu(p).oper[0]^.ref^.base);
  8689. taicpu(p).opcode:=A_ADD;
  8690. DebugMsg(SPeepholeOptimization + 'Lea2AddIndex done',p);
  8691. result:=true;
  8692. end;
  8693. end;
  8694. end;
  8695. function TX86AsmOptimizer.OptPass2SUB(var p: tai): Boolean;
  8696. var
  8697. hp1: tai; NewRef: TReference;
  8698. begin
  8699. { Change:
  8700. subl/q $x,%reg1
  8701. movl/q %reg1,%reg2
  8702. To:
  8703. leal/q $-x(%reg1),%reg2
  8704. subl/q $x,%reg1 (can be removed if %reg1 or the flags are not used afterwards)
  8705. Breaks the dependency chain and potentially permits the removal of
  8706. a CMP instruction if one follows.
  8707. }
  8708. Result := False;
  8709. if (taicpu(p).opsize in [S_L{$ifdef x86_64}, S_Q{$endif x86_64}]) and
  8710. MatchOpType(taicpu(p),top_const,top_reg) and
  8711. GetNextInstruction(p, hp1) and
  8712. MatchInstruction(hp1, A_MOV, [taicpu(p).opsize]) and
  8713. (taicpu(hp1).oper[1]^.typ = top_reg) and
  8714. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^.reg) and
  8715. (
  8716. { Don't do SubMov2LeaSub under -Os, but do allow SubMov2Lea }
  8717. not (cs_opt_size in current_settings.optimizerswitches) or
  8718. (
  8719. not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs) and
  8720. RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp1, TmpUsedRegs)
  8721. )
  8722. ) then
  8723. begin
  8724. { Change the MOV instruction to a LEA instruction, and update the
  8725. first operand }
  8726. reference_reset(NewRef, 1, []);
  8727. NewRef.base := taicpu(p).oper[1]^.reg;
  8728. NewRef.scalefactor := 1;
  8729. NewRef.offset := -taicpu(p).oper[0]^.val;
  8730. taicpu(hp1).opcode := A_LEA;
  8731. taicpu(hp1).loadref(0, NewRef);
  8732. TransferUsedRegs(TmpUsedRegs);
  8733. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  8734. if RegUsedAfterInstruction(NewRef.base, hp1, TmpUsedRegs) or
  8735. RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp1, TmpUsedRegs) then
  8736. begin
  8737. { Move what is now the LEA instruction to before the SUB instruction }
  8738. Asml.Remove(hp1);
  8739. Asml.InsertBefore(hp1, p);
  8740. AllocRegBetween(taicpu(hp1).oper[1]^.reg, hp1, p, UsedRegs);
  8741. DebugMsg(SPeepholeOptimization + 'SubMov2LeaSub', p);
  8742. p := hp1;
  8743. end
  8744. else
  8745. begin
  8746. { Since %reg1 or the flags aren't used afterwards, we can delete p completely }
  8747. RemoveCurrentP(p, hp1);
  8748. DebugMsg(SPeepholeOptimization + 'SubMov2Lea', p);
  8749. end;
  8750. Result := True;
  8751. end;
  8752. end;
  8753. function TX86AsmOptimizer.SkipSimpleInstructions(var hp1 : tai) : Boolean;
  8754. begin
  8755. { we can skip all instructions not messing with the stack pointer }
  8756. while assigned(hp1) and {MatchInstruction(hp1,[A_LEA,A_MOV,A_MOVQ,A_MOVSQ,A_MOVSX,A_MOVSXD,A_MOVZX,
  8757. A_AND,A_OR,A_XOR,A_ADD,A_SHR,A_SHL,A_IMUL,A_SETcc,A_SAR,A_SUB,A_TEST,A_CMOVcc,
  8758. A_MOVSS,A_MOVSD,A_MOVAPS,A_MOVUPD,A_MOVAPD,A_MOVUPS,
  8759. A_VMOVSS,A_VMOVSD,A_VMOVAPS,A_VMOVUPD,A_VMOVAPD,A_VMOVUPS],[]) and}
  8760. ({(taicpu(hp1).ops=0) or }
  8761. ({(MatchOpType(taicpu(hp1),top_reg,top_reg) or MatchOpType(taicpu(hp1),top_const,top_reg) or
  8762. (MatchOpType(taicpu(hp1),top_ref,top_reg))
  8763. ) and }
  8764. not(RegInInstruction(NR_STACK_POINTER_REG,hp1)) { and not(RegInInstruction(NR_FRAME_POINTER_REG,hp1))}
  8765. )
  8766. ) do
  8767. GetNextInstruction(hp1,hp1);
  8768. Result:=assigned(hp1);
  8769. end;
function TX86AsmOptimizer.PostPeepholeOptLea(var p : tai) : Boolean;
  var
    hp1, hp2, hp3, hp4, hp5: tai;
  begin
    Result:=false;
    hp5:=nil;
    { replace
        leal(q) x(<stackpointer>),<stackpointer>
        call procname
        leal(q) -x(<stackpointer>),<stackpointer>
        ret
      by
        jmp procname
      but do it only on level 4 because it destroys stack back traces
    }
    if (cs_opt_level4 in current_settings.optimizerswitches) and
      MatchOpType(taicpu(p),top_ref,top_reg) and
      { first LEA must be a plain displacement off the stack pointer }
      (taicpu(p).oper[0]^.ref^.base=NR_STACK_POINTER_REG) and
      (taicpu(p).oper[0]^.ref^.index=NR_NO) and
      { the -8 or -24 are not required, but bail out early if possible,
        higher values are unlikely }
      ((taicpu(p).oper[0]^.ref^.offset=-8) or
       (taicpu(p).oper[0]^.ref^.offset=-24)) and
      (taicpu(p).oper[0]^.ref^.symbol=nil) and
      (taicpu(p).oper[0]^.ref^.relsymbol=nil) and
      (taicpu(p).oper[0]^.ref^.segment=NR_NO) and
      (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG) and
      GetNextInstruction(p, hp1) and
      { Take a copy of hp1 }
      SetAndTest(hp1, hp4) and
      { trick to skip label }
      ((hp1.typ=ait_instruction) or GetNextInstruction(hp1, hp1)) and
      { tolerate instructions between the LEA and the CALL as long as they
        do not touch the stack pointer }
      SkipSimpleInstructions(hp1) and
      MatchInstruction(hp1,A_CALL,[S_NO]) and
      GetNextInstruction(hp1, hp2) and
      MatchInstruction(hp2,A_LEA,[taicpu(p).opsize]) and
      MatchOpType(taicpu(hp2),top_ref,top_reg) and
      { the second LEA must exactly undo the adjustment of the first }
      (taicpu(hp2).oper[0]^.ref^.offset=-taicpu(p).oper[0]^.ref^.offset) and
      (taicpu(hp2).oper[0]^.ref^.base=NR_STACK_POINTER_REG) and
      (taicpu(hp2).oper[0]^.ref^.index=NR_NO) and
      (taicpu(hp2).oper[0]^.ref^.symbol=nil) and
      (taicpu(hp2).oper[0]^.ref^.relsymbol=nil) and
      (taicpu(hp2).oper[0]^.ref^.segment=NR_NO) and
      (taicpu(hp2).oper[1]^.reg=NR_STACK_POINTER_REG) and
      GetNextInstruction(hp2, hp3) and
      { trick to skip label }
      ((hp3.typ=ait_instruction) or GetNextInstruction(hp3, hp3)) and
      { allow an optional VZEROUPPER before the RET; it is remembered in
        hp5 and re-inserted before the JMP below }
      (MatchInstruction(hp3,A_RET,[S_NO]) or
       (MatchInstruction(hp3,A_VZEROUPPER,[S_NO]) and
        SetAndTest(hp3,hp5) and
        GetNextInstruction(hp3,hp3) and
        MatchInstruction(hp3,A_RET,[S_NO])
       )
      ) and
      (taicpu(hp3).ops=0) then
      begin
        { turn the CALL into a JMP; the two LEAs and the RET are removed }
        taicpu(hp1).opcode := A_JMP;
        taicpu(hp1).is_jmp := true;
        DebugMsg(SPeepholeOptimization + 'LeaCallLeaRet2Jmp done',p);
        RemoveCurrentP(p, hp4);
        RemoveInstruction(hp2);
        RemoveInstruction(hp3);
        if Assigned(hp5) then
          begin
            { keep the VZEROUPPER: execute it before jumping away }
            AsmL.Remove(hp5);
            ASmL.InsertBefore(hp5,hp1)
          end;
        Result:=true;
      end;
  end;
function TX86AsmOptimizer.PostPeepholeOptPush(var p : tai) : Boolean;
{$ifdef x86_64}
  var
    hp1, hp2, hp3, hp4, hp5: tai;
{$endif x86_64}
  begin
    Result:=false;
{$ifdef x86_64}
    hp5:=nil;
    { replace
        push %rax
        call procname
        pop %rcx
        ret
      by
        jmp procname
      but do it only on level 4 because it destroys stack back traces
      It depends on the fact, that the sequence push rax/pop rcx is used for stack alignment as rcx is volatile
      for all supported calling conventions
    }
    if (cs_opt_level4 in current_settings.optimizerswitches) and
      MatchOpType(taicpu(p),top_reg) and
      (taicpu(p).oper[0]^.reg=NR_RAX) and
      GetNextInstruction(p, hp1) and
      { Take a copy of hp1 }
      SetAndTest(hp1, hp4) and
      { trick to skip label }
      ((hp1.typ=ait_instruction) or GetNextInstruction(hp1, hp1)) and
      { tolerate instructions between the PUSH and the CALL as long as
        they do not touch the stack pointer }
      SkipSimpleInstructions(hp1) and
      MatchInstruction(hp1,A_CALL,[S_NO]) and
      GetNextInstruction(hp1, hp2) and
      { the POP must match the PUSH in size and restore into RCX }
      MatchInstruction(hp2,A_POP,[taicpu(p).opsize]) and
      MatchOpType(taicpu(hp2),top_reg) and
      (taicpu(hp2).oper[0]^.reg=NR_RCX) and
      GetNextInstruction(hp2, hp3) and
      { trick to skip label }
      ((hp3.typ=ait_instruction) or GetNextInstruction(hp3, hp3)) and
      { allow an optional VZEROUPPER before the RET; it is remembered in
        hp5 and re-inserted before the JMP below }
      (MatchInstruction(hp3,A_RET,[S_NO]) or
       (MatchInstruction(hp3,A_VZEROUPPER,[S_NO]) and
        SetAndTest(hp3,hp5) and
        GetNextInstruction(hp3,hp3) and
        MatchInstruction(hp3,A_RET,[S_NO])
       )
      ) and
      (taicpu(hp3).ops=0) then
      begin
        { turn the CALL into a JMP; the PUSH, POP and RET are removed }
        taicpu(hp1).opcode := A_JMP;
        taicpu(hp1).is_jmp := true;
        DebugMsg(SPeepholeOptimization + 'PushCallPushRet2Jmp done',p);
        RemoveCurrentP(p, hp4);
        RemoveInstruction(hp2);
        RemoveInstruction(hp3);
        if Assigned(hp5) then
          begin
            { keep the VZEROUPPER: execute it before jumping away }
            AsmL.Remove(hp5);
            ASmL.InsertBefore(hp5,hp1)
          end;
        Result:=true;
      end;
{$endif x86_64}
  end;
function TX86AsmOptimizer.PostPeepholeOptMov(var p : tai) : Boolean;
  var
    Value, RegName: string;
  begin
    Result:=false;
    { special-case immediate-to-register moves that have a smaller or
      otherwise preferable encoding }
    if (taicpu(p).oper[1]^.typ = top_reg) and (taicpu(p).oper[0]^.typ = top_const) then
      begin
        case taicpu(p).oper[0]^.val of
          0:
            { Don't make this optimisation if the CPU flags are required, since XOR scrambles them }
            if not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
              begin
                { change "mov $0,%reg" into "xor %reg,%reg" }
                taicpu(p).opcode := A_XOR;
                taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
                Result := True;
              end;
          $1..$FFFFFFFF:
            begin
              { Code size reduction by J. Gareth "Kit" Moreton }
              { change 64-bit register to 32-bit register to reduce code size (upper 32 bits will be set to zero) }
              case taicpu(p).opsize of
                S_Q:
                  begin
                    { capture the names before the operand is rewritten so
                      the debug message shows the original form }
                    RegName := debug_regname(taicpu(p).oper[1]^.reg); { 64-bit register name }
                    Value := debug_tostr(taicpu(p).oper[0]^.val);
                    { The actual optimization }
                    setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
                    taicpu(p).changeopsize(S_L);
                    DebugMsg(SPeepholeOptimization + 'movq $' + Value + ',' + RegName + ' -> movl $' + Value + ',' + debug_regname(taicpu(p).oper[1]^.reg) + ' (immediate can be represented with just 32 bits)', p);
                    Result := True;
                  end;
                else
                  { Do nothing };
              end;
            end;
          -1:
            { Don't make this optimisation if the CPU flags are required, since OR scrambles them }
            if (cs_opt_size in current_settings.optimizerswitches) and
              (taicpu(p).opsize <> S_B) and
              not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
              begin
                { change "mov $-1,%reg" into "or $-1,%reg" }
                { NOTES:
                  - No size saving is made when changing a Word-sized assignment unless the register is AX (smaller encoding)
                  - This operation creates a false dependency on the register, so only do it when optimising for size
                  - It is possible to set memory operands using this method, but this creates an even greater false dependency, so don't do this at all
                }
                taicpu(p).opcode := A_OR;
                Result := True;
              end;
        end;
      end;
  end;
  8955. function TX86AsmOptimizer.PostPeepholeOptAnd(var p : tai) : boolean;
  8956. var
  8957. hp1: tai;
  8958. begin
  8959. { Detect:
  8960. andw x, %ax (0 <= x < $8000)
  8961. ...
  8962. movzwl %ax,%eax
  8963. Change movzwl %ax,%eax to cwtl (shorter encoding for movswl %ax,%eax)
  8964. }
  8965. Result := False; if MatchOpType(taicpu(p), top_const, top_reg) and
  8966. (taicpu(p).oper[1]^.reg = NR_AX) and { This is also enough to determine that opsize = S_W }
  8967. ((taicpu(p).oper[0]^.val and $7FFF) = taicpu(p).oper[0]^.val) and
  8968. GetNextInstructionUsingReg(p, hp1, NR_EAX) and
  8969. MatchInstruction(hp1, A_MOVZX, [S_WL]) and
  8970. MatchOperand(taicpu(hp1).oper[0]^, NR_AX) and
  8971. MatchOperand(taicpu(hp1).oper[1]^, NR_EAX) then
  8972. begin
  8973. DebugMsg(SPeepholeOptimization + 'Converted movzwl %ax,%eax to cwtl (via AndMovz2AndCwtl)', hp1);
  8974. taicpu(hp1).opcode := A_CWDE;
  8975. taicpu(hp1).clearop(0);
  8976. taicpu(hp1).clearop(1);
  8977. taicpu(hp1).ops := 0;
  8978. { A change was made, but not with p, so move forward 1 }
  8979. p := tai(p.Next);
  8980. Result := True;
  8981. end;
  8982. end;
  8983. function TX86AsmOptimizer.PostPeepholeOptMOVSX(var p : tai) : boolean;
  8984. begin
  8985. Result := False;
  8986. if not MatchOpType(taicpu(p), top_reg, top_reg) then
  8987. Exit;
  8988. { Convert:
  8989. movswl %ax,%eax -> cwtl
  8990. movslq %eax,%rax -> cdqe
  8991. NOTE: Don't convert movswl %al,%ax to cbw, because cbw and cwde
  8992. refer to the same opcode and depends only on the assembler's
  8993. current operand-size attribute. [Kit]
  8994. }
  8995. with taicpu(p) do
  8996. case opsize of
  8997. S_WL:
  8998. if (oper[0]^.reg = NR_AX) and (oper[1]^.reg = NR_EAX) then
  8999. begin
  9000. DebugMsg(SPeepholeOptimization + 'Converted movswl %ax,%eax to cwtl', p);
  9001. opcode := A_CWDE;
  9002. clearop(0);
  9003. clearop(1);
  9004. ops := 0;
  9005. Result := True;
  9006. end;
  9007. {$ifdef x86_64}
  9008. S_LQ:
  9009. if (oper[0]^.reg = NR_EAX) and (oper[1]^.reg = NR_RAX) then
  9010. begin
  9011. DebugMsg(SPeepholeOptimization + 'Converted movslq %eax,%rax to cltq', p);
  9012. opcode := A_CDQE;
  9013. clearop(0);
  9014. clearop(1);
  9015. ops := 0;
  9016. Result := True;
  9017. end;
  9018. {$endif x86_64}
  9019. else
  9020. ;
  9021. end;
  9022. end;
  9023. function TX86AsmOptimizer.PostPeepholeOptShr(var p : tai) : boolean;
  9024. var
  9025. hp1: tai;
  9026. begin
  9027. { Detect:
  9028. shr x, %ax (x > 0)
  9029. ...
  9030. movzwl %ax,%eax
  9031. Change movzwl %ax,%eax to cwtl (shorter encoding for movswl %ax,%eax)
  9032. }
  9033. Result := False;
  9034. if MatchOpType(taicpu(p), top_const, top_reg) and
  9035. (taicpu(p).oper[1]^.reg = NR_AX) and { This is also enough to determine that opsize = S_W }
  9036. (taicpu(p).oper[0]^.val > 0) and
  9037. GetNextInstructionUsingReg(p, hp1, NR_EAX) and
  9038. MatchInstruction(hp1, A_MOVZX, [S_WL]) and
  9039. MatchOperand(taicpu(hp1).oper[0]^, NR_AX) and
  9040. MatchOperand(taicpu(hp1).oper[1]^, NR_EAX) then
  9041. begin
  9042. DebugMsg(SPeepholeOptimization + 'Converted movzwl %ax,%eax to cwtl (via ShrMovz2ShrCwtl)', hp1);
  9043. taicpu(hp1).opcode := A_CWDE;
  9044. taicpu(hp1).clearop(0);
  9045. taicpu(hp1).clearop(1);
  9046. taicpu(hp1).ops := 0;
  9047. { A change was made, but not with p, so move forward 1 }
  9048. p := tai(p.Next);
  9049. Result := True;
  9050. end;
  9051. end;
  9052. function TX86AsmOptimizer.PostPeepholeOptCmp(var p : tai) : Boolean;
  9053. begin
  9054. Result:=false;
  9055. { change "cmp $0, %reg" to "test %reg, %reg" }
  9056. if MatchOpType(taicpu(p),top_const,top_reg) and
  9057. (taicpu(p).oper[0]^.val = 0) then
  9058. begin
  9059. taicpu(p).opcode := A_TEST;
  9060. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  9061. Result:=true;
  9062. end;
  9063. end;
function TX86AsmOptimizer.PostPeepholeOptTestOr(var p : tai) : Boolean;
  var
    IsTestConstX : Boolean;
    hp1,hp2 : tai;
  begin
    Result:=false;
    { removes the line marked with (x) from the sequence
        and/or/xor/add/sub/... $x, %y
        test/or %y, %y  |  test $-1, %y    (x)
        j(n)z _Label
      as the first instruction already adjusts the ZF
      %y operand may also be a reference }
    IsTestConstX:=(taicpu(p).opcode=A_TEST) and
      MatchOperand(taicpu(p).oper[0]^,-1);
    { p must be a self-test ("test/or %y,%y") or "test $-1,%y", be preceded
      by an instruction and followed by a SETcc/Jcc/CMOVcc that consumes
      the flags }
    if (OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) or IsTestConstX) and
      GetLastInstruction(p, hp1) and
      (tai(hp1).typ = ait_instruction) and
      GetNextInstruction(p,hp2) and
      MatchInstruction(hp2,A_SETcc,A_Jcc,A_CMOVcc,[]) then
      case taicpu(hp1).opcode Of
        A_ADD, A_SUB, A_OR, A_XOR, A_AND:
          begin
            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
              { and in case of carry for A(E)/B(E)/C/NC }
              ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
               ((taicpu(hp1).opcode <> A_ADD) and
                (taicpu(hp1).opcode <> A_SUB))) then
              begin
                RemoveCurrentP(p, hp2);
                Result:=true;
                Exit;
              end;
          end;
        A_SHL, A_SAL, A_SHR, A_SAR:
          begin
            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
              { SHL/SAL/SHR/SAR with a value of 0 do not change the flags }
              { therefore, it's only safe to do this optimization for }
              { shifts by a (nonzero) constant }
              (taicpu(hp1).oper[0]^.typ = top_const) and
              (taicpu(hp1).oper[0]^.val <> 0) and
              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
              { and in case of carry for A(E)/B(E)/C/NC }
              (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
              begin
                RemoveCurrentP(p, hp2);
                Result:=true;
                Exit;
              end;
          end;
        A_DEC, A_INC, A_NEG:
          begin
            { for single-operand instructions the value tested is oper[0]
              of the preceding instruction }
            if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) and
              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
              { and in case of carry for A(E)/B(E)/C/NC }
              (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
              begin
                RemoveCurrentP(p, hp2);
                Result:=true;
                Exit;
              end;
          end
        else
          ;
      end; { case }
    { change "test $-1,%reg" into "test %reg,%reg" }
    if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
      taicpu(p).loadoper(0,taicpu(p).oper[1]^);
    { Change "or %reg,%reg" to "test %reg,%reg" as OR generates a false dependency }
    if MatchInstruction(p, A_OR, []) and
      { Can only match if they're both registers }
      MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) then
      begin
        DebugMsg(SPeepholeOptimization + 'or %reg,%reg -> test %reg,%reg to remove false dependency (Or2Test)', p);
        taicpu(p).opcode := A_TEST;
        { No need to set Result to True, as we've done all the optimisations we can }
      end;
  end;
function TX86AsmOptimizer.PostPeepholeOptCall(var p : tai) : Boolean;
  var
    hp1,hp3 : tai;
{$ifndef x86_64}
    hp2 : taicpu;
{$endif x86_64}
  begin
    Result:=false;
    hp3:=nil;
{$ifndef x86_64}
    { don't do this on modern CPUs, this really hurts them due to
      broken call/ret pairing }
    if (current_settings.optimizecputype < cpu_Pentium2) and
      not(cs_create_pic in current_settings.moduleswitches) and
      GetNextInstruction(p, hp1) and
      MatchInstruction(hp1,A_JMP,[S_NO]) and
      MatchOpType(taicpu(hp1),top_ref) and
      (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full) then
      begin
        { rewrite "call x; jmp y" as "push y; jmp x": the RET inside x
          then returns straight to y }
        hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
        InsertLLItem(p.previous, p, hp2);
        taicpu(p).opcode := A_JMP;
        taicpu(p).is_jmp := true;
        RemoveInstruction(hp1);
        Result:=true;
      end
    else
{$endif x86_64}
    { replace
        call procname
        ret
      by
        jmp procname
      but do it only on level 4 because it destroys stack back traces
      else if the subroutine is marked as no return, remove the ret
    }
    if ((cs_opt_level4 in current_settings.optimizerswitches) or
        (po_noreturn in current_procinfo.procdef.procoptions)) and
      GetNextInstruction(p, hp1) and
      { allow an optional VZEROUPPER before the RET; it is remembered in
        hp3 and re-inserted before the CALL/JMP below }
      (MatchInstruction(hp1,A_RET,[S_NO]) or
       (MatchInstruction(hp1,A_VZEROUPPER,[S_NO]) and
        SetAndTest(hp1,hp3) and
        GetNextInstruction(hp1,hp1) and
        MatchInstruction(hp1,A_RET,[S_NO])
       )
      ) and
      (taicpu(hp1).ops=0) then
      begin
        if (cs_opt_level4 in current_settings.optimizerswitches) and
          { we might destroy stack alignment here if we do not do a call }
          (target_info.stackalign<=sizeof(SizeUInt)) then
          begin
            taicpu(p).opcode := A_JMP;
            taicpu(p).is_jmp := true;
            DebugMsg(SPeepholeOptimization + 'CallRet2Jmp done',p);
          end
        else
          { noreturn case: keep the CALL, only the unreachable RET goes }
          DebugMsg(SPeepholeOptimization + 'CallRet2Call done',p);
        RemoveInstruction(hp1);
        if Assigned(hp3) then
          begin
            { keep the VZEROUPPER: execute it before the call/jmp }
            AsmL.Remove(hp3);
            AsmL.InsertBefore(hp3,p)
          end;
        Result:=true;
      end;
  end;
  9210. function TX86AsmOptimizer.PostPeepholeOptMovzx(var p : tai) : Boolean;
  9211. function ConstInRange(const Val: TCGInt; const OpSize: TOpSize): Boolean;
  9212. begin
  9213. case OpSize of
  9214. S_B, S_BW, S_BL{$ifdef x86_64}, S_BQ{$endif x86_64}:
  9215. Result := (Val <= $FF) and (Val >= -128);
  9216. S_W, S_WL{$ifdef x86_64}, S_WQ{$endif x86_64}:
  9217. Result := (Val <= $FFFF) and (Val >= -32768);
  9218. S_L{$ifdef x86_64}, S_LQ{$endif x86_64}:
  9219. Result := (Val <= $FFFFFFFF) and (Val >= -2147483648);
  9220. else
  9221. Result := True;
  9222. end;
  9223. end;
  9224. var
  9225. hp1, hp2 : tai;
  9226. SizeChange: Boolean;
  9227. PreMessage: string;
  9228. begin
  9229. Result := False;
  9230. if (taicpu(p).oper[0]^.typ = top_reg) and
  9231. SuperRegistersEqual(taicpu(p).oper[0]^.reg, taicpu(p).oper[1]^.reg) and
  9232. GetNextInstruction(p, hp1) and (hp1.typ = ait_instruction) then
  9233. begin
  9234. { Change (using movzbl %al,%eax as an example):
  9235. movzbl %al, %eax movzbl %al, %eax
  9236. cmpl x, %eax testl %eax,%eax
  9237. To:
  9238. cmpb x, %al testb %al, %al (Move one back to avoid a false dependency)
  9239. movzbl %al, %eax movzbl %al, %eax
  9240. Smaller instruction and minimises pipeline stall as the CPU
  9241. doesn't have to wait for the register to get zero-extended. [Kit]
  9242. Also allow if the smaller of the two registers is being checked,
  9243. as this still removes the false dependency.
  9244. }
  9245. if
  9246. (
  9247. (
  9248. (taicpu(hp1).opcode = A_CMP) and MatchOpType(taicpu(hp1), top_const, top_reg) and
  9249. ConstInRange(taicpu(hp1).oper[0]^.val, taicpu(p).opsize)
  9250. ) or (
  9251. { If MatchOperand returns True, they must both be registers }
  9252. (taicpu(hp1).opcode = A_TEST) and MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^)
  9253. )
  9254. ) and
  9255. (reg2opsize(taicpu(hp1).oper[1]^.reg) <= reg2opsize(taicpu(p).oper[1]^.reg)) and
  9256. SuperRegistersEqual(taicpu(p).oper[1]^.reg, taicpu(hp1).oper[1]^.reg) then
  9257. begin
  9258. PreMessage := debug_op2str(taicpu(hp1).opcode) + debug_opsize2str(taicpu(hp1).opsize) + ' ' + debug_operstr(taicpu(hp1).oper[0]^) + ',' + debug_regname(taicpu(hp1).oper[1]^.reg) + ' -> ' + debug_op2str(taicpu(hp1).opcode);
  9259. asml.Remove(hp1);
  9260. asml.InsertBefore(hp1, p);
  9261. { Swap instructions in the case of cmp 0,%reg or test %reg,%reg }
  9262. if (taicpu(hp1).opcode = A_TEST) or (taicpu(hp1).oper[0]^.val = 0) then
  9263. begin
  9264. taicpu(hp1).opcode := A_TEST;
  9265. taicpu(hp1).loadreg(0, taicpu(p).oper[0]^.reg);
  9266. end;
  9267. taicpu(hp1).oper[1]^.reg := taicpu(p).oper[0]^.reg;
  9268. case taicpu(p).opsize of
  9269. S_BW, S_BL:
  9270. begin
  9271. SizeChange := taicpu(hp1).opsize <> S_B;
  9272. taicpu(hp1).changeopsize(S_B);
  9273. end;
  9274. S_WL:
  9275. begin
  9276. SizeChange := taicpu(hp1).opsize <> S_W;
  9277. taicpu(hp1).changeopsize(S_W);
  9278. end
  9279. else
  9280. InternalError(2020112701);
  9281. end;
  9282. UpdateUsedRegs(tai(p.Next));
  9283. { Check if the register is used aferwards - if not, we can
  9284. remove the movzx instruction completely }
  9285. if not RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg, p, UsedRegs) then
  9286. begin
  9287. { Hp1 is a better position than p for debugging purposes }
  9288. DebugMsg(SPeepholeOptimization + 'Movzx2Nop 4a', hp1);
  9289. RemoveCurrentp(p, hp1);
  9290. Result := True;
  9291. end;
  9292. if SizeChange then
  9293. DebugMsg(SPeepholeOptimization + PreMessage +
  9294. debug_opsize2str(taicpu(hp1).opsize) + ' ' + debug_operstr(taicpu(hp1).oper[0]^) + ',' + debug_regname(taicpu(hp1).oper[1]^.reg) + ' (smaller and minimises pipeline stall - MovzxCmp2CmpMovzx)', hp1)
  9295. else
  9296. DebugMsg(SPeepholeOptimization + 'MovzxCmp2CmpMovzx', hp1);
  9297. Exit;
  9298. end;
  9299. { Change (using movzwl %ax,%eax as an example):
  9300. movzwl %ax, %eax
  9301. movb %al, (dest) (Register is smaller than read register in movz)
  9302. To:
  9303. movb %al, (dest) (Move one back to avoid a false dependency)
  9304. movzwl %ax, %eax
  9305. }
  9306. if (taicpu(hp1).opcode = A_MOV) and
  9307. (taicpu(hp1).oper[0]^.typ = top_reg) and
  9308. not RegInOp(taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^) and
  9309. SuperRegistersEqual(taicpu(hp1).oper[0]^.reg, taicpu(p).oper[0]^.reg) and
  9310. (reg2opsize(taicpu(hp1).oper[0]^.reg) <= reg2opsize(taicpu(p).oper[0]^.reg)) then
  9311. begin
  9312. DebugMsg(SPeepholeOptimization + 'MovzxMov2MovMovzx', hp1);
  9313. hp2 := tai(hp1.Previous); { Effectively the old position of hp1 }
  9314. asml.Remove(hp1);
  9315. asml.InsertBefore(hp1, p);
  9316. if taicpu(hp1).oper[1]^.typ = top_reg then
  9317. AllocRegBetween(taicpu(hp1).oper[1]^.reg, hp1, hp2, UsedRegs);
  9318. { Check if the register is used aferwards - if not, we can
  9319. remove the movzx instruction completely }
  9320. if not RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, p, UsedRegs) then
  9321. begin
  9322. { Hp1 is a better position than p for debugging purposes }
  9323. DebugMsg(SPeepholeOptimization + 'Movzx2Nop 4b', hp1);
  9324. RemoveCurrentp(p, hp1);
  9325. Result := True;
  9326. end;
  9327. Exit;
  9328. end;
  9329. end;
  9330. {$ifdef x86_64}
  9331. { Code size reduction by J. Gareth "Kit" Moreton }
  9332. { Convert MOVZBQ and MOVZWQ to MOVZBL and MOVZWL respectively if it removes the REX prefix }
  9333. if (taicpu(p).opsize in [S_BQ, S_WQ]) and
  9334. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_RAX, RS_RCX, RS_RDX, RS_RBX, RS_RSI, RS_RDI, RS_RBP, RS_RSP])
  9335. then
  9336. begin
  9337. { Has 64-bit register name and opcode suffix }
  9338. PreMessage := 'movz' + debug_opsize2str(taicpu(p).opsize) + ' ' + debug_operstr(taicpu(p).oper[0]^) + ',' + debug_regname(taicpu(p).oper[1]^.reg) + ' -> movz';
  9339. { The actual optimization }
  9340. setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
  9341. if taicpu(p).opsize = S_BQ then
  9342. taicpu(p).changeopsize(S_BL)
  9343. else
  9344. taicpu(p).changeopsize(S_WL);
  9345. DebugMsg(SPeepholeOptimization + PreMessage +
  9346. debug_opsize2str(taicpu(p).opsize) + ' ' + debug_operstr(taicpu(p).oper[0]^) + ',' + debug_regname(taicpu(p).oper[1]^.reg) + ' (removes REX prefix)', p);
  9347. end;
  9348. {$endif}
  9349. end;
  9350. {$ifdef x86_64}
  9351. function TX86AsmOptimizer.PostPeepholeOptXor(var p : tai) : Boolean;
  9352. var
  9353. PreMessage, RegName: string;
  9354. begin
  9355. { Code size reduction by J. Gareth "Kit" Moreton }
  9356. { change "xorq %reg,%reg" to "xorl %reg,%reg" for %rax, %rcx, %rdx, %rbx, %rsi, %rdi, %rbp and %rsp,
  9357. as this removes the REX prefix }
  9358. Result := False;
  9359. if not OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
  9360. Exit;
  9361. if taicpu(p).oper[0]^.typ <> top_reg then
  9362. { Should be impossible if both operands were equal, since one of XOR's operands must be a register }
  9363. InternalError(2018011500);
  9364. case taicpu(p).opsize of
  9365. S_Q:
  9366. begin
  9367. if (getsupreg(taicpu(p).oper[0]^.reg) in [RS_RAX, RS_RCX, RS_RDX, RS_RBX, RS_RSI, RS_RDI, RS_RBP, RS_RSP]) then
  9368. begin
  9369. RegName := debug_regname(taicpu(p).oper[0]^.reg); { 64-bit register name }
  9370. PreMessage := 'xorq ' + RegName + ',' + RegName + ' -> xorl ';
  9371. { The actual optimization }
  9372. setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
  9373. setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
  9374. taicpu(p).changeopsize(S_L);
  9375. RegName := debug_regname(taicpu(p).oper[0]^.reg); { 32-bit register name }
  9376. DebugMsg(SPeepholeOptimization + PreMessage + RegName + ',' + RegName + ' (removes REX prefix)', p);
  9377. end;
  9378. end;
  9379. else
  9380. ;
  9381. end;
  9382. end;
  9383. {$endif}
  9384. class procedure TX86AsmOptimizer.OptimizeRefs(var p: taicpu);
  9385. var
  9386. OperIdx: Integer;
  9387. begin
  9388. for OperIdx := 0 to p.ops - 1 do
  9389. if p.oper[OperIdx]^.typ = top_ref then
  9390. optimize_ref(p.oper[OperIdx]^.ref^, False);
  9391. end;
  9392. end.