//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
// both before and after the DAG is legalized.
//
// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
// primarily intended to handle simplification opportunities that are implicit
// in the LLVM IR and exposed by the various codegen lowering phases.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>

using namespace llvm;
#define DEBUG_TYPE "dagcombine"

STATISTIC(NodesCombined, "Number of dag nodes combined");
STATISTIC(PreIndexedNodes, "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed, "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int, "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of loads sliced");

namespace {
static cl::opt<bool>
CombinerAA("combiner-alias-analysis", cl::Hidden,
           cl::desc("Enable DAG combiner alias-analysis heuristics"));

static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
                 cl::desc("Enable DAG combiner's use of IR alias analysis"));

static cl::opt<bool>
UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
        cl::desc("Enable DAG combiner's use of TBAA"));

#ifndef NDEBUG
static cl::opt<std::string>
CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
                   cl::desc("Only use DAG-combiner alias analysis in this"
                            " function"));
#endif

/// Hidden option to stress test load slicing, i.e., when this option
/// is enabled, load slicing bypasses most of its profitability guards.
static cl::opt<bool>
StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                  cl::desc("Bypass the profitability model of load "
                           "slicing"),
                  cl::init(false));

static cl::opt<bool>
MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                  cl::desc("DAG combiner may split indexing from loads"));
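
// Illustrative usage (a sketch, not part of this file): the hidden flags
// above can be toggled on an llc command line when debugging the combiner,
// e.g.:
//
//   llc -combiner-alias-analysis -combiner-use-tbaa=false \
//       -combiner-stress-load-slicing foo.ll
//
// The flag names come from the cl::opt declarations above; the input file
// foo.ll is hypothetical.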

//------------------------------ DAGCombiner ---------------------------------//

class DAGCombiner {
  SelectionDAG &DAG;
  const TargetLowering &TLI;
  CombineLevel Level;
  CodeGenOpt::Level OptLevel;
  bool LegalOperations;
  bool LegalTypes;
  bool ForCodeSize;

  /// \brief Worklist of all of the nodes that need to be simplified.
  ///
  /// This must behave as a stack -- new nodes to process are pushed onto the
  /// back and when processing we pop off of the back.
  ///
  /// The worklist will not contain duplicates but may contain null entries
  /// due to nodes being deleted from the underlying DAG.
  SmallVector<SDNode *, 64> Worklist;

  /// \brief Mapping from an SDNode to its position on the worklist.
  ///
  /// This is used to find and remove nodes from the worklist (by nulling
  /// them) when they are deleted from the underlying DAG. It relies on
  /// stable indices of nodes within the worklist.
  DenseMap<SDNode *, unsigned> WorklistMap;

  /// \brief Set of nodes which have been combined (at least once).
  ///
  /// This is used to allow us to reliably add any operands of a DAG node
  /// which have not yet been combined to the worklist.
  SmallPtrSet<SDNode *, 64> CombinedNodes;

  // AA - Used for DAG load/store alias analysis.
  AliasAnalysis &AA;

  /// When an instruction is simplified, add all users of the instruction to
  /// the worklist because they might get more simplified now.
  void AddUsersToWorklist(SDNode *N) {
    for (SDNode *Node : N->uses())
      AddToWorklist(Node);
  }

  /// Call the node-specific routine that folds each particular type of node.
  SDValue visit(SDNode *N);

public:
  /// Add N to the worklist, making sure its entry is at the back (next to be
  /// processed).
  void AddToWorklist(SDNode *N) {
    // Skip handle nodes as they can't usefully be combined and confuse the
    // zero-use deletion strategy.
    if (N->getOpcode() == ISD::HANDLENODE)
      return;

    if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
      Worklist.push_back(N);
  }

  /// Remove all instances of N from the worklist.
  void removeFromWorklist(SDNode *N) {
    CombinedNodes.erase(N);

    auto It = WorklistMap.find(N);
    if (It == WorklistMap.end())
      return; // Not in the worklist.

    // Null out the entry rather than erasing it to avoid a linear operation.
    Worklist[It->second] = nullptr;
    WorklistMap.erase(It);
  }

  void deleteAndRecombine(SDNode *N);
  bool recursivelyDeleteUnusedNodes(SDNode *N);

  SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                    bool AddTo = true);

  SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
    return CombineTo(N, &Res, 1, AddTo);
  }

  SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                    bool AddTo = true) {
    SDValue To[] = { Res0, Res1 };
    return CombineTo(N, To, 2, AddTo);
  }
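
  // Illustrative sketch of how the visit routines below typically use these
  // overloads (visitFOO, Lo, and Hi are hypothetical):
  //
  //   SDValue DAGCombiner::visitFOO(SDNode *N) {
  //     SDValue Lo = ..., Hi = ...;
  //     // Replace both results of a two-result node; CombineTo pushes the
  //     // replacements and their users back onto the worklist.
  //     return CombineTo(N, Lo, Hi, /*AddTo=*/true);
  //   }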

  void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

private:
  /// Check the specified integer node value to see if it can be simplified or
  /// if things it uses can be simplified by bit propagation.
  /// If so, return true.
  bool SimplifyDemandedBits(SDValue Op) {
    unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
    APInt Demanded = APInt::getAllOnesValue(BitWidth);
    return SimplifyDemandedBits(Op, Demanded);
  }
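
  // Example (a sketch of the Demanded mask): the overload above demands every
  // bit. A caller that only cares about the low 8 bits of a 32-bit value
  // would pass a narrower mask instead (Op here is hypothetical):
  //
  //   APInt Demanded = APInt::getLowBitsSet(32, 8);
  //   SimplifyDemandedBits(Op, Demanded);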

  bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);

  bool CombineToPreIndexedLoadStore(SDNode *N);
  bool CombineToPostIndexedLoadStore(SDNode *N);
  SDValue SplitIndexingFromLoad(LoadSDNode *LD);
  bool SliceUpLoad(SDNode *N);

  /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
  /// load.
  ///
  /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
  /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
  /// \param EltNo index of the vector element to load.
  /// \param OriginalLoad load that EVE came from to be replaced.
  /// \returns EVE on success, SDValue() on failure.
  SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
      SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
  void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
  SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
  SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
  SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
  SDValue PromoteIntBinOp(SDValue Op);
  SDValue PromoteIntShiftOp(SDValue Op);
  SDValue PromoteExtend(SDValue Op);
  bool PromoteLoad(SDValue Op);

  void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                       SDValue Trunc, SDValue ExtLoad, SDLoc DL,
                       ISD::NodeType ExtType);

  /// Call the node-specific routine that knows how to fold each
  /// particular type of node. If that doesn't do anything, try the
  /// target-specific DAG combines.
  SDValue combine(SDNode *N);

  // Visitation implementation - Implement dag node combining for different
  // node types. The semantics are as follows:
  // Return Value:
  //   SDValue.getNode() == 0 - No change was made
  //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
  //   otherwise              - N should be replaced by the returned Operand.
  //
  SDValue visitTokenFactor(SDNode *N);
  SDValue visitMERGE_VALUES(SDNode *N);
  SDValue visitADD(SDNode *N);
  SDValue visitSUB(SDNode *N);
  SDValue visitADDC(SDNode *N);
  SDValue visitSUBC(SDNode *N);
  SDValue visitADDE(SDNode *N);
  SDValue visitSUBE(SDNode *N);
  SDValue visitMUL(SDNode *N);
  SDValue visitSDIV(SDNode *N);
  SDValue visitUDIV(SDNode *N);
  SDValue visitSREM(SDNode *N);
  SDValue visitUREM(SDNode *N);
  SDValue visitMULHU(SDNode *N);
  SDValue visitMULHS(SDNode *N);
  SDValue visitSMUL_LOHI(SDNode *N);
  SDValue visitUMUL_LOHI(SDNode *N);
  SDValue visitSMULO(SDNode *N);
  SDValue visitUMULO(SDNode *N);
  SDValue visitSDIVREM(SDNode *N);
  SDValue visitUDIVREM(SDNode *N);
  SDValue visitAND(SDNode *N);
  SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
  SDValue visitOR(SDNode *N);
  SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
  SDValue visitXOR(SDNode *N);
  SDValue SimplifyVBinOp(SDNode *N);
  SDValue visitSHL(SDNode *N);
  SDValue visitSRA(SDNode *N);
  SDValue visitSRL(SDNode *N);
  SDValue visitRotate(SDNode *N);
  SDValue visitBSWAP(SDNode *N);
  SDValue visitCTLZ(SDNode *N);
  SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
  SDValue visitCTTZ(SDNode *N);
  SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
  SDValue visitCTPOP(SDNode *N);
  SDValue visitSELECT(SDNode *N);
  SDValue visitVSELECT(SDNode *N);
  SDValue visitSELECT_CC(SDNode *N);
  SDValue visitSETCC(SDNode *N);
  SDValue visitSIGN_EXTEND(SDNode *N);
  SDValue visitZERO_EXTEND(SDNode *N);
  SDValue visitANY_EXTEND(SDNode *N);
  SDValue visitSIGN_EXTEND_INREG(SDNode *N);
  SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
  SDValue visitTRUNCATE(SDNode *N);
  SDValue visitBITCAST(SDNode *N);
  SDValue visitBUILD_PAIR(SDNode *N);
  SDValue visitFADD(SDNode *N);
  SDValue visitFSUB(SDNode *N);
  SDValue visitFMUL(SDNode *N);
  SDValue visitFMA(SDNode *N);
  SDValue visitFDIV(SDNode *N);
  SDValue visitFREM(SDNode *N);
  SDValue visitFSQRT(SDNode *N);
  SDValue visitFCOPYSIGN(SDNode *N);
  SDValue visitSINT_TO_FP(SDNode *N);
  SDValue visitUINT_TO_FP(SDNode *N);
  SDValue visitFP_TO_SINT(SDNode *N);
  SDValue visitFP_TO_UINT(SDNode *N);
  SDValue visitFP_ROUND(SDNode *N);
  SDValue visitFP_ROUND_INREG(SDNode *N);
  SDValue visitFP_EXTEND(SDNode *N);
  SDValue visitFNEG(SDNode *N);
  SDValue visitFABS(SDNode *N);
  SDValue visitFCEIL(SDNode *N);
  SDValue visitFTRUNC(SDNode *N);
  SDValue visitFFLOOR(SDNode *N);
  SDValue visitFMINNUM(SDNode *N);
  SDValue visitFMAXNUM(SDNode *N);
  SDValue visitBRCOND(SDNode *N);
  SDValue visitBR_CC(SDNode *N);
  SDValue visitLOAD(SDNode *N);
  SDValue visitSTORE(SDNode *N);
  SDValue visitINSERT_VECTOR_ELT(SDNode *N);
  SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
  SDValue visitBUILD_VECTOR(SDNode *N);
  SDValue visitCONCAT_VECTORS(SDNode *N);
  SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
  SDValue visitVECTOR_SHUFFLE(SDNode *N);
  SDValue visitSCALAR_TO_VECTOR(SDNode *N);
  SDValue visitINSERT_SUBVECTOR(SDNode *N);
  SDValue visitMLOAD(SDNode *N);
  SDValue visitMSTORE(SDNode *N);
  SDValue visitMGATHER(SDNode *N);
  SDValue visitMSCATTER(SDNode *N);
  SDValue visitFP_TO_FP16(SDNode *N);

  SDValue visitFADDForFMACombine(SDNode *N);
  SDValue visitFSUBForFMACombine(SDNode *N);

  SDValue XformToShuffleWithZero(SDNode *N);
  SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);

  SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);

  bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
  SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
  SDValue SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2);
  SDValue SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue N2,
                           SDValue N3, ISD::CondCode CC,
                           bool NotExtCompare = false);
  SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                        SDLoc DL, bool foldBooleans = true);
  bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                         SDValue &CC) const;
  bool isOneUseSetCC(SDValue N) const;

  SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                     unsigned HiOp);
  SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
  SDValue CombineExtLoad(SDNode *N);
  SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
  SDValue BuildSDIV(SDNode *N);
  SDValue BuildSDIVPow2(SDNode *N);
  SDValue BuildUDIV(SDNode *N);
  SDValue BuildReciprocalEstimate(SDValue Op);
  SDValue BuildRsqrtEstimate(SDValue Op);
  SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations);
  SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations);
  SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                             bool DemandHighBits = true);
  SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
  SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                            SDValue InnerPos, SDValue InnerNeg,
                            unsigned PosOpcode, unsigned NegOpcode,
                            SDLoc DL);
  SDNode *MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL);
  SDValue ReduceLoadWidth(SDNode *N);
  SDValue ReduceLoadOpStoreWidth(SDNode *N);
  SDValue TransformFPLoadStorePair(SDNode *N);
  SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
  SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);

  SDValue GetDemandedBits(SDValue V, const APInt &Mask);

  /// Walk up chain skipping non-aliasing memory nodes,
  /// looking for aliasing nodes and adding them to the Aliases vector.
  void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                        SmallVectorImpl<SDValue> &Aliases);

  /// Return true if there is any possibility that the two addresses overlap.
  bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;

  /// Walk up chain skipping non-aliasing memory nodes, looking for a better
  /// chain (an aliasing node).
  SDValue FindBetterChain(SDNode *N, SDValue Chain);

  /// Holds a pointer to an LSBaseSDNode as well as information on where it
  /// is located in a sequence of memory operations connected by a chain.
  struct MemOpLink {
    MemOpLink(LSBaseSDNode *N, int64_t Offset, unsigned Seq)
        : MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) {}
    // Ptr to the mem node.
    LSBaseSDNode *MemNode;
    // Offset from the base ptr.
    int64_t OffsetFromBase;
    // The sequence number of this mem node; the lowest mem operand in the
    // DAG starts at zero.
    unsigned SequenceNum;
  };

  /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a
  /// constant build_vector of the stored constant values in Stores.
  SDValue getMergedConstantVectorStore(SelectionDAG &DAG,
                                       SDLoc SL,
                                       ArrayRef<MemOpLink> Stores,
                                       EVT Ty) const;

  /// This is a helper function for MergeConsecutiveStores. When the source
  /// elements of the consecutive stores are all constants or all extracted
  /// vector elements, try to merge them into one larger store.
  /// \return True if a merged store was created.
  bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                       EVT MemVT, unsigned NumElem,
                                       bool IsConstantSrc, bool UseVector);

  /// This is a helper function for MergeConsecutiveStores.
  /// Stores that may be merged are placed in StoreNodes.
  /// Loads that may alias with those stores are placed in AliasLoadNodes.
  void getStoreMergeAndAliasCandidates(
      StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
      SmallVectorImpl<LSBaseSDNode *> &AliasLoadNodes);

  /// Merge consecutive store operations into a wide store.
  /// This optimization uses wide integers or vectors when possible.
  /// \return True if some memory operations were changed.
  bool MergeConsecutiveStores(StoreSDNode *N);

  /// \brief Try to transform a truncation where C is a constant:
  ///   (trunc (and X, C)) -> (and (trunc X), (trunc C))
  ///
  /// \p N needs to be a truncation and its first operand an AND. Other
  /// requirements are checked by the function (e.g. that trunc is
  /// single-use) and, if they are not met, an empty SDValue is returned.
  SDValue distributeTruncateThroughAnd(SDNode *N);

public:
  DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
      : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
        OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
    auto *F = DAG.getMachineFunction().getFunction();
    ForCodeSize = F->hasFnAttribute(Attribute::OptimizeForSize) ||
                  F->hasFnAttribute(Attribute::MinSize);
  }

  /// Runs the dag combiner on all nodes in the worklist.
  void Run(CombineLevel AtLevel);

  SelectionDAG &getDAG() const { return DAG; }

  /// Returns a type large enough to hold any valid shift amount - before type
  /// legalization these can be huge.
  EVT getShiftAmountTy(EVT LHSTy) {
    assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
    if (LHSTy.isVector())
      return LHSTy;
    auto &DL = DAG.getDataLayout();
    return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy)
                      : TLI.getPointerTy(DL);
  }

  /// This method returns true if we are running before type legalization or
  /// if the specified VT is legal.
  bool isTypeLegal(const EVT &VT) {
    if (!LegalTypes) return true;
    return TLI.isTypeLegal(VT);
  }

  /// Convenience wrapper around TargetLowering::getSetCCResultType.
  EVT getSetCCResultType(EVT VT) const {
    return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  }
};
} // end anonymous namespace
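
// Illustrative driver (a sketch; the real entry point is expected to be
// SelectionDAG::Combine, which constructs the combiner in essentially this
// shape -- treat the exact call as an assumption):
//
//   DAGCombiner(DAG, AA, OptLevel).Run(Level);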

namespace {
/// This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
class WorklistRemover : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistRemover(DAGCombiner &dc)
      : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  void NodeDeleted(SDNode *N, SDNode *E) override {
    DC.removeFromWorklist(N);
  }
};
} // end anonymous namespace
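
// Typical RAII usage (this exact pattern appears in the helpers below):
// register the listener for the duration of a replacement so that any node
// the DAG deletes as a side effect is also dropped from the worklist.
//
//   WorklistRemover DeadNodes(*this);
//   DAG.ReplaceAllUsesWith(N, To);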

//===----------------------------------------------------------------------===//
//  TargetLowering::DAGCombinerInfo implementation
//===----------------------------------------------------------------------===//

void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
  ((DAGCombiner*)DC)->AddToWorklist(N);
}

void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) {
  ((DAGCombiner*)DC)->removeFromWorklist(N);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
}

void TargetLowering::DAGCombinerInfo::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
}

//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//

void DAGCombiner::deleteAndRecombine(SDNode *N) {
  removeFromWorklist(N);

  // If the operands of this node are only used by the node, they will now be
  // dead. Make sure to re-visit them and recursively delete dead nodes.
  for (const SDValue &Op : N->ops())
    // For an operand generating multiple values, one of the values may
    // become dead allowing further simplification (e.g. split index
    // arithmetic from an indexed load).
    if (Op->hasOneUse() || Op->getNumValues() > 1)
      AddToWorklist(Op.getNode());

  DAG.DeleteNode(N);
}

/// Return 1 if we can compute the negated form of the specified expression
/// for the same cost as the expression itself, or 2 if we can compute the
/// negated form more cheaply than the expression itself.
static char isNegatibleForFree(SDValue Op, bool LegalOperations,
                               const TargetLowering &TLI,
                               const TargetOptions *Options,
                               unsigned Depth = 0) {
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) return 2;

  // Don't allow anything with multiple uses.
  if (!Op.hasOneUse()) return 0;

  // Don't recurse exponentially.
  if (Depth > 6) return 0;

  switch (Op.getOpcode()) {
  default: return 0;
  case ISD::ConstantFP:
    // Don't invert constant FP values after legalize. The negated constant
    // isn't necessarily legal.
    return LegalOperations ? 0 : 1;
  case ISD::FADD:
    // FIXME: determine better conditions for this xform.
    if (!Options->UnsafeFPMath) return 0;

    // After operation legalization, it might not be legal to create new FSUBs.
    if (LegalOperations &&
        !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType()))
      return 0;

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
                                    Options, Depth + 1))
      return V;
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
                              Depth + 1);
  case ISD::FSUB:
    // We can't turn -(A-B) into B-A when we honor signed zeros.
    if (!Options->UnsafeFPMath) return 0;

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return 1;

  case ISD::FMUL:
  case ISD::FDIV:
    if (Options->HonorSignDependentRoundingFPMath()) return 0;

    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
                                    Options, Depth + 1))
      return V;

    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
                              Depth + 1);

  case ISD::FP_EXTEND:
  case ISD::FP_ROUND:
  case ISD::FSIN:
    return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
                              Depth + 1);
  }
}
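
// Worked example (a sketch; assumes Options->UnsafeFPMath is set): for
// Op = (fadd (fneg A), B), the ISD::FADD case recurses into operand 0 and
// the ISD::FNEG case returns 2, so the whole query returns 2: the negated
// form is strictly cheaper, because -((-A) + B) becomes (fsub A, B) with
// the inner fneg erased. GetNegatedExpression below performs that rewrite.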

/// If isNegatibleForFree returns a nonzero value, return the newly negated
/// expression.
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                    bool LegalOperations, unsigned Depth = 0) {
  const TargetOptions &Options = DAG.getTarget().Options;
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);

  // Don't allow anything with multiple uses.
  assert(Op.hasOneUse() && "Unknown reuse!");

  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown code");
  case ISD::ConstantFP: {
    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
  }
  case ISD::FADD:
    // FIXME: determine better conditions for this xform.
    assert(Options.UnsafeFPMath);

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
      return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1));
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, Depth+1),
                       Op.getOperand(0));
  case ISD::FSUB:
    // We can't turn -(A-B) into B-A when we honor signed zeros.
    assert(Options.UnsafeFPMath);

    // fold (fneg (fsub 0, B)) -> B
    if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
      if (N0CFP->isZero())
        return Op.getOperand(1);

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(0));

  case ISD::FMUL:
  case ISD::FDIV:
    assert(!Options.HonorSignDependentRoundingFPMath());

    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1));

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       Op.getOperand(0),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, Depth+1));

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, Depth+1));
  case ISD::FP_ROUND:
    return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, Depth+1),
                       Op.getOperand(1));
  }
}
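
// Typical paired usage (a sketch; the enclosing fold is hypothetical but
// matches the asserts above -- negate only when the query says it is free):
//
//   if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
//     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0,
//                        GetNegatedExpression(N1, DAG, LegalOperations));
//
// i.e. (fsub N0, N1) may become (fadd N0, (fneg N1)) precisely when the
// fneg costs nothing.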

// Return true if this node is a setcc, or is a select_cc
// that selects between the target values used for true and false, making it
// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
// the appropriate nodes based on the type of node we are checking. This
// simplifies life a bit for the callers.
bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                                    SDValue &CC) const {
  if (N.getOpcode() == ISD::SETCC) {
    LHS = N.getOperand(0);
    RHS = N.getOperand(1);
    CC  = N.getOperand(2);
    return true;
  }

  if (N.getOpcode() != ISD::SELECT_CC ||
      !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
      !TLI.isConstFalseVal(N.getOperand(3).getNode()))
    return false;

  if (TLI.getBooleanContents(N.getValueType()) ==
      TargetLowering::UndefinedBooleanContent)
    return false;

  LHS = N.getOperand(0);
  RHS = N.getOperand(1);
  CC  = N.getOperand(4);
  return true;
}

/// Return true if this is a SetCC-equivalent operation with only one use.
/// If this is true, it allows the users to invert the operation for free when
/// it is profitable to do so.
bool DAGCombiner::isOneUseSetCC(SDValue N) const {
  SDValue N0, N1, N2;
  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
    return true;
  return false;
}

/// Returns true if N is a BUILD_VECTOR node whose
/// elements are all the same constant or undefined.
static bool isConstantSplatVector(SDNode *N, APInt &SplatValue) {
  BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N);
  if (!C)
    return false;

  APInt SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  EVT EltVT = N->getValueType(0).getVectorElementType();
  return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
                             HasAnyUndefs) &&
          EltVT.getSizeInBits() >= SplatBitSize);
}

// \brief Returns the SDNode if it is a constant integer BuildVector
// or constant integer.
static SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) {
  if (isa<ConstantSDNode>(N))
    return N.getNode();
  if (ISD::isBuildVectorOfConstantSDNodes(N.getNode()))
    return N.getNode();
  return nullptr;
}

// \brief Returns the SDNode if it is a constant float BuildVector
// or constant float.
static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
  if (isa<ConstantFPSDNode>(N))
    return N.getNode();
  if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
    return N.getNode();
  return nullptr;
}

// \brief Returns the SDNode if it is a constant splat BuildVector or constant
// int.
static ConstantSDNode *isConstOrConstSplat(SDValue N) {
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
    return CN;

  if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
    BitVector UndefElements;
    ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);

    // BuildVectors can truncate their operands. Ignore that case here.
    // FIXME: We blindly ignore splats which include undef which is overly
    // pessimistic.
    if (CN && UndefElements.none() &&
        CN->getValueType(0) == N.getValueType().getScalarType())
      return CN;
  }

  return nullptr;
}

// \brief Returns the SDNode if it is a constant splat BuildVector or constant
// float.
static ConstantFPSDNode *isConstOrConstSplatFP(SDValue N) {
  if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
    return CN;

  if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
    BitVector UndefElements;
    ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);

    if (CN && UndefElements.none())
      return CN;
  }

  return nullptr;
}

SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
                                    SDValue N0, SDValue N1) {
  EVT VT = N0.getValueType();
  if (N0.getOpcode() == Opc) {
    if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
      if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1)) {
        // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
          return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
        return SDValue();
      }
      if (N0.hasOneUse()) {
        // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff (op x, c1)
        // has one use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
      }
    }
  }

  if (N1.getOpcode() == Opc) {
    if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
      if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0)) {
        // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
          return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
        return SDValue();
      }
      if (N1.hasOneUse()) {
        // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff (op x, c1)
        // has one use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N1.getOperand(0), N0);
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
      }
    }
  }

  return SDValue();
}
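
// Worked example (a sketch of the first pattern above): with Opc = ISD::ADD,
// N0 = (add x, c1) and N1 = c2, FoldConstantArithmetic folds the constants:
//
//   (add (add x, 1), 2) -> (add x, (add 1, 2)) -> (add x, 3)
//
// Note that the one-use restriction only guards the second pattern, where a
// non-constant operand is pulled underneath the inner operation.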

SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                               bool AddTo) {
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.1 ";
        N->dump(&DAG);
        dbgs() << "\nWith: ";
        To[0].getNode()->dump(&DAG);
        dbgs() << " and " << NumTo-1 << " other values\n");
  for (unsigned i = 0, e = NumTo; i != e; ++i)
    assert((!To[i].getNode() ||
            N->getValueType(i) == To[i].getValueType()) &&
           "Cannot combine value to value of different type!");

  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To);
  if (AddTo) {
    // Push the new nodes and any users onto the worklist
    for (unsigned i = 0, e = NumTo; i != e; ++i) {
      if (To[i].getNode()) {
        AddToWorklist(To[i].getNode());
        AddUsersToWorklist(To[i].getNode());
      }
    }
  }

  // Finally, if the node is now dead, remove it from the graph. The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (N->use_empty())
    deleteAndRecombine(N);
  return SDValue(N, 0);
}

void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  // Replace all uses. If any nodes become isomorphic to other nodes and
  // are deleted, make sure to remove them from our worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);

  // Push the new node and any (possibly new) users onto the worklist.
  AddToWorklist(TLO.New.getNode());
  AddUsersToWorklist(TLO.New.getNode());

  // Finally, if the node is now dead, remove it from the graph. The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (TLO.Old.getNode()->use_empty())
    deleteAndRecombine(TLO.Old.getNode());
}

/// Check the specified integer node value to see if it can be simplified or if
/// things it uses can be simplified by bit propagation. If so, return true.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  APInt KnownZero, KnownOne;
  if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.2 ";
        TLO.Old.getNode()->dump(&DAG);
        dbgs() << "\nWith: ";
        TLO.New.getNode()->dump(&DAG);
        dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}

void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  SDLoc dl(Load);
  EVT VT = Load->getValueType(0);
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0));

  DEBUG(dbgs() << "\nReplacing.9 ";
        Load->dump(&DAG);
        dbgs() << "\nWith: ";
        Trunc.getNode()->dump(&DAG);
        dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
  deleteAndRecombine(Load);
  AddToWorklist(Trunc.getNode());
}

SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  Replace = false;
  SDLoc dl(Op);
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
    EVT MemVT = LD->getMemoryVT();
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
                                                       : ISD::EXTLOAD)
      : LD->getExtensionType();
    Replace = true;
    return DAG.getExtLoad(ExtType, dl, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());
  }

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default: break;
  case ISD::AssertSext:
    return DAG.getNode(ISD::AssertSext, dl, PVT,
                       SExtPromoteOperand(Op.getOperand(0), PVT),
                       Op.getOperand(1));
  case ISD::AssertZext:
    return DAG.getNode(ISD::AssertZext, dl, PVT,
                       ZExtPromoteOperand(Op.getOperand(0), PVT),
                       Op.getOperand(1));
  case ISD::Constant: {
    unsigned ExtOpc =
      Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, dl, PVT, Op);
  }
  }

  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    return SDValue();
  return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op);
}

SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
    return SDValue();
  EVT OldVT = Op.getValueType();
  SDLoc dl(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp,
                     DAG.getValueType(OldVT));
}

SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
  EVT OldVT = Op.getValueType();
  SDLoc dl(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getZeroExtendInReg(NewOp, dl, OldVT);
}

/// Promote the specified integer binary operation if the target indicates it
/// is beneficial; e.g., on x86 it's usually better to promote i16 operations
/// to i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
    if (!NN0.getNode())
      return SDValue();

    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1;
    if (N0 == N1)
      NN1 = NN0;
    else {
      NN1 = PromoteOperand(N1, PVT, Replace1);
      if (!NN1.getNode())
        return SDValue();
    }

    AddToWorklist(NN0.getNode());
    if (NN1.getNode())
      AddToWorklist(NN1.getNode());

    if (Replace0)
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    if (Replace1)
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());

    DEBUG(dbgs() << "\nPromoting ";
          Op.getNode()->dump(&DAG));
    SDLoc dl(Op);
    return DAG.getNode(ISD::TRUNCATE, dl, VT,
                       DAG.getNode(Opc, dl, PVT, NN0, NN1));
  }
  return SDValue();
}
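
// Worked example (hedged; an x86-style i16 -> i32 promotion, as the comment
// above describes): for Op = (i16 add a, b) and PVT = i32 the result is
//
//   (i16 trunc (i32 add (i32 any_extend a), (i32 any_extend b)))
//
// where the exact extension kind per operand (sign, zero, or any extend) is
// chosen by PromoteOperand above.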

/// Promote the specified integer shift operation if the target indicates it
/// is beneficial; e.g., on x86 it's usually better to promote i16 operations
/// to i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    bool Replace = false;
    SDValue N0 = Op.getOperand(0);
    if (Opc == ISD::SRA)
      N0 = SExtPromoteOperand(Op.getOperand(0), PVT);
    else if (Opc == ISD::SRL)
      N0 = ZExtPromoteOperand(Op.getOperand(0), PVT);
    else
      N0 = PromoteOperand(N0, PVT, Replace);
    if (!N0.getNode())
      return SDValue();

    AddToWorklist(N0.getNode());
    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

    DEBUG(dbgs() << "\nPromoting ";
          Op.getNode()->dump(&DAG));
    SDLoc dl(Op);
    return DAG.getNode(ISD::TRUNCATE, dl, VT,
                       DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1)));
  }
  return SDValue();
}

SDValue DAGCombiner::PromoteExtend(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");
    // fold (aext (aext x)) -> (aext x)
    // fold (aext (zext x)) -> (zext x)
    // fold (aext (sext x)) -> (sext x)
    DEBUG(dbgs() << "\nPromoting ";
          Op.getNode()->dump(&DAG));
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
  }
  return SDValue();
}

bool DAGCombiner::PromoteLoad(SDValue Op) {
  if (!LegalOperations)
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return false;

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    SDLoc dl(Op);
    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT MemVT = LD->getMemoryVT();
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
                                                       : ISD::EXTLOAD)
      : LD->getExtensionType();
    SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   MemVT, LD->getMemOperand());
    SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);

    DEBUG(dbgs() << "\nPromoting ";
          N->dump(&DAG);
          dbgs() << "\nTo: ";
          Result.getNode()->dump(&DAG);
          dbgs() << '\n');
    WorklistRemover DeadNodes(*this);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    deleteAndRecombine(N);
    AddToWorklist(Result.getNode());
    return true;
  }
  return false;
}

/// \brief Recursively delete a node which has no uses and any operands for
/// which it is the only use.
///
/// Note that this both deletes the nodes and removes them from the worklist.
/// It also adds any nodes that have had a user deleted to the worklist, as
/// they may now have only one use and be subject to other combines.
bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
  if (!N->use_empty())
    return false;

  SmallSetVector<SDNode *, 16> Nodes;
  Nodes.insert(N);
  do {
    N = Nodes.pop_back_val();
    if (!N)
      continue;

    if (N->use_empty()) {
      for (const SDValue &ChildN : N->op_values())
        Nodes.insert(ChildN.getNode());

      removeFromWorklist(N);
      DAG.DeleteNode(N);
    } else {
      AddToWorklist(N);
    }
  } while (!Nodes.empty());
  return true;
}

//===----------------------------------------------------------------------===//
//  Main DAG Combiner implementation
//===----------------------------------------------------------------------===//

void DAGCombiner::Run(CombineLevel AtLevel) {
  // Set the instance variables, so that the various visit routines can use
  // them.
  Level = AtLevel;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  // Add all the dag nodes to the worklist.
  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
       E = DAG.allnodes_end(); I != E; ++I)
    AddToWorklist(I);

  // Create a dummy node (which is not added to allnodes), that adds a
  // reference to the root node, preventing it from being deleted, and
  // tracking any changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While the worklist isn't empty, find a node and try to combine it.
  while (!WorklistMap.empty()) {
    SDNode *N;
    // The Worklist holds the SDNodes in order, but it may contain null
    // entries.
    do {
      N = Worklist.pop_back_val();
    } while (!N);

    bool GoodWorklistEntry = WorklistMap.erase(N);
    (void)GoodWorklistEntry;
    assert(GoodWorklistEntry &&
           "Found a worklist entry without a corresponding map entry!");

    // If N has no uses, it is dead. Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (Level == AfterLegalizeDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      for (SDNode *LN : UpdatedNodes) {
        AddToWorklist(LN);
        AddUsersToWorklist(LN);
      }
      if (!NIsValid)
        continue;
    }

    DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
    CombinedNodes.insert(N);
    for (const SDValue &ChildN : N->op_values())
      if (!CombinedNodes.count(ChildN.getNode()))
        AddToWorklist(ChildN.getNode());

    SDValue RV = combine(N);

    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used. Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    DEBUG(dbgs() << " ... into: ";
          RV.getNode()->dump(&DAG));

    // Transfer debug value.
    DAG.TransferDbgValues(SDValue(N, 0), RV);
    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      SDValue OpV = RV;
      DAG.ReplaceAllUsesWith(N, &OpV);
    }

    // Push the new node and any users onto the worklist
    AddToWorklist(RV.getNode());
    AddUsersToWorklist(RV.getNode());

    // Finally, if the node is now dead, remove it from the graph. The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node. This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load), update the root.
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}

SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor: return visitTokenFactor(N);
  case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
  case ISD::ADD: return visitADD(N);
  case ISD::SUB: return visitSUB(N);
  case ISD::ADDC: return visitADDC(N);
  case ISD::SUBC: return visitSUBC(N);
  case ISD::ADDE: return visitADDE(N);
  case ISD::SUBE: return visitSUBE(N);
  case ISD::MUL: return visitMUL(N);
  case ISD::SDIV: return visitSDIV(N);
  case ISD::UDIV: return visitUDIV(N);
  case ISD::SREM: return visitSREM(N);
  case ISD::UREM: return visitUREM(N);
  case ISD::MULHU: return visitMULHU(N);
  case ISD::MULHS: return visitMULHS(N);
  case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
  case ISD::SMULO: return visitSMULO(N);
  case ISD::UMULO: return visitUMULO(N);
  case ISD::SDIVREM: return visitSDIVREM(N);
  case ISD::UDIVREM: return visitUDIVREM(N);
  case ISD::AND: return visitAND(N);
  case ISD::OR: return visitOR(N);
  case ISD::XOR: return visitXOR(N);
  case ISD::SHL: return visitSHL(N);
  case ISD::SRA: return visitSRA(N);
  case ISD::SRL: return visitSRL(N);
  case ISD::ROTR:
  case ISD::ROTL: return visitRotate(N);
  case ISD::BSWAP: return visitBSWAP(N);
  case ISD::CTLZ: return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ: return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP: return visitCTPOP(N);
  case ISD::SELECT: return visitSELECT(N);
  case ISD::VSELECT: return visitVSELECT(N);
  case ISD::SELECT_CC: return visitSELECT_CC(N);
  case ISD::SETCC: return visitSETCC(N);
  case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
  case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE: return visitTRUNCATE(N);
  case ISD::BITCAST: return visitBITCAST(N);
  case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
  case ISD::FADD: return visitFADD(N);
  case ISD::FSUB: return visitFSUB(N);
  case ISD::FMUL: return visitFMUL(N);
  case ISD::FMA: return visitFMA(N);
  case ISD::FDIV: return visitFDIV(N);
  case ISD::FREM: return visitFREM(N);
  case ISD::FSQRT: return visitFSQRT(N);
  case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
  case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
  case ISD::FP_ROUND: return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND: return visitFP_EXTEND(N);
  case ISD::FNEG: return visitFNEG(N);
  case ISD::FABS: return visitFABS(N);
  case ISD::FFLOOR: return visitFFLOOR(N);
  case ISD::FMINNUM: return visitFMINNUM(N);
  case ISD::FMAXNUM: return visitFMAXNUM(N);
  case ISD::FCEIL: return visitFCEIL(N);
  case ISD::FTRUNC: return visitFTRUNC(N);
  case ISD::BRCOND: return visitBRCOND(N);
  case ISD::BR_CC: return visitBR_CC(N);
  case ISD::LOAD: return visitLOAD(N);
  case ISD::STORE: return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
  case ISD::MGATHER: return visitMGATHER(N);
  case ISD::MLOAD: return visitMLOAD(N);
  case ISD::MSCATTER: return visitMSCATTER(N);
  case ISD::MSTORE: return visitMSTORE(N);
  case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
  }
  return SDValue();
}
  1229. SDValue DAGCombiner::combine(SDNode *N) {
  1230. SDValue RV = visit(N);
  1231. // If nothing happened, try a target-specific DAG combine.
  1232. if (!RV.getNode()) {
  1233. assert(N->getOpcode() != ISD::DELETED_NODE &&
  1234. "Node was deleted but visit returned NULL!");
  1235. if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
  1236. TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
  1237. // Expose the DAG combiner to the target combiner impls.
  1238. TargetLowering::DAGCombinerInfo
  1239. DagCombineInfo(DAG, Level, false, this);
  1240. RV = TLI.PerformDAGCombine(N, DagCombineInfo);
  1241. }
  1242. }
  1243. // If nothing happened still, try promoting the operation.
  1244. if (!RV.getNode()) {
  1245. switch (N->getOpcode()) {
  1246. default: break;
  1247. case ISD::ADD:
  1248. case ISD::SUB:
  1249. case ISD::MUL:
  1250. case ISD::AND:
  1251. case ISD::OR:
  1252. case ISD::XOR:
  1253. RV = PromoteIntBinOp(SDValue(N, 0));
  1254. break;
  1255. case ISD::SHL:
  1256. case ISD::SRA:
  1257. case ISD::SRL:
  1258. RV = PromoteIntShiftOp(SDValue(N, 0));
  1259. break;
  1260. case ISD::SIGN_EXTEND:
  1261. case ISD::ZERO_EXTEND:
  1262. case ISD::ANY_EXTEND:
  1263. RV = PromoteExtend(SDValue(N, 0));
  1264. break;
  1265. case ISD::LOAD:
  1266. if (PromoteLoad(SDValue(N, 0)))
  1267. RV = SDValue(N, 0);
  1268. break;
  1269. }
  1270. }
  1271. // If N is a commutative binary node, try commuting it to enable more
  1272. // sdisel CSE.
  1273. if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
  1274. N->getNumValues() == 1) {
  1275. SDValue N0 = N->getOperand(0);
  1276. SDValue N1 = N->getOperand(1);
  1277. // Constant operands are canonicalized to RHS.
  1278. if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
  1279. SDValue Ops[] = {N1, N0};
  1280. SDNode *CSENode;
  1281. if (const auto *BinNode = dyn_cast<BinaryWithFlagsSDNode>(N)) {
  1282. CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
  1283. &BinNode->Flags);
  1284. } else {
  1285. CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops);
  1286. }
  1287. if (CSENode)
  1288. return SDValue(CSENode, 0);
  1289. }
  1290. }
  1291. return RV;
  1292. }
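
// Illustrative note (editor's addition, not in the original source): the
// commutation check above means that if the DAG already contains
// (add %b, %a) while we are combining (add %a, %b), getNodeIfExists finds
// the existing node and we return it, so instruction selection sees a single
// node instead of two operand-swapped copies.
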
/// Given a node, return its input chain if it has one, otherwise return a null
/// sd operand.
static SDValue getInputChainForNode(SDNode *N) {
  if (unsigned NumOps = N->getNumOperands()) {
    if (N->getOperand(0).getValueType() == MVT::Other)
      return N->getOperand(0);
    if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
      return N->getOperand(NumOps-1);
    for (unsigned i = 1; i < NumOps-1; ++i)
      if (N->getOperand(i).getValueType() == MVT::Other)
        return N->getOperand(i);
  }
  return SDValue();
}
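
// Illustrative note (editor's addition): chains have type MVT::Other, so for
// a typical memory node such as a LOAD the first check fires (the chain is
// operand 0); the remaining checks cover node kinds that keep their chain as
// the last or a middle operand.
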
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  SmallVector<SDNode *, 8> TFs;   // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;    // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps;
  bool Changed = false;           // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors.  The TFs list grows as new token factors
  // are encountered.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    SDNode *TF = TFs[i];

    // Check each of the operands.
    for (const SDValue &Op : TF->op_values()) {
      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        if (Op.hasOneUse() &&
            std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          // Clean up in case the token factor is removed.
          AddToWorklist(Op.getNode());
          Changed = true;
          break;
        }
        // Fall through.

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()).second)
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  SDValue Result;

  // If we've changed things around then replace token factor.
  if (Changed) {
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      // New and improved token factor.
      Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
    }

    // Add users to worklist if AA is enabled, since it may introduce
    // a lot of new chained token factors while removing memory deps.
    bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
                                                    : DAG.getSubtarget().useAA();
    return CombineTo(N, Result, UseAA /*add to worklist*/);
  }

  return Result;
}
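
// Worked example for the flattening loop above (editor's illustration):
// TokenFactor(TokenFactor(A, B), B, EntryToken) collapses to
// TokenFactor(A, B); the single-use nested factor is inlined, the duplicate
// B is rejected by SeenOps, and the entry token is dropped as redundant.
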
/// MERGE_VALUES can always be eliminated.
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
  WorklistRemover DeadNodes(*this);
  // Replacing results may cause a different MERGE_VALUES to suddenly
  // be CSE'd with N, and carry its uses with it. Iterate until no
  // uses remain, to ensure that the node can be safely deleted.
  // First add the users of this node to the work list so that they
  // can be tried again once they have new operands.
  AddUsersToWorklist(N);
  do {
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
  } while (!N->use_empty());
  deleteAndRecombine(N);
  return SDValue(N, 0);   // Return N so it doesn't get rechecked!
}

static bool isNullConstant(SDValue V) {
  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
  return Const != nullptr && Const->isNullValue();
}

static bool isNullFPConstant(SDValue V) {
  ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V);
  return Const != nullptr && Const->isZero() && !Const->isNegative();
}

static bool isAllOnesConstant(SDValue V) {
  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
  return Const != nullptr && Const->isAllOnesValue();
}

static bool isOneConstant(SDValue V) {
  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
  return Const != nullptr && Const->isOne();
}

/// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
/// ConstantSDNode pointer; otherwise return nullptr.
static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
  return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
}

SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (add x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
  }

  // fold (add x, undef) -> undef
  if (N0.getOpcode() == ISD::UNDEF)
    return N0;
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  // fold (add c1, c2) -> c1+c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT, N0C, N1C);

  // canonicalize constant to RHS
  if (isConstantIntBuildVectorOrConstantInt(N0) &&
      !isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0);

  // fold (add x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  // fold (add Sym, c) -> Sym+c
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C &&
        GA->getOpcode() == ISD::GlobalAddress)
      return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                  GA->getOffset() +
                                  (uint64_t)N1C->getSExtValue());

  // fold ((c1-A)+c2) -> (c1+c2)-A
  if (N1C && N0.getOpcode() == ISD::SUB)
    if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) {
      SDLoc DL(N);
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getConstant(N1C->getAPIntValue()+
                                         N0C->getAPIntValue(), DL, VT),
                         N0.getOperand(1));
    }

  // reassociate add
  if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1))
    return RADD;

  // fold ((0-A) + B) -> B-A
  if (N0.getOpcode() == ISD::SUB && isNullConstant(N0.getOperand(0)))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1));

  // fold (A + (0-B)) -> A-B
  if (N1.getOpcode() == ISD::SUB && isNullConstant(N1.getOperand(0)))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1));

  // fold (A+(B-A)) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    return N1.getOperand(0);

  // fold ((B-A)+A) -> B
  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    return N0.getOperand(0);

  // fold (A+(B-(A+C))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(0))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(1));

  // fold (A+(B-(C+A))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(1))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(0));

  // fold (A+((B-A)+or-C)) to (B+or-C)
  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
      N1.getOperand(0).getOpcode() == ISD::SUB &&
      N0 == N1.getOperand(0).getOperand(1))
    return DAG.getNode(N1.getOpcode(), SDLoc(N), VT,
                       N1.getOperand(0).getOperand(0), N1.getOperand(1));

  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    SDValue N10 = N1.getOperand(0);
    SDValue N11 = N1.getOperand(1);

    if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
      return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                         DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
                         DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
  }

  if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if (VT.isInteger() && !VT.isVector()) {
    APInt LHSZero, LHSOne;
    APInt RHSZero, RHSOne;
    DAG.computeKnownBits(N0, LHSZero, LHSOne);

    if (LHSZero.getBoolValue()) {
      DAG.computeKnownBits(N1, RHSZero, RHSOne);

      // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
      // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
      if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) {
        if (!LegalOperations || TLI.isOperationLegal(ISD::OR, VT))
          return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
      }
    }
  }
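
  // Illustrative instance of the add->or fold above (editor's sketch): with
  // N0 = (and X, 0xF0) and N1 = (and Y, 0x0F), known bits prove the operands
  // share no set bits, so the add is emitted as (or N0, N1), which exposes
  // further bitwise folds.
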
  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
      isNullConstant(N1.getOperand(0).getOperand(0)))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0,
                       DAG.getNode(ISD::SHL, SDLoc(N), VT,
                                   N1.getOperand(0).getOperand(1),
                                   N1.getOperand(1)));
  if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB &&
      isNullConstant(N0.getOperand(0).getOperand(0)))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1,
                       DAG.getNode(ISD::SHL, SDLoc(N), VT,
                                   N0.getOperand(0).getOperand(1),
                                   N0.getOperand(1)));

  if (N1.getOpcode() == ISD::AND) {
    SDValue AndOp0 = N1.getOperand(0);
    unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
    unsigned DestBits = VT.getScalarType().getSizeInBits();

    // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
    // and similar xforms where the inner op is either ~0 or 0.
    if (NumSignBits == DestBits && isOneConstant(N1->getOperand(1))) {
      SDLoc DL(N);
      return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
    }
  }

  // add (sext i1), X -> sub X, (zext i1)
  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
      N0.getOperand(0).getValueType() == MVT::i1 &&
      !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
    SDLoc DL(N);
    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
  }

  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDLoc DL(N);
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitADDC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE,
                                 SDLoc(N), MVT::Glue));

  // canonicalize constant to RHS.
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0);

  // fold (addc x, 0) -> x + no carry out
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
                                        SDLoc(N), MVT::Glue));

  // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
  APInt LHSZero, LHSOne;
  APInt RHSZero, RHSOne;
  DAG.computeKnownBits(N0, LHSZero, LHSOne);

  if (LHSZero.getBoolValue()) {
    DAG.computeKnownBits(N1, RHSZero, RHSOne);

    // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
    // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
    if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
      return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1),
                       DAG.getNode(ISD::CARRY_FALSE,
                                   SDLoc(N), MVT::Glue));
  }

  return SDValue();
}

SDValue DAGCombiner::visitADDE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // canonicalize constant to RHS
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
                       N1, N0, CarryIn);

  // fold (adde x, y, false) -> (addc x, y)
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);

  return SDValue();
}

// Since it may not be valid to emit a fold to zero for vector initializers,
// check whether we can before folding.
static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT,
                             SelectionDAG &DAG,
                             bool LegalOperations, bool LegalTypes) {
  if (!VT.isVector())
    return DAG.getConstant(0, DL, VT);
  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
    return DAG.getConstant(0, DL, VT);
  return SDValue();
}

SDValue DAGCombiner::visitSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (sub x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (sub x, x) -> 0
  // FIXME: Refactor this and xor and other similar operations together.
  if (N0 == N1)
    return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);

  // fold (sub c1, c2) -> c1-c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT, N0C, N1C);

  // fold (sub x, c) -> (add x, -c)
  if (N1C) {
    SDLoc DL(N);
    return DAG.getNode(ISD::ADD, DL, VT, N0,
                       DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
  }
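
  // Illustrative instance (editor's sketch): on i32, (sub X, 7) becomes
  // (add X, 0xFFFFFFF9), i.e. X + (-7) in two's complement; canonicalizing
  // on ADD lets the reassociation and constant-RHS folds apply uniformly.
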
  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
  if (isAllOnesConstant(N0))
    return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);

  // fold A-(A-B) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
    return N1.getOperand(1);

  // fold (A+B)-A -> B
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
    return N0.getOperand(1);

  // fold (A+B)-B -> A
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
    return N0.getOperand(0);

  // fold C2-(A+C1) -> (C2-C1)-A
  ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
    dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
  if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
    SDLoc DL(N);
    SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
                                   DL, VT);
    return DAG.getNode(ISD::SUB, DL, VT, NewC,
                       N1.getOperand(0));
  }

  // fold ((A+(B+or-C))-B) -> A+or-C
  if (N0.getOpcode() == ISD::ADD &&
      (N0.getOperand(1).getOpcode() == ISD::SUB ||
       N0.getOperand(1).getOpcode() == ISD::ADD) &&
      N0.getOperand(1).getOperand(0) == N1)
    return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(1));

  // fold ((A+(C+B))-B) -> A+C
  if (N0.getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(0));

  // fold ((A-(B-C))-C) -> A-B
  if (N0.getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(0));

  // If either operand of a sub is undef, the result is undef
  if (N0.getOpcode() == ISD::UNDEF)
    return N0;
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  // If the relocation model supports it, consider symbol offsets.
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
      // fold (sub Sym, c) -> Sym-c
      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
        return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                    GA->getOffset() -
                                    (uint64_t)N1C->getSExtValue());
      // fold (sub Sym+c1, Sym+c2) -> c1-c2
      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
        if (GA->getGlobal() == GB->getGlobal())
          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
                                 SDLoc(N), VT);
    }

  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDLoc DL(N);
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitSUBC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // If the flag result is dead, turn this into a SUB.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
                                 MVT::Glue));

  // fold (subc x, x) -> 0 + no borrow
  if (N0 == N1) {
    SDLoc DL(N);
    return CombineTo(N, DAG.getConstant(0, DL, VT),
                     DAG.getNode(ISD::CARRY_FALSE, DL,
                                 MVT::Glue));
  }

  // fold (subc x, 0) -> x + no borrow
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
                                        MVT::Glue));

  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
  if (isAllOnesConstant(N0))
    return CombineTo(N, DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0),
                     DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
                                 MVT::Glue));

  return SDValue();
}

SDValue DAGCombiner::visitSUBE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // fold (sube x, y, false) -> (subc x, y)
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);

  return SDValue();
}

SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold (mul x, undef) -> 0
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, SDLoc(N), VT);

  bool N0IsConst = false;
  bool N1IsConst = false;
  bool N1IsOpaqueConst = false;
  bool N0IsOpaqueConst = false;
  APInt ConstValue0, ConstValue1;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0);
    N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1);
  } else {
    N0IsConst = isa<ConstantSDNode>(N0);
    if (N0IsConst) {
      ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
      N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
    }
    N1IsConst = isa<ConstantSDNode>(N1);
    if (N1IsConst) {
      ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
      N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
    }
  }

  // fold (mul c1, c2) -> c1*c2
  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
    return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
                                      N0.getNode(), N1.getNode());

  // canonicalize constant to RHS (vector doesn't have to splat)
  if (isConstantIntBuildVectorOrConstantInt(N0) &&
      !isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);

  // fold (mul x, 0) -> 0
  if (N1IsConst && ConstValue1 == 0)
    return N1;

  // We require a splat of the entire scalar bit width for non-contiguous
  // bit patterns.
  bool IsFullSplat =
    ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits();

  // fold (mul x, 1) -> x
  if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
    return N0;

  // fold (mul x, -1) -> 0-x
  if (N1IsConst && ConstValue1.isAllOnesValue()) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT), N0);
  }

  // fold (mul x, (1 << c)) -> x << c
  if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() &&
      IsFullSplat) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SHL, DL, VT, N0,
                       DAG.getConstant(ConstValue1.logBase2(), DL,
                                       getShiftAmountTy(N0.getValueType())));
  }
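
  // Illustrative instance (editor's sketch): (mul X, 8) has ConstValue1 == 8,
  // a power of two with logBase2() == 3, so it is rewritten to (shl X, 3).
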
  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() &&
      IsFullSplat) {
    unsigned Log2Val = (-ConstValue1).logBase2();
    SDLoc DL(N);
    // FIXME: If the input is something that is easily negated (e.g. a
    // single-use add), we should put the negate there.
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT),
                       DAG.getNode(ISD::SHL, DL, VT, N0,
                                   DAG.getConstant(Log2Val, DL,
                                       getShiftAmountTy(N0.getValueType()))));
  }

  APInt Val;

  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
  if (N1IsConst && N0.getOpcode() == ISD::SHL &&
      (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
       isa<ConstantSDNode>(N0.getOperand(1)))) {
    SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT,
                             N1, N0.getOperand(1));
    AddToWorklist(C3.getNode());
    return DAG.getNode(ISD::MUL, SDLoc(N), VT,
                       N0.getOperand(0), C3);
  }

  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
  // use.
  {
    SDValue Sh(nullptr, 0), Y(nullptr, 0);
    // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
    if (N0.getOpcode() == ISD::SHL &&
        (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
         isa<ConstantSDNode>(N0.getOperand(1))) &&
        N0.getNode()->hasOneUse()) {
      Sh = N0; Y = N1;
    } else if (N1.getOpcode() == ISD::SHL &&
               isa<ConstantSDNode>(N1.getOperand(1)) &&
               N1.getNode()->hasOneUse()) {
      Sh = N1; Y = N0;
    }

    if (Sh.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
                                Sh.getOperand(0), Y);
      return DAG.getNode(ISD::SHL, SDLoc(N), VT,
                         Mul, Sh.getOperand(1));
    }
  }

  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
  if (N1IsConst && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
      (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
       isa<ConstantSDNode>(N0.getOperand(1))))
    return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                       DAG.getNode(ISD::MUL, SDLoc(N0), VT,
                                   N0.getOperand(0), N1),
                       DAG.getNode(ISD::MUL, SDLoc(N1), VT,
                                   N0.getOperand(1), N1));

  // reassociate mul
  if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
    return RMUL;

  return SDValue();
}

SDValue DAGCombiner::visitSDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (sdiv c1, c2) -> c1/c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SDIV, SDLoc(N), VT, N0C, N1C);

  // fold (sdiv X, 1) -> X
  if (N1C && N1C->isOne())
    return N0;

  // fold (sdiv X, -1) -> 0-X
  if (N1C && N1C->isAllOnesValue()) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT), N0);
  }

  // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
  if (!VT.isVector()) {
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UDIV, SDLoc(N), N1.getValueType(),
                         N0, N1);
  }

  // fold (sdiv X, pow2) -> simple ops after legalize
  // FIXME: We check for the exact bit here because the generic lowering gives
  // better results in that case. The target-specific lowering should learn how
  // to handle exact sdivs efficiently.
  if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
      !cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact() &&
      (N1C->getAPIntValue().isPowerOf2() ||
       (-N1C->getAPIntValue()).isPowerOf2())) {
    // If dividing by powers of two is cheap, then don't perform the following
    // fold.
    if (TLI.isPow2SDivCheap())
      return SDValue();

    // Target-specific implementation of sdiv x, pow2.
    SDValue Res = BuildSDIVPow2(N);
    if (Res.getNode())
      return Res;

    unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
    SDLoc DL(N);

    // Splat the sign bit into the register
    SDValue SGN =
      DAG.getNode(ISD::SRA, DL, VT, N0,
                  DAG.getConstant(VT.getScalarSizeInBits() - 1, DL,
                                  getShiftAmountTy(N0.getValueType())));
    AddToWorklist(SGN.getNode());

    // Add (N0 < 0) ? abs2 - 1 : 0;
    SDValue SRL =
      DAG.getNode(ISD::SRL, DL, VT, SGN,
                  DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL,
                                  getShiftAmountTy(SGN.getValueType())));
    SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL);
    AddToWorklist(SRL.getNode());
    AddToWorklist(ADD.getNode());   // Divide by pow2
    SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD,
                              DAG.getConstant(lg2, DL,
                                  getShiftAmountTy(ADD.getValueType())));

    // If we're dividing by a positive value, we're done.  Otherwise, we must
    // negate the result.
    if (N1C->getAPIntValue().isNonNegative())
      return SRA;

    AddToWorklist(SRA.getNode());
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
  }
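
  // Worked example of the sequence above (editor's illustration): for
  // (sdiv i32 X, 4), lg2 == 2 and the emitted nodes are
  //   SGN = sra X, 31    ; 0 for X >= 0, -1 for X < 0
  //   SRL = srl SGN, 30  ; 0 or 3, i.e. the divisor-minus-one bias
  //   ADD = add X, SRL   ; round negative X toward zero
  //   SRA = sra ADD, 2   ; the signed quotient
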
  // If integer divide is expensive and we satisfy the requirements, emit an
  // alternate sequence.
  if (N1C && !TLI.isIntDivCheap()) {
    SDValue Op = BuildSDIV(N);
    if (Op.getNode()) return Op;
  }

  // undef / X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, SDLoc(N), VT);
  // X / undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

SDValue DAGCombiner::visitUDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (udiv c1, c2) -> c1/c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, SDLoc(N), VT,
                                                    N0C, N1C))
      return Folded;

  // fold (udiv x, (1 << c)) -> x >>u c
  if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2()) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SRL, DL, VT, N0,
                       DAG.getConstant(N1C->getAPIntValue().logBase2(), DL,
                                       getShiftAmountTy(N0.getValueType())));
  }
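
  // Illustrative instance (editor's sketch): (udiv X, 16) becomes (srl X, 4),
  // since unsigned division by 1 << c is exactly a logical right shift by c.
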
  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
  if (N1.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
      if (SHC->getAPIntValue().isPowerOf2()) {
        EVT ADDVT = N1.getOperand(1).getValueType();
        SDLoc DL(N);
        SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT,
                                  N1.getOperand(1),
                                  DAG.getConstant(SHC->getAPIntValue()
                                                      .logBase2(),
                                                  DL, ADDVT));
        AddToWorklist(Add.getNode());
        return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
      }
    }
  }

  // fold (udiv x, c) -> alternate
  if (N1C && !TLI.isIntDivCheap()) {
    SDValue Op = BuildUDIV(N);
    if (Op.getNode()) return Op;
  }

  // undef / X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, SDLoc(N), VT);
  // X / undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

SDValue DAGCombiner::visitSREM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // fold (srem c1, c2) -> c1%c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::SREM, SDLoc(N), VT,
                                                    N0C, N1C))
      return Folded;

  // If we know the sign bits of both operands are zero, strength reduce to a
  // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
  if (!VT.isVector()) {
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UREM, SDLoc(N), VT, N0, N1);
  }

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  if (N1C && !N1C->isNullValue()) {
    SDValue Div = DAG.getNode(ISD::SDIV, SDLoc(N), VT, N0, N1);
    AddToWorklist(Div.getNode());
    SDValue OptimizedDiv = combine(Div.getNode());
    if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
                                OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul);
      AddToWorklist(Mul.getNode());
      return Sub;
    }
  }
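
  // Illustrative instance of the rewrite above (editor's sketch): if
  // (sdiv X, 10) is itself combinable, e.g. into a multiply-high sequence,
  // then (srem X, 10) becomes X - (X/10)*10, reusing the cheaper division
  // instead of a hardware remainder.
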
  // undef % X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, SDLoc(N), VT);
  // X % undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

SDValue DAGCombiner::visitUREM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // fold (urem c1, c2) -> c1%c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UREM, SDLoc(N), VT,
                                                    N0C, N1C))
      return Folded;

  // fold (urem x, pow2) -> (and x, pow2-1)
  if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
      N1C->getAPIntValue().isPowerOf2()) {
    SDLoc DL(N);
    return DAG.getNode(ISD::AND, DL, VT, N0,
                       DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT));
  }
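
  // Illustrative instance (editor's sketch): (urem X, 8) becomes (and X, 7),
  // because the remainder modulo 1 << c is just the low c bits of X.
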
  // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
  if (N1.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
      if (SHC->getAPIntValue().isPowerOf2()) {
        SDLoc DL(N);
        SDValue Add =
          DAG.getNode(ISD::ADD, DL, VT, N1,
                      DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()),
                                      DL, VT));
        AddToWorklist(Add.getNode());
        return DAG.getNode(ISD::AND, DL, VT, N0, Add);
      }
    }
  }

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  if (N1C && !N1C->isNullValue()) {
    SDValue Div = DAG.getNode(ISD::UDIV, SDLoc(N), VT, N0, N1);
    AddToWorklist(Div.getNode());
    SDValue OptimizedDiv = combine(Div.getNode());
    if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
                                OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul);
      AddToWorklist(Mul.getNode());
      return Sub;
    }
  }

  // undef % X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, SDLoc(N), VT);
  // X % undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

SDValue DAGCombiner::visitMULHS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // fold (mulhs x, 0) -> 0
  if (isNullConstant(N1))
    return N1;

  // fold (mulhs x, 1) -> (sra x, size(x)-1)
  if (isOneConstant(N1))
    return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
                       DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
                                       DL,
                                       getShiftAmountTy(N0.getValueType())));

  // fold (mulhs x, undef) -> 0
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, DL, VT);

  // If the type twice as wide is legal, transform the mulhs to a wider
  // multiply plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
      N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
                       DAG.getConstant(SimpleSize, DL,
                                       getShiftAmountTy(N1.getValueType())));
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    }
  }

  return SDValue();
}
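
// Illustrative instance of the widening fold above (editor's sketch): for
// i16 with a legal i32 multiply, (mulhs a, b) becomes
//   (trunc i16 (srl (mul (sext i32 a), (sext i32 b)), 16))
// i.e. a full 32-bit product whose high half is shifted down and truncated.
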
SDValue DAGCombiner::visitMULHU(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // fold (mulhu x, 0) -> 0
  if (isNullConstant(N1))
    return N1;

  // fold (mulhu x, 1) -> 0
  if (isOneConstant(N1))
    return DAG.getConstant(0, DL, N0.getValueType());

  // fold (mulhu x, undef) -> 0
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, DL, VT);

  // If the type twice as wide is legal, transform the mulhu to a wider
  // multiply plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
      N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
                       DAG.getConstant(SimpleSize, DL,
                                       getShiftAmountTy(N1.getValueType())));
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    }
  }

  return SDValue();
}

/// Perform optimizations common to nodes that compute two values. LoOp and
/// HiOp give the opcodes for the two computations that are being performed.
/// Returns the simplified value, or a null SDValue if no simplification was
/// made.
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                                unsigned HiOp) {
  // If the high half is not needed, just compute the low half.
  bool HiExists = N->hasAnyUseOfValue(1);
  if (!HiExists &&
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
    SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If the low half is not needed, just compute the high half.
  bool LoExists = N->hasAnyUseOfValue(0);
  if (!LoExists &&
      (!LegalOperations ||
       TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
    SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If both halves are used, return as it is.
  if (LoExists && HiExists)
    return SDValue();

  // If the two computed results can be simplified separately, separate them.
  if (LoExists) {
    SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    AddToWorklist(Lo.getNode());
    SDValue LoOpt = combine(Lo.getNode());
    if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
        (!LegalOperations ||
         TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
      return CombineTo(N, LoOpt, LoOpt);
  }

  if (HiExists) {
    SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    AddToWorklist(Hi.getNode());
    SDValue HiOpt = combine(Hi.getNode());
    if (HiOpt.getNode() && HiOpt != Hi &&
        (!LegalOperations ||
         TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
      return CombineTo(N, HiOpt, HiOpt);
  }

  return SDValue();
}
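
// Illustrative instance (editor's sketch): for an (smul_lohi a, b) whose high
// result has no uses, the code above replaces both results with the plain
// (mul a, b); the *_LOHI visitors below rely on this before attempting their
// own widening transforms.
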
SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS);
  if (Res.getNode()) return Res;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // If the type twice as wide is legal, transform the smul_lohi to a wider
  // multiply plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part (result value 1).
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
                       DAG.getConstant(SimpleSize, DL,
                                       getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low part (result value 0).
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU);
  if (Res.getNode()) return Res;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // If the type twice as wide is legal, transform the umul_lohi to a wider
  // multiply plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part (result value 1).
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
                       DAG.getConstant(SimpleSize, DL,
                                       getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low part (result value 0).
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitSMULO(SDNode *N) {
  // (smulo x, 2) -> (saddo x, x)
  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
    if (C2->getAPIntValue() == 2)
      return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
                         N->getOperand(0), N->getOperand(0));

  return SDValue();
}

SDValue DAGCombiner::visitUMULO(SDNode *N) {
  // (umulo x, 2) -> (uaddo x, x)
  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
    if (C2->getAPIntValue() == 2)
      return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
                         N->getOperand(0), N->getOperand(0));

  return SDValue();
}

SDValue DAGCombiner::visitSDIVREM(SDNode *N) {
  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM);
  if (Res.getNode()) return Res;

  return SDValue();
}

SDValue DAGCombiner::visitUDIVREM(SDNode *N) {
  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM);
  if (Res.getNode()) return Res;

  return SDValue();
}

/// If this is a binary operator with two operands of the same opcode, try to
/// simplify it.
SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");

  // Bail early if none of these transforms apply.
  if (N0.getNode()->getNumOperands() == 0) return SDValue();

  // For each of OP in AND/OR/XOR:
  // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
  // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
  // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
  // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
  //
  // do not sink logical op inside of a vector extend, since it may combine
  // into a vsetcc.
  EVT Op0VT = N0.getOperand(0).getValueType();
  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
       N0.getOpcode() == ISD::SIGN_EXTEND ||
       N0.getOpcode() == ISD::BSWAP ||
       // Avoid infinite looping with PromoteIntBinOp.
       (N0.getOpcode() == ISD::ANY_EXTEND &&
        (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
       (N0.getOpcode() == ISD::TRUNCATE &&
        (!TLI.isZExtFree(VT, Op0VT) ||
         !TLI.isTruncateFree(Op0VT, VT)) &&
        TLI.isTypeLegal(Op0VT))) &&
      !VT.isVector() &&
      Op0VT == N1.getOperand(0).getValueType() &&
      (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
    SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorklist(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
  }
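
  // Illustrative instance of the hoisting fold above (editor's sketch):
  // (and (zext i8 x to i32), (zext i8 y to i32)) is rewritten to
  // (zext (and i8 x, y) to i32), performing the logic op in the narrow type.
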
  // For each of OP in SHL/SRL/SRA/AND...
  // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
  // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z)
  // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
       N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
      N0.getOperand(1) == N1.getOperand(1)) {
    SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorklist(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
                       ORNode, N0.getOperand(1));
  }

  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
  // Only perform this optimization after type legalization and before
  // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
  // we don't want to undo this promotion.
  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
  // on scalars.
  if ((N0.getOpcode() == ISD::BITCAST ||
       N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
      Level == AfterLegalizeTypes) {
    SDValue In0 = N0.getOperand(0);
    SDValue In1 = N1.getOperand(0);
    EVT In0Ty = In0.getValueType();
    EVT In1Ty = In1.getValueType();
    SDLoc DL(N);
    // If both incoming values are integers, and the original types are the
    // same.
    if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
      SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
      SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
      AddToWorklist(Op.getNode());
      return BC;
    }
  }

  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
  // If both shuffles use the same mask, and both shuffle within a single
  // vector, then it is worthwhile to move the swizzle after the operation.
  // The type-legalizer generates this pattern when loading illegal
  // vector types from memory. In many cases this allows additional shuffle
  // optimizations.
  // There are other cases where moving the shuffle after the xor/and/or
  // is profitable even if shuffles don't perform a swizzle.
  // If both shuffles use the same mask, and both shuffles have the same first
  // or second operand, then it might still be profitable to move the shuffle
  // after the xor/and/or operation.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
    ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
    assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
           "Inputs to shuffles are not the same type");

    // Check that both shuffles use the same mask. The masks are known to be of
    // the same length because the result vector type is the same.
    // Check also that shuffles have only one use to avoid introducing extra
    // instructions.
    if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
        SVN0->getMask().equals(SVN1->getMask())) {
      SDValue ShOp = N0->getOperand(1);

      // Don't try to fold this node if it requires introducing a
      // build vector of all zeros that might be illegal at this stage.
      if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) {
        if (!LegalTypes)
          ShOp = DAG.getConstant(0, SDLoc(N), VT);
        else
          ShOp = SDValue();
      }

      // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
      // (OR  (shuf (A, C), shuf (B, C)) -> shuf (OR  (A, B), C)
      // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0)
      if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
        SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                      N0->getOperand(0), N1->getOperand(0));
        AddToWorklist(NewNode.getNode());
        return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
                                    &SVN0->getMask()[0]);
      }

      // Don't try to fold this node if it requires introducing a
      // build vector of all zeros that might be illegal at this stage.
      ShOp = N0->getOperand(0);
      if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) {
        if (!LegalTypes)
          ShOp = DAG.getConstant(0, SDLoc(N), VT);
        else
          ShOp = SDValue();
      }

      // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B))
      // (OR  (shuf (C, A), shuf (C, B)) -> shuf (C, OR  (A, B))
      // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B))
      if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
        SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                      N0->getOperand(1), N1->getOperand(1));
        AddToWorklist(NewNode.getNode());
        return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
                                    &SVN0->getMask()[0]);
      }
    }
  }

  return SDValue();
}

/// This contains all DAGCombine rules which reduce two values combined by
/// an And operation to a single value. This makes them reusable in the context
/// of visitSELECT(). Rules involving constants are not included as
/// visitSELECT() already handles those cases.
SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
                                  SDNode *LocReference) {
  EVT VT = N1.getValueType();

  // fold (and x, undef) -> 0
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, SDLoc(LocReference), VT);

  // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
  SDValue LL, LR, RL, RR, CC0, CC1;
  if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)) {
    ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
    ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();

    if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
        LL.getValueType().isInteger()) {
      // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
      if (isNullConstant(LR) && Op1 == ISD::SETEQ) {
        SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
                                     LR.getValueType(), LL, RL);
        AddToWorklist(ORNode.getNode());
        return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
      }
      if (isAllOnesConstant(LR)) {
        // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
        if (Op1 == ISD::SETEQ) {
          SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
                                        LR.getValueType(), LL, RL);
          AddToWorklist(ANDNode.getNode());
          return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
        }
        // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
        if (Op1 == ISD::SETGT) {
          SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
                                       LR.getValueType(), LL, RL);
          AddToWorklist(ORNode.getNode());
          return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
        }
      }
    }

    // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)
    if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) &&
        Op0 == Op1 && LL.getValueType().isInteger() &&
        Op0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
                              (isAllOnesConstant(LR) && isNullConstant(RR)))) {
      SDLoc DL(N0);
      SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(),
                                    LL, DAG.getConstant(1, DL,
                                                        LL.getValueType()));
      AddToWorklist(ADDNode.getNode());
      return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode,
                          DAG.getConstant(2, DL, LL.getValueType()),
                          ISD::SETUGE);
    }
    // Canonicalize so that LL == RL.
    if (LL == RR && LR == RL) {
      Op1 = ISD::getSetCCSwappedOperands(Op1);
      std::swap(RL, RR);
    }
    if (LL == RL && LR == RR) {
      bool isInteger = LL.getValueType().isInteger();
      ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
      if (Result != ISD::SETCC_INVALID &&
          (!LegalOperations ||
           (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
            TLI.isOperationLegal(ISD::SETCC,
                                 getSetCCResultType(N0.getSimpleValueType())))))
        return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
                            LL, LR, Result);
    }
  }

  if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
      VT.getSizeInBits() <= 64) {
    if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      APInt ADDC = ADDI->getAPIntValue();
      if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
        // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
        // immediate for an add, but it is legal if its top c2 bits are set,
        // transform the ADD so the immediate doesn't need to be materialized
        // in a register.
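        // Illustrative sketch, assuming an AArch64-like 12-bit add immediate:
        // with i32 c1 == 0x00FFF001 (not encodable) and c2 == 8, the lshr
        // zeroes the top 8 bits of the AND result anyway, so c1 may become
        // 0xFFFFF001 == -4095, which is encodable as a subtract of 4095.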
        if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
          APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
                                             SRLI->getZExtValue());
          if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
            ADDC |= Mask;
            if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
              SDLoc DL(N0);
              SDValue NewAdd =
                DAG.getNode(ISD::ADD, DL, VT,
                            N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
              CombineTo(N0.getNode(), NewAdd);
              // Return N so it doesn't get rechecked!
              return SDValue(LocReference, 0);
            }
          }
        }
      }
    }
  }

  return SDValue();
}
SDValue DAGCombiner::visitAND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (and x, 0) -> 0, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getConstant(
          APInt::getNullValue(
              N0.getValueType().getScalarType().getSizeInBits()),
          SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getConstant(
          APInt::getNullValue(
              N1.getValueType().getScalarType().getSizeInBits()),
          SDLoc(N), N1.getValueType());

    // fold (and x, -1) -> x, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      return N0;
  }

  // fold (and c1, c2) -> c1&c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (isConstantIntBuildVectorOrConstantInt(N0) &&
      !isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
  // fold (and x, -1) -> x
  if (isAllOnesConstant(N1))
    return N0;
  // if (and x, c) is known to be zero, return 0
  unsigned BitWidth = VT.getScalarType().getSizeInBits();
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(BitWidth)))
    return DAG.getConstant(0, SDLoc(N), VT);
  // reassociate and
  if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
    return RAND;
  // fold (and (or x, C), D) -> D if (C & D) == D
  if (N1C && N0.getOpcode() == ISD::OR)
    if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
      if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
        return N1;
  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
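  // E.g. (and (any_extend:i32 x:i16), 0xFFFF) becomes (zero_extend:i32 x):
  // the mask keeps only the low 16 bits, which are the same either way, and
  // zero_extend pins the garbage top bits that any_extend leaves undefined.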
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N0Op0 = N0.getOperand(0);
    APInt Mask = ~N1C->getAPIntValue();
    Mask = Mask.trunc(N0Op0.getValueSizeInBits());
    if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
      SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
                                 N0.getValueType(), N0Op0);

      // Replace uses of the AND with uses of the Zero extend node.
      CombineTo(N, Zext);

      // We actually want to replace all uses of the any_extend with the
      // zero_extend, to avoid duplicating things. This will later cause this
      // AND to be folded.
      CombineTo(N0.getNode(), Zext);
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }
  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
  // already be zero by virtue of the width of the base type of the load.
  //
  // the 'X' node here can either be nothing or an extract_vector_elt to catch
  // more cases.
  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
       N0.getOperand(0).getOpcode() == ISD::LOAD) ||
      N0.getOpcode() == ISD::LOAD) {
    LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
                                         N0 : N0.getOperand(0) );

    // Get the constant (if applicable) the zero'th operand is being ANDed with.
    // This can be a pure constant or a vector splat, in which case we treat the
    // vector as a scalar and use the splat value.
    APInt Constant = APInt::getNullValue(1);
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
      Constant = C->getAPIntValue();
    } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
      APInt SplatValue, SplatUndef;
      unsigned SplatBitSize;
      bool HasAnyUndefs;
      bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
                                             SplatBitSize, HasAnyUndefs);
      if (IsSplat) {
        // Undef bits can contribute to a possible optimisation if set, so
        // set them.
        SplatValue |= SplatUndef;

        // The splat value may be something like "0x00FFFFFF", which means 0 for
        // the first vector value and FF for the rest, repeating. We need a mask
        // that will apply equally to all members of the vector, so AND all the
        // lanes of the constant together.
        EVT VT = Vector->getValueType(0);
        unsigned BitWidth = VT.getVectorElementType().getSizeInBits();

        // If the splat value has been compressed to a bitlength lower
        // than the size of the vector lane, we need to re-expand it to
        // the lane size.
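        // E.g. a splat recorded as SplatValue == 0xAB with SplatBitSize == 8
        // in a vector of i16 lanes is widened back to 0xABAB by the loop
        // below before the per-lane AND.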
        if (BitWidth > SplatBitSize)
          for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
               SplatBitSize < BitWidth;
               SplatBitSize = SplatBitSize * 2)
            SplatValue |= SplatValue.shl(SplatBitSize);

        // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
        // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
        if (SplatBitSize % BitWidth == 0) {
          Constant = APInt::getAllOnesValue(BitWidth);
          for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
            Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
        }
      }
    }

    // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
    // actually legal and isn't going to get expanded, else this is a false
    // optimisation.
    bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
                                                    Load->getValueType(0),
                                                    Load->getMemoryVT());

    // Resize the constant to the same size as the original memory access before
    // extension. If it is still the AllOnesValue then this AND is completely
    // unneeded.
    Constant =
      Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits());

    bool B;
    switch (Load->getExtensionType()) {
    default: B = false; break;
    case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
    case ISD::ZEXTLOAD:
    case ISD::NON_EXTLOAD: B = true; break;
    }

    if (B && Constant.isAllOnesValue()) {
      // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
      // preserve semantics once we get rid of the AND.
      SDValue NewLoad(Load, 0);
      if (Load->getExtensionType() == ISD::EXTLOAD) {
        NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
                              Load->getValueType(0), SDLoc(Load),
                              Load->getChain(), Load->getBasePtr(),
                              Load->getOffset(), Load->getMemoryVT(),
                              Load->getMemOperand());
        // Replace uses of the EXTLOAD with the new ZEXTLOAD.
        if (Load->getNumValues() == 3) {
          // PRE/POST_INC loads have 3 values.
          SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
                           NewLoad.getValue(2) };
          CombineTo(Load, To, 3, true);
        } else {
          CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
        }
      }

      // Fold the AND away, taking care not to fold to the old load node if we
      // replaced it.
      CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);

      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (and (load x), 255) -> (zextload x, i8)
  // fold (and (extload x, i16), 255) -> (zextload x, i8)
  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
  if (N1C && (N0.getOpcode() == ISD::LOAD ||
              (N0.getOpcode() == ISD::ANY_EXTEND &&
               N0.getOperand(0).getOpcode() == ISD::LOAD))) {
    bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
    LoadSDNode *LN0 = HasAnyExt
      ? cast<LoadSDNode>(N0.getOperand(0))
      : cast<LoadSDNode>(N0);
    if (LN0->getExtensionType() != ISD::SEXTLOAD &&
        LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) {
      uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits();
      if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){
        EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
        EVT LoadedVT = LN0->getMemoryVT();
        EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;

        if (ExtVT == LoadedVT &&
            (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy,
                                                    ExtVT))) {
          SDValue NewLoad =
            DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
                           LN0->getChain(), LN0->getBasePtr(), ExtVT,
                           LN0->getMemOperand());
          AddToWorklist(N);
          CombineTo(LN0, NewLoad, NewLoad.getValue(1));
          return SDValue(N, 0); // Return N so it doesn't get rechecked!
        }

        // Do not change the width of a volatile load.
        // Do not generate loads of non-round integer types since these can
        // be expensive (and would be wrong if the type is not byte sized).
        if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
            (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy,
                                                    ExtVT))) {
          EVT PtrType = LN0->getOperand(1).getValueType();

          unsigned Alignment = LN0->getAlignment();
          SDValue NewPtr = LN0->getBasePtr();

          // For big endian targets, we need to add an offset to the pointer
          // to load the correct bytes. For little endian systems, we merely
          // need to read fewer bytes from the same pointer.
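          // E.g. when narrowing an i32 load to an i16 zextload, little endian
          // reads the same address, while big endian must read at ptr + 2
          // (LVTStoreBytes - EVTStoreBytes == 4 - 2), where the low halfword
          // of the i32 value lives.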
          if (DAG.getDataLayout().isBigEndian()) {
            unsigned LVTStoreBytes = LoadedVT.getStoreSize();
            unsigned EVTStoreBytes = ExtVT.getStoreSize();
            unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
            SDLoc DL(LN0);
            NewPtr = DAG.getNode(ISD::ADD, DL, PtrType,
                                 NewPtr, DAG.getConstant(PtrOff, DL, PtrType));
            Alignment = MinAlign(Alignment, PtrOff);
          }

          AddToWorklist(NewPtr.getNode());

          SDValue Load =
            DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
                           LN0->getChain(), NewPtr,
                           LN0->getPointerInfo(),
                           ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
                           LN0->isInvariant(), Alignment, LN0->getAAInfo());
          AddToWorklist(N);
          CombineTo(LN0, Load, Load.getValue(1));
          return SDValue(N, 0); // Return N so it doesn't get rechecked!
        }
      }
    }
  }

  if (SDValue Combined = visitANDLike(N0, N1, N))
    return Combined;

  // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
  if (N0.getOpcode() == N1.getOpcode()) {
    SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
    if (Tmp.getNode()) return Tmp;
  }

  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
  // fold (and (sra)) -> (and (srl)) when possible.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (zext_inreg (extload x)) -> (zextload x)
  if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
                           BitWidth - MemVT.getScalarType().getSizeInBits())) &&
        ((!LegalOperations && !LN0->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
                                       LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      AddToWorklist(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }
  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
  if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
                           BitWidth - MemVT.getScalarType().getSizeInBits())) &&
        ((!LegalOperations && !LN0->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
                                       LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      AddToWorklist(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }
  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
    SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                       N0.getOperand(1), false);
    if (BSwap.getNode())
      return BSwap;
  }

  return SDValue();
}
/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
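/// E.g. for i32 a == 0xAABBCCDD, ((a & 0xff00) >> 8) | ((a & 0xff) << 8)
/// yields 0xDDCC, exactly (bswap a) >> 16 == 0xDDCCBBAA >> 16.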
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                        bool DemandHighBits) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
    return SDValue();
  if (!TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
  bool LookPassAnd0 = false;
  bool LookPassAnd1 = false;
  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
    std::swap(N0, N1);
  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() == ISD::AND) {
    if (!N0.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01C || N01C->getZExtValue() != 0xFF00)
      return SDValue();
    N0 = N0.getOperand(0);
    LookPassAnd0 = true;
  }

  if (N1.getOpcode() == ISD::AND) {
    if (!N1.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!N11C || N11C->getZExtValue() != 0xFF)
      return SDValue();
    N1 = N1.getOperand(0);
    LookPassAnd1 = true;
  }

  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
    return SDValue();
  if (!N0.getNode()->hasOneUse() ||
      !N1.getNode()->hasOneUse())
    return SDValue();

  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
  if (!N01C || !N11C)
    return SDValue();
  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
    return SDValue();

  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
  SDValue N00 = N0->getOperand(0);
  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
    if (!N00.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
    if (!N001C || N001C->getZExtValue() != 0xFF)
      return SDValue();
    N00 = N00.getOperand(0);
    LookPassAnd0 = true;
  }

  SDValue N10 = N1->getOperand(0);
  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
    if (!N10.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
    if (!N101C || N101C->getZExtValue() != 0xFF00)
      return SDValue();
    N10 = N10.getOperand(0);
    LookPassAnd1 = true;
  }

  if (N00 != N10)
    return SDValue();

  // Make sure everything beyond the low halfword gets set to zero since the SRL
  // 16 will clear the top bits.
  unsigned OpSizeInBits = VT.getSizeInBits();
  if (DemandHighBits && OpSizeInBits > 16) {
    // If the left-shift isn't masked out then the only way this is a bswap is
    // if all bits beyond the low 8 are 0. In that case the entire pattern
    // reduces to a left shift anyway: leave it for other parts of the combiner.
    if (!LookPassAnd0)
      return SDValue();

    // However, if the right shift isn't masked out then it might be because
    // it's not needed. See if we can spot that too.
    if (!LookPassAnd1 &&
        !DAG.MaskedValueIsZero(
            N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
      return SDValue();
  }

  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
  if (OpSizeInBits > 16) {
    SDLoc DL(N);
    Res = DAG.getNode(ISD::SRL, DL, VT, Res,
                      DAG.getConstant(OpSizeInBits - 16, DL,
                                      getShiftAmountTy(VT)));
  }
  return Res;
}
/// Return true if the specified node is an element that makes up a 32-bit
/// packed halfword byteswap.
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
  if (!N.getNode()->hasOneUse())
    return false;

  unsigned Opc = N.getOpcode();
  if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
    return false;
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!N1C)
    return false;

  unsigned Num;
  switch (N1C->getZExtValue()) {
  default:
    return false;
  case 0xFF:       Num = 0; break;
  case 0xFF00:     Num = 1; break;
  case 0xFF0000:   Num = 2; break;
  case 0xFF000000: Num = 3; break;
  }

  // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
  SDValue N0 = N.getOperand(0);
  if (Opc == ISD::AND) {
    if (Num == 0 || Num == 2) {
      // (x >> 8) & 0xff
      // (x >> 8) & 0xff0000
      if (N0.getOpcode() != ISD::SRL)
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (!C || C->getZExtValue() != 8)
        return false;
    } else {
      // (x << 8) & 0xff00
      // (x << 8) & 0xff000000
      if (N0.getOpcode() != ISD::SHL)
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (!C || C->getZExtValue() != 8)
        return false;
    }
  } else if (Opc == ISD::SHL) {
    // (x & 0xff) << 8
    // (x & 0xff0000) << 8
    if (Num != 0 && Num != 2)
      return false;
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!C || C->getZExtValue() != 8)
      return false;
  } else { // Opc == ISD::SRL
    // (x & 0xff00) >> 8
    // (x & 0xff000000) >> 8
    if (Num != 1 && Num != 3)
      return false;
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!C || C->getZExtValue() != 8)
      return false;
  }

  if (Parts[Num])
    return false;

  Parts[Num] = N0.getOperand(0).getNode();
  return true;
}

/// Match a 32-bit packed halfword bswap. That is
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
/// => (rotl (bswap x), 16)
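/// E.g. x == 0xAABBCCDD maps to 0xBBAADDCC both ways: the OR of the four
/// masked shifts produces it directly, and bswap(x) == 0xDDCCBBAA rotated
/// left by 16 gives the same value.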
SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
  if (!LegalOperations)
    return SDValue();
  EVT VT = N->getValueType(0);
  if (VT != MVT::i32)
    return SDValue();
  if (!TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  // Look for either
  // (or (or (and), (and)), (or (and), (and)))
  // (or (or (or (and), (and)), (and)), (and))
  if (N0.getOpcode() != ISD::OR)
    return SDValue();
  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);
  SDNode *Parts[4] = {};

  if (N1.getOpcode() == ISD::OR &&
      N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
    // (or (or (and), (and)), (or (and), (and)))
    SDValue N000 = N00.getOperand(0);
    if (!isBSwapHWordElement(N000, Parts))
      return SDValue();

    SDValue N001 = N00.getOperand(1);
    if (!isBSwapHWordElement(N001, Parts))
      return SDValue();
    SDValue N010 = N01.getOperand(0);
    if (!isBSwapHWordElement(N010, Parts))
      return SDValue();
    SDValue N011 = N01.getOperand(1);
    if (!isBSwapHWordElement(N011, Parts))
      return SDValue();
  } else {
    // (or (or (or (and), (and)), (and)), (and))
    if (!isBSwapHWordElement(N1, Parts))
      return SDValue();
    if (!isBSwapHWordElement(N01, Parts))
      return SDValue();
    if (N00.getOpcode() != ISD::OR)
      return SDValue();
    SDValue N000 = N00.getOperand(0);
    if (!isBSwapHWordElement(N000, Parts))
      return SDValue();
    SDValue N001 = N00.getOperand(1);
    if (!isBSwapHWordElement(N001, Parts))
      return SDValue();
  }

  // Make sure the parts are all coming from the same node.
  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
    return SDValue();

  SDLoc DL(N);
  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
                              SDValue(Parts[0], 0));

  // Result of the bswap should be rotated by 16. If it's not legal, then
  // do (x << 16) | (x >> 16).
  SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
    return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
  if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
    return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
  return DAG.getNode(ISD::OR, DL, VT,
                     DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
                     DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
}

/// This contains all DAGCombine rules which reduce two values combined by
/// an Or operation to a single value \see visitANDLike().
SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
  EVT VT = N1.getValueType();
  // fold (or x, undef) -> -1
  if (!LegalOperations &&
      (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) {
    EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
    return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()),
                           SDLoc(LocReference), VT);
  }
  // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
  SDValue LL, LR, RL, RR, CC0, CC1;
  if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
    ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
    ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
    if (LR == RR && Op0 == Op1 && LL.getValueType().isInteger()) {
      // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
      // fold (or (setlt X, 0), (setlt Y, 0)) -> (setlt (or X, Y), 0)
      if (isNullConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
        SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
                                     LR.getValueType(), LL, RL);
        AddToWorklist(ORNode.getNode());
        return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
      }
      // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
      // fold (or (setgt X, -1), (setgt Y, -1)) -> (setgt (and X, Y), -1)
      if (isAllOnesConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
        SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
                                      LR.getValueType(), LL, RL);
        AddToWorklist(ANDNode.getNode());
        return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
      }
    }
    // Canonicalize so that LL == RL.
    if (LL == RR && LR == RL) {
      Op1 = ISD::getSetCCSwappedOperands(Op1);
      std::swap(RL, RR);
    }
    if (LL == RL && LR == RR) {
      bool isInteger = LL.getValueType().isInteger();
      ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
      if (Result != ISD::SETCC_INVALID &&
          (!LegalOperations ||
           (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
            TLI.isOperationLegal(ISD::SETCC,
                                 getSetCCResultType(N0.getValueType())))))
        return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
                            LL, LR, Result);
    }
  }

  // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
      // Don't increase # computations.
      (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
    // We can only do this xform if we know that bits from X that are set in C2
    // but not in C1 are already zero. Likewise for Y.
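    // E.g. with C1 == 0x00FF and C2 == 0xFF00, if X's 0xFF00 bits and Y's
    // 0x00FF bits are known zero, then (X & 0x00FF) | (Y & 0xFF00) equals
    // (X | Y) & 0xFFFF: for X == 0x00AB, Y == 0xCD00 both sides are 0xCDAB.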
    if (const ConstantSDNode *N0O1C =
        getAsNonOpaqueConstant(N0.getOperand(1))) {
      if (const ConstantSDNode *N1O1C =
          getAsNonOpaqueConstant(N1.getOperand(1))) {
        const APInt &LHSMask = N0O1C->getAPIntValue();
        const APInt &RHSMask = N1O1C->getAPIntValue();

        if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
            DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
          SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
                                  N0.getOperand(0), N1.getOperand(0));
          SDLoc DL(LocReference);
          return DAG.getNode(ISD::AND, DL, VT, X,
                             DAG.getConstant(LHSMask | RHSMask, DL, VT));
        }
      }
    }
  }
  // (or (and X, M), (and X, N)) -> (and X, (or M, N))
  if (N0.getOpcode() == ISD::AND &&
      N1.getOpcode() == ISD::AND &&
      N0.getOperand(0) == N1.getOperand(0) &&
      // Don't increase # computations.
      (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
    SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
                            N0.getOperand(1), N1.getOperand(1));
    return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, N0.getOperand(0), X);
  }

  return SDValue();
}

SDValue DAGCombiner::visitOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (or x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;

    // fold (or x, -1) -> -1, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getConstant(
          APInt::getAllOnesValue(
              N0.getValueType().getScalarType().getSizeInBits()),
          SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getConstant(
          APInt::getAllOnesValue(
              N1.getValueType().getScalarType().getSizeInBits()),
          SDLoc(N), N1.getValueType());

    // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1)
    // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2)
    // Do this only if the resulting shuffle is legal.
    if (isa<ShuffleVectorSDNode>(N0) &&
        isa<ShuffleVectorSDNode>(N1) &&
        // Avoid folding a node with illegal type.
        TLI.isTypeLegal(VT) &&
        N0->getOperand(1) == N1->getOperand(1) &&
        ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode())) {
      bool CanFold = true;
      unsigned NumElts = VT.getVectorNumElements();
      const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
      const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
      // We construct two shuffle masks:
      // - Mask1 is a shuffle mask for a shuffle with N0 as the first operand
      // and N1 as the second operand.
      // - Mask2 is a shuffle mask for a shuffle with N1 as the first operand
      // and N0 as the second operand.
      // We do this because OR is commutable and therefore there might be
      // two ways to fold this node into a shuffle.
      SmallVector<int,4> Mask1;
      SmallVector<int,4> Mask2;

      for (unsigned i = 0; i != NumElts && CanFold; ++i) {
        int M0 = SV0->getMaskElt(i);
        int M1 = SV1->getMaskElt(i);

        // Both shuffle indexes are undef. Propagate Undef.
        if (M0 < 0 && M1 < 0) {
          Mask1.push_back(M0);
          Mask2.push_back(M0);
          continue;
        }

        if (M0 < 0 || M1 < 0 ||
            (M0 < (int)NumElts && M1 < (int)NumElts) ||
            (M0 >= (int)NumElts && M1 >= (int)NumElts)) {
          CanFold = false;
          break;
        }

        Mask1.push_back(M0 < (int)NumElts ? M0 : M1 + NumElts);
        Mask2.push_back(M1 < (int)NumElts ? M1 : M0 + NumElts);
      }

      if (CanFold) {
        // Fold this sequence only if the resulting shuffle is 'legal'.
        if (TLI.isShuffleMaskLegal(Mask1, VT))
          return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0),
                                      N1->getOperand(0), &Mask1[0]);
        if (TLI.isShuffleMaskLegal(Mask2, VT))
          return DAG.getVectorShuffle(VT, SDLoc(N), N1->getOperand(0),
                                      N0->getOperand(0), &Mask2[0]);
      }
    }
  }

  // fold (or c1, c2) -> c1|c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (isConstantIntBuildVectorOrConstantInt(N0) &&
      !isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
  // fold (or x, 0) -> x
  if (isNullConstant(N1))
    return N0;
  // fold (or x, -1) -> -1
  if (isAllOnesConstant(N1))
    return N1;
  // fold (or x, c) -> c iff (x & ~c) == 0
  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
    return N1;

  if (SDValue Combined = visitORLike(N0, N1, N))
    return Combined;

  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
  SDValue BSwap = MatchBSwapHWord(N, N0, N1);
  if (BSwap.getNode())
    return BSwap;
  BSwap = MatchBSwapHWordLow(N, N0, N1);
  if (BSwap.getNode())
    return BSwap;

  // reassociate or
  if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
    return ROR;
  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
  // iff (c1 & c2) == 0.
  if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      isa<ConstantSDNode>(N0.getOperand(1))) {
    ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
    if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) {
      if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
                                                   N1C, C1))
        return DAG.getNode(
            ISD::AND, SDLoc(N), VT,
            DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR);
      return SDValue();
    }
  }
  // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
  if (N0.getOpcode() == N1.getOpcode()) {
    SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
    if (Tmp.getNode()) return Tmp;
  }

  // See if this is some rotate idiom.
  if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
    return SDValue(Rot, 0);

  // Simplify the operands using demanded-bits information.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
/// Match "(X shl/srl V1) & V2" where V2 may not be present.
static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
  if (Op.getOpcode() == ISD::AND) {
    if (isa<ConstantSDNode>(Op.getOperand(1))) {
      Mask = Op.getOperand(1);
      Op = Op.getOperand(0);
    } else {
      return false;
    }
  }

  if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
    Shift = Op;
    return true;
  }

  return false;
}

// Return true if we can prove that, whenever Neg and Pos are both in the
// range [0, OpSize), Neg == (Pos == 0 ? 0 : OpSize - Pos). This means that
// for two opposing shifts shift1 and shift2 and a value X with OpSize bits:
//
//     (or (shift1 X, Neg), (shift2 X, Pos))
//
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
// in direction shift1 by Neg. The range [0, OpSize) means that we only need
// to consider shift amounts with defined behavior.
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned OpSize) {
  // If OpSize is a power of 2 then:
  //
  //  (a) (Pos == 0 ? 0 : OpSize - Pos) == (OpSize - Pos) & (OpSize - 1)
  //  (b) Neg == Neg & (OpSize - 1) whenever Neg is in [0, OpSize).
  //
  // So if OpSize is a power of 2 and Neg is (and Neg', OpSize-1), we check
  // for the stronger condition:
  //
  //     Neg & (OpSize - 1) == (OpSize - Pos) & (OpSize - 1)    [A]
  //
  // for all Neg and Pos. Since Neg & (OpSize - 1) == Neg' & (OpSize - 1)
  // we can just replace Neg with Neg' for the rest of the function.
  //
  // In other cases we check for the even stronger condition:
  //
  //     Neg == OpSize - Pos                                    [B]
  //
  // for all Neg and Pos. Note that the (or ...) then invokes undefined
  // behavior if Pos == 0 (and consequently Neg == OpSize).
  //
  // We could actually use [A] whenever OpSize is a power of 2, but the
  // only extra cases that it would match are those uninteresting ones
  // where Neg and Pos are never in range at the same time. E.g. for
  // OpSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
  // as well as (sub 32, Pos), but:
  //
  //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
  //
  // always invokes undefined behavior for 32-bit X.
  //
  // Below, Mask == OpSize - 1 when using [A] and is all-ones otherwise.
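  //
  // E.g. the common i32 rotate idiom (or (shl X, Y), (srl X, (and (sub 32,
  // Y), 31))) hits case [A]: Neg == (and (sub 32, Y), 31) strips to
  // (sub 32, Y), so NegC == 32 and NegOp1 == Y == Pos; Width == 32 has zero
  // low 5 bits, which proves the pair is a rotate.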
  unsigned MaskLoBits = 0;
  if (Neg.getOpcode() == ISD::AND &&
      isPowerOf2_64(OpSize) &&
      Neg.getOperand(1).getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(Neg.getOperand(1))->getAPIntValue() == OpSize - 1) {
    Neg = Neg.getOperand(0);
    MaskLoBits = Log2_64(OpSize);
  }

  // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
  if (Neg.getOpcode() != ISD::SUB)
    return false;
  ConstantSDNode *NegC = dyn_cast<ConstantSDNode>(Neg.getOperand(0));
  if (!NegC)
    return false;
  SDValue NegOp1 = Neg.getOperand(1);

  // On the RHS of [A], if Pos is Pos' & (OpSize - 1), just replace Pos with
  // Pos'. The truncation is redundant for the purpose of the equality.
  if (MaskLoBits &&
      Pos.getOpcode() == ISD::AND &&
      Pos.getOperand(1).getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() == OpSize - 1)
    Pos = Pos.getOperand(0);

  // The condition we need is now:
  //
  //     (NegC - NegOp1) & Mask == (OpSize - Pos) & Mask
  //
  // If NegOp1 == Pos then we need:
  //
  //     OpSize & Mask == NegC & Mask
  //
  // (because "x & Mask" is a truncation and distributes through subtraction).
  APInt Width;
  if (Pos == NegOp1)
    Width = NegC->getAPIntValue();
  // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
  // Then the condition we want to prove becomes:
  //
  //     (NegC - NegOp1) & Mask == (OpSize - (NegOp1 + PosC)) & Mask
  //
  // which, again because "x & Mask" is a truncation, becomes:
  //
  //     NegC & Mask == (OpSize - PosC) & Mask
  // i.e.
  //     OpSize & Mask == (NegC + PosC) & Mask
  else if (Pos.getOpcode() == ISD::ADD &&
           Pos.getOperand(0) == NegOp1 &&
           Pos.getOperand(1).getOpcode() == ISD::Constant)
    Width = (cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() +
             NegC->getAPIntValue());
  else
    return false;

  // Now we just need to check that OpSize & Mask == Width & Mask.
  if (MaskLoBits)
    // OpSize & Mask is 0 since Mask is OpSize - 1.
    return Width.getLoBits(MaskLoBits) == 0;
  return Width == OpSize;
}
// A subroutine of MatchRotate used once we have found an OR of two opposite
// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
// former being preferred if supported. InnerPos and InnerNeg are Pos and
// Neg with outer conversions stripped away.
SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
                                       SDValue Neg, SDValue InnerPos,
                                       SDValue InnerNeg, unsigned PosOpcode,
                                       unsigned NegOpcode, SDLoc DL) {
  // fold (or (shl x, (*ext y)),
  //          (srl x, (*ext (sub 32, y)))) ->
  //   (rotl x, y) or (rotr x, (sub 32, y))
  //
  // fold (or (shl x, (*ext (sub 32, y))),
  //          (srl x, (*ext y))) ->
  //   (rotr x, y) or (rotl x, (sub 32, y))
  EVT VT = Shifted.getValueType();
  if (matchRotateSub(InnerPos, InnerNeg, VT.getSizeInBits())) {
    bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
    return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
                       HasPos ? Pos : Neg).getNode();
  }
  return nullptr;
}

// MatchRotate - Handle an 'or' of two operands. If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr].
SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
  // Must be a legal type. Expanded and promoted types won't work with rotates.
  EVT VT = LHS.getValueType();
  if (!TLI.isTypeLegal(VT)) return nullptr;

  // The target must have at least one rotate flavor.
  bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
  bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
  if (!HasROTL && !HasROTR) return nullptr;

  // Match "(X shl/srl V1) & V2" where V2 may not be present.
  SDValue LHSShift;   // The shift.
  SDValue LHSMask;    // AND value if any.
  if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
    return nullptr; // Not part of a rotate.

  SDValue RHSShift;   // The shift.
  SDValue RHSMask;    // AND value if any.
  if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
    return nullptr; // Not part of a rotate.

  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
    return nullptr;   // Not shifting the same value.

  if (LHSShift.getOpcode() == RHSShift.getOpcode())
    return nullptr;   // Shifts must disagree.

  // Canonicalize shl to left side in a shl/srl pair.
  if (RHSShift.getOpcode() == ISD::SHL) {
    std::swap(LHS, RHS);
    std::swap(LHSShift, RHSShift);
    std::swap(LHSMask, RHSMask);
  }

  unsigned OpSizeInBits = VT.getSizeInBits();
  SDValue LHSShiftArg = LHSShift.getOperand(0);
  SDValue LHSShiftAmt = LHSShift.getOperand(1);
  SDValue RHSShiftArg = RHSShift.getOperand(0);
  SDValue RHSShiftAmt = RHSShift.getOperand(1);

  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
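  // E.g. for i32, (or (shl x, 8), (srl x, 24)) has C1 + C2 == 32, so it
  // becomes (rotl x, 8), or equivalently (rotr x, 24), whichever the target
  // supports.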
  if (LHSShiftAmt.getOpcode() == ISD::Constant &&
      RHSShiftAmt.getOpcode() == ISD::Constant) {
    uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue();
    uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue();
    if ((LShVal + RShVal) != OpSizeInBits)
      return nullptr;

    SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
                              LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);

    // If there is an AND of either shifted operand, apply it to the result.
    if (LHSMask.getNode() || RHSMask.getNode()) {
      APInt Mask = APInt::getAllOnesValue(OpSizeInBits);

      if (LHSMask.getNode()) {
        APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal);
        Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits;
      }
      if (RHSMask.getNode()) {
        APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal);
        Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits;
      }

      Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, DL, VT));
    }

    return Rot.getNode();
  }

  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the right stuff.
  if (LHSMask.getNode() || RHSMask.getNode())
    return nullptr;

  // If the shift amount is sign/zext/any-extended just peel it off.
  SDValue LExtOp0 = LHSShiftAmt;
  SDValue RExtOp0 = RHSShiftAmt;
  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
    LExtOp0 = LHSShiftAmt.getOperand(0);
    RExtOp0 = RHSShiftAmt.getOperand(0);
  }

  SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
                                   LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
  if (TryL)
    return TryL;

  SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
                                   RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
  if (TryR)
    return TryR;

  return nullptr;
}
SDValue DAGCombiner::visitXOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (xor x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
  if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, SDLoc(N), VT);
  // fold (xor x, undef) -> undef
  if (N0.getOpcode() == ISD::UNDEF)
    return N0;
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;
  // fold (xor c1, c2) -> c1^c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (isConstantIntBuildVectorOrConstantInt(N0) &&
      !isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
  // fold (xor x, 0) -> x
  if (isNullConstant(N1))
    return N0;
  // reassociate xor
  if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
    return RXOR;

  // fold !(x cc y) -> (x !cc y)
  SDValue LHS, RHS, CC;
  if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
    bool isInt = LHS.getValueType().isInteger();
    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                                               isInt);

    if (!LegalOperations ||
        TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
      switch (N0.getOpcode()) {
      default:
        llvm_unreachable("Unhandled SetCC Equivalent!");
      case ISD::SETCC:
        return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC);
      case ISD::SELECT_CC:
        return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2),
                               N0.getOperand(3), NotCC);
      }
    }
  }

  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
  if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
      N0.getNode()->hasOneUse() &&
      isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
    SDValue V = N0.getOperand(0);
    SDLoc DL(N0);
    V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
                    DAG.getConstant(1, DL, V.getValueType()));
    AddToWorklist(V.getNode());
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
  }

  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
  if (isOneConstant(N1) && VT == MVT::i1 &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    }
  }
  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
  if (isAllOnesConstant(N1) &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    }
  }
  // fold (xor (and x, y), y) -> (and (not x), y)
  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      N0->getOperand(1) == N1) {
    SDValue X = N0->getOperand(0);
    SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
    AddToWorklist(NotX.getNode());
    return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
  }
  // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
  if (N1C && N0.getOpcode() == ISD::XOR) {
    if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) {
      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
                         DAG.getConstant(N1C->getAPIntValue() ^
                                         N00C->getAPIntValue(), DL, VT));
    }
    if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) {
      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
                         DAG.getConstant(N1C->getAPIntValue() ^
                                         N01C->getAPIntValue(), DL, VT));
    }
  }
  // fold (xor x, x) -> 0
  if (N0 == N1)
    return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);

  // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
  // Here is a concrete example of this equivalence:
  // i16   x == 14
  // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
  // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
  //
  // =>
  //
  // i16     ~1      == 0b1111111111111110
  // i16 rol(~1, 14) == 0b1011111111111111
  //
  // Some additional tips to help conceptualize this transform:
  // - Try to see the operation as placing a single zero in a value of all ones.
  // - There exists no value for x which would allow the result to contain zero.
  // - Values of x larger than the bitwidth are undefined and do not require a
  //   consistent result.
  // - Pushing the zero left requires shifting one-bits in from the right.
  //   A rotate left of ~1 is a nice way of achieving the desired result.
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
      && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
    SDLoc DL(N);
    return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
                       N0.getOperand(1));
  }
  // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
  if (N0.getOpcode() == N1.getOpcode()) {
    SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
    if (Tmp.getNode()) return Tmp;
  }

  // Simplify the expression using non-local knowledge.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}

/// Handle transforms common to the three shifts, when the shift amount is a
/// constant.
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
  SDNode *LHS = N->getOperand(0).getNode();
  if (!LHS->hasOneUse()) return SDValue();

  // We want to pull some binops through shifts, so that we have (and (shift))
  // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
  // thing happens with address calculations, so it's important to canonicalize
  // it.
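  // E.g. (shl (add x, c1), c2) becomes (add (shl x, c2), c1 << c2), which is
  // valid because shl distributes over add modulo 2^n; the folded constant
  // is computed by shifting the binop RHS below.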
  bool HighBitSet = false;  // Can we transform this if the high bit is set?
  switch (LHS->getOpcode()) {
  default: return SDValue();
  case ISD::OR:
  case ISD::XOR:
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  case ISD::AND:
    HighBitSet = true;  // We can only transform sra if the high bit is set.
    break;
  case ISD::ADD:
    if (N->getOpcode() != ISD::SHL)
      return SDValue(); // only shl(add) not sr[al](add).
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  }

  // We require the RHS of the binop to be a constant and not opaque as well.
  ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
  if (!BinOpCst) return SDValue();

  // FIXME: disable this unless the input to the binop is a shift by a constant.
  // If it is not a shift, it pessimizes some common cases like:
  //
  //    void foo(int *X, int i) { X[i & 1235] = 1; }
  //    int bar(int *X, int i) { return X[i & 255]; }
  SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
  if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
       BinOpLHSVal->getOpcode() != ISD::SRA &&
       BinOpLHSVal->getOpcode() != ISD::SRL) ||
      !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
    return SDValue();

  EVT VT = N->getValueType(0);

  // If this is a signed shift right, and the high bit is modified by the
  // logical operation, do not perform the transformation. The highBitSet
  // boolean indicates the value of the high bit of the constant which would
  // cause it to be modified for this operation.
  if (N->getOpcode() == ISD::SRA) {
    bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
    if (BinOpRHSSignSet != HighBitSet)
      return SDValue();
  }

  if (!TLI.isDesirableToCommuteWithShift(LHS))
    return SDValue();

  // Fold the constants, shifting the binop RHS by the shift amount.
  SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
                               N->getValueType(0),
                               LHS->getOperand(1), N->getOperand(1));
  assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");

  // Create the new shift.
  SDValue NewShift = DAG.getNode(N->getOpcode(),
                                 SDLoc(LHS->getOperand(0)),
                                 VT, LHS->getOperand(0), N->getOperand(1));

  // Create the new binop.
  return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
}

SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
  assert(N->getOpcode() == ISD::TRUNCATE);
  assert(N->getOperand(0).getOpcode() == ISD::AND);

  // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
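  // E.g. (truncate:i32 (and:i64 N00, 0xFFFF)) -> (and:i32 (truncate:i32 N00),
  // 0xFFFF); the constant is truncated along with the value.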
  3726. if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
  3727. SDValue N01 = N->getOperand(0).getOperand(1);
  3728. if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) {
  3729. if (!N01C->isOpaque()) {
  3730. EVT TruncVT = N->getValueType(0);
  3731. SDValue N00 = N->getOperand(0).getOperand(0);
  3732. APInt TruncC = N01C->getAPIntValue();
  3733. TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits());
  3734. SDLoc DL(N);
  3735. return DAG.getNode(ISD::AND, DL, TruncVT,
  3736. DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00),
  3737. DAG.getConstant(TruncC, DL, TruncVT));
  3738. }
  3739. }
  3740. }
  3741. return SDValue();
  3742. }
SDValue DAGCombiner::visitRotate(SDNode *N) {
  // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
  if (N->getOperand(1).getOpcode() == ISD::TRUNCATE &&
      N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) {
    SDValue NewOp1 = distributeTruncateThroughAnd(N->getOperand(1).getNode());
    if (NewOp1.getNode())
      return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
                         N->getOperand(0), NewOp1);
  }
  return SDValue();
}
SDValue DAGCombiner::visitSHL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
    // If setcc produces all-one true value then:
    // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
    if (N1CV && N1CV->isConstant()) {
      if (N0.getOpcode() == ISD::AND) {
        SDValue N00 = N0->getOperand(0);
        SDValue N01 = N0->getOperand(1);
        BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);

        if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
            TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
                TargetLowering::ZeroOrNegativeOneBooleanContent) {
          if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
                                                     N01CV, N1CV))
            return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
        }
      } else {
        N1C = isConstOrConstSplat(N1);
      }
    }
  }

  // fold (shl c1, c2) -> c1<<c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
  // fold (shl 0, x) -> 0
  if (isNullConstant(N0))
    return N0;
  // fold (shl x, c >= size(x)) -> undef
  if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
    return DAG.getUNDEF(VT);
  // fold (shl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (shl undef, x) -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, SDLoc(N), VT);
  // if (shl x, c) is known to be zero, return 0
  if (DAG.MaskedValueIsZero(SDValue(N, 0),
                            APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);
  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
    if (NewOp1.getNode())
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
  }

  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N0C1->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      SDLoc DL(N);
      if (c1 + c2 >= OpSizeInBits)
        return DAG.getConstant(0, DL, VT);
      return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                         DAG.getConstant(c1 + c2, DL, N1.getValueType()));
    }
  }

  // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
  // For this to be valid, the second form must not preserve any of the bits
  // that are shifted out by the inner shift in the first form.  This means
  // the outer shift size must be >= the number of bits added by the ext.
  // As a corollary, we don't care what kind of ext it is.
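  // For example, (shl:i64 (zext:i64 (shl:i32 x, 8)), 40)
  //   -> (shl:i64 (zext:i64 x), 48), since 40 >= 64 - 32.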
  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
              N0.getOpcode() == ISD::ANY_EXTEND ||
              N0.getOpcode() == ISD::SIGN_EXTEND) &&
      N0.getOperand(0).getOpcode() == ISD::SHL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      uint64_t c1 = N0Op0C1->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      EVT InnerShiftVT = N0Op0.getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      if (c2 >= OpSizeInBits - InnerShiftSize) {
        SDLoc DL(N0);
        if (c1 + c2 >= OpSizeInBits)
          return DAG.getConstant(0, DL, VT);
        return DAG.getNode(ISD::SHL, DL, VT,
                           DAG.getNode(N0.getOpcode(), DL, VT,
                                       N0Op0->getOperand(0)),
                           DAG.getConstant(c1 + c2, DL, N1.getValueType()));
      }
    }
  }

  // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
  // Only fold this if the inner zext has no other uses to avoid increasing
  // the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      uint64_t c1 = N0Op0C1->getZExtValue();
      if (c1 < VT.getScalarSizeInBits()) {
        uint64_t c2 = N1C->getZExtValue();
        if (c1 == c2) {
          SDValue NewOp0 = N0.getOperand(0);
          EVT CountVT = NewOp0.getOperand(1).getValueType();
          SDLoc DL(N);
          SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
                                       NewOp0,
                                       DAG.getConstant(c2, DL, CountVT));
          AddToWorklist(NewSHL.getNode());
          return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
        }
      }
    }
  }

  // fold (shl (sr[la] exact X, C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
  // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C1-C2)) if C1  > C2
  if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
      cast<BinaryWithFlagsSDNode>(N0)->Flags.hasExact()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t C1 = N0C1->getZExtValue();
      uint64_t C2 = N1C->getZExtValue();
      SDLoc DL(N);
      if (C1 <= C2)
        return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                           DAG.getConstant(C2 - C1, DL, N1.getValueType()));
      return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
                         DAG.getConstant(C1 - C2, DL, N1.getValueType()));
    }
  }

  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
  //                               (and (srl x, (sub c1, c2)), MASK)
  // Only fold this if the inner shift has no other uses -- if it does, folding
  // this will increase the total number of instructions.
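  // For example, on i8: (shl (srl x, 1), 3) -> (and (shl x, 2), 0xF8).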
  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N0C1->getZExtValue();
      if (c1 < OpSizeInBits) {
        uint64_t c2 = N1C->getZExtValue();
        APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
        SDValue Shift;
        if (c2 > c1) {
          Mask = Mask.shl(c2 - c1);
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c2 - c1, DL, N1.getValueType()));
        } else {
          Mask = Mask.lshr(c1 - c2);
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c1 - c2, DL, N1.getValueType()));
        }
        SDLoc DL(N0);
        return DAG.getNode(ISD::AND, DL, VT, Shift,
                           DAG.getConstant(Mask, DL, VT));
      }
    }
  }

  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
  if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
    unsigned BitSize = VT.getScalarSizeInBits();
    SDLoc DL(N);
    SDValue HiBitsMask =
      DAG.getConstant(APInt::getHighBitsSet(BitSize,
                                            BitSize - N1C->getZExtValue()),
                      DL, VT);
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0),
                       HiBitsMask);
  }

  // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  // Variant of version done on multiply, except mul by a power of 2 is turned
  // into a shift.
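  // For example, (shl (add x, 5), 1) -> (add (shl x, 1), 10).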
  APInt Val;
  if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
      (isa<ConstantSDNode>(N0.getOperand(1)) ||
       isConstantSplatVector(N0.getOperand(1).getNode(), Val))) {
    SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
    SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
  }

  if (N1C && !N1C->isOpaque()) {
    SDValue NewSHL = visitShiftByConstant(N, N1C);
    if (NewSHL.getNode())
      return NewSHL;
  }

  return SDValue();
}
SDValue DAGCombiner::visitSRA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();

  // fold vector ops
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    N1C = isConstOrConstSplat(N1);
  }

  // fold (sra c1, c2) -> c1>>c2 (arithmetic)
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
  // fold (sra 0, x) -> 0
  if (isNullConstant(N0))
    return N0;
  // fold (sra -1, x) -> -1
  if (isAllOnesConstant(N0))
    return N0;
  // fold (sra x, c >= size(x)) -> undef
  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
    return DAG.getUNDEF(VT);
  // fold (sra x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
  // sext_inreg.
  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
    if (VT.isVector())
      ExtVT = EVT::getVectorVT(*DAG.getContext(),
                               ExtVT, VT.getVectorNumElements());
    if ((!LegalOperations ||
         TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         N0.getOperand(0), DAG.getValueType(ExtVT));
  }

  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SRA) {
    if (ConstantSDNode *C1 = isConstOrConstSplat(N0.getOperand(1))) {
      unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
      if (Sum >= OpSizeInBits)
        Sum = OpSizeInBits - 1;
      SDLoc DL(N);
      return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
                         DAG.getConstant(Sum, DL, N1.getValueType()));
    }
  }

  // fold (sra (shl X, m), (sub result_size, n))
  //     -> (sign_extend (trunc (srl X, (sub (sub result_size, n), m)))) for
  // result_size - n != m.
  // If truncate is free for the target, sext(shl) is likely to result in
  // better code.
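  // For example, with i32: (sra (shl X, 8), 24)
  //   -> (sign_extend:i32 (trunc:i8 (srl X, 16))),
  // which sign-extends bits [23:16] of X.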
  if (N0.getOpcode() == ISD::SHL && N1C) {
    // Get the two constants of the shifts, CN0 = m, CN = n.
    const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
    if (N01C) {
      LLVMContext &Ctx = *DAG.getContext();
      // Determine what the truncate's result bitsize and type would be.
      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());

      if (VT.isVector())
        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());

      // Determine the residual right-shift amount.
      signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();

      // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the truncate's target type is legal, sign_extend is
      // legal on that type, and the truncate to that type is both legal and
      // free, perform the transform.
      if ((ShiftAmt > 0) &&
          TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
          TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
          TLI.isTruncateFree(VT, TruncVT)) {
        SDLoc DL(N);
        SDValue Amt = DAG.getConstant(ShiftAmt, DL,
            getShiftAmountTy(N0.getOperand(0).getValueType()));
        SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
                                    N0.getOperand(0), Amt);
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
                                    Shift);
        return DAG.getNode(ISD::SIGN_EXTEND, DL,
                           N->getValueType(0), Trunc);
      }
    }
  }

  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
    if (NewOp1.getNode())
      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
  }

  // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
  //      if c1 is equal to the number of bits the trunc removes
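  // For example, with x:i64 truncated to i32 (so c1 == 32):
  //   (sra (trunc:i32 (srl x, 32)), 5) -> (trunc:i32 (sra x, 37)).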
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (N0.getOperand(0).getOpcode() == ISD::SRL ||
       N0.getOperand(0).getOpcode() == ISD::SRA) &&
      N0.getOperand(0).hasOneUse() &&
      N0.getOperand(0).getOperand(1).hasOneUse() &&
      N1C) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
      unsigned LargeShiftVal = LargeShift->getZExtValue();
      EVT LargeVT = N0Op0.getValueType();

      if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
        SDLoc DL(N);
        SDValue Amt =
          DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
                          getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
        SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
                                  N0Op0.getOperand(0), Amt);
        return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
      }
    }
  }

  // Simplify, based on bits shifted out of the LHS.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // If the sign bit is known to be zero, switch this to a SRL.
  if (DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);

  if (N1C && !N1C->isOpaque()) {
    SDValue NewSRA = visitShiftByConstant(N, N1C);
    if (NewSRA.getNode())
      return NewSRA;
  }
  return SDValue();
}
SDValue DAGCombiner::visitSRL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();

  // fold vector ops
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    N1C = isConstOrConstSplat(N1);
  }

  // fold (srl c1, c2) -> c1 >>u c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
  // fold (srl 0, x) -> 0
  if (isNullConstant(N0))
    return N0;
  // fold (srl x, c >= size(x)) -> undef
  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
    return DAG.getUNDEF(VT);
  // fold (srl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // if (srl x, c) is known to be zero, return 0
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);

  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N01C->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      SDLoc DL(N);
      if (c1 + c2 >= OpSizeInBits)
        return DAG.getConstant(0, DL, VT);
      return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
                         DAG.getConstant(c1 + c2, DL, N1.getValueType()));
    }
  }

  // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(0).getOpcode() == ISD::SRL &&
      isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
    uint64_t c1 =
      cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
    uint64_t c2 = N1C->getZExtValue();
    EVT InnerShiftVT = N0.getOperand(0).getValueType();
    EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
    uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
    // This is only valid if the OpSizeInBits + c1 = size of inner shift.
    if (c1 + OpSizeInBits == InnerShiftSize) {
      SDLoc DL(N0);
      if (c1 + c2 >= InnerShiftSize)
        return DAG.getConstant(0, DL, VT);
      return DAG.getNode(ISD::TRUNCATE, DL, VT,
                         DAG.getNode(ISD::SRL, DL, InnerShiftVT,
                                     N0.getOperand(0)->getOperand(0),
                                     DAG.getConstant(c1 + c2, DL,
                                                     ShiftCountVT)));
    }
  }

  // fold (srl (shl x, c), c) -> (and x, cst2)
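  // For example, on i32: (srl (shl x, 4), 4) -> (and x, 0x0FFFFFFF).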
  if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1) {
    unsigned BitSize = N0.getScalarValueSizeInBits();
    if (BitSize <= 64) {
      uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize;
      SDLoc DL(N);
      return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0),
                         DAG.getConstant(~0ULL >> ShAmt, DL, VT));
    }
  }

  // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    // Shifting in all undef bits?
    EVT SmallVT = N0.getOperand(0).getValueType();
    unsigned BitSize = SmallVT.getScalarSizeInBits();
    if (N1C->getZExtValue() >= BitSize)
      return DAG.getUNDEF(VT);

    if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
      uint64_t ShiftAmt = N1C->getZExtValue();
      SDLoc DL0(N0);
      SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
                                       N0.getOperand(0),
                                       DAG.getConstant(ShiftAmt, DL0,
                                                       getShiftAmountTy(SmallVT)));
      AddToWorklist(SmallShift.getNode());
      APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt);
      SDLoc DL(N);
      return DAG.getNode(ISD::AND, DL, VT,
                         DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
                         DAG.getConstant(Mask, DL, VT));
    }
  }

  // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
  // bit, which is unmodified by sra.
  if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
    if (N0.getOpcode() == ISD::SRA)
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
  }

  // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
  if (N1C && N0.getOpcode() == ISD::CTLZ &&
      N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
    APInt KnownZero, KnownOne;
    DAG.computeKnownBits(N0.getOperand(0), KnownZero, KnownOne);

    // If any of the input bits are KnownOne, then the input couldn't be all
    // zeros, thus the result of the srl will always be zero.
    if (KnownOne.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);

    // If all of the bits input to the ctlz node are known to be zero, then
    // the result of the ctlz is "32" and the result of the shift is one.
    APInt UnknownBits = ~KnownZero;
    if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);

    // Otherwise, check to see if there is exactly one bit input to the ctlz.
    if ((UnknownBits & (UnknownBits - 1)) == 0) {
      // Okay, we know that only the single bit specified by UnknownBits could
      // be set on input to the CTLZ node.  If this bit is set, the SRL will
      // return 0; if it is clear, it returns 1.  Change the CTLZ/SRL pair
      // to an SRL/XOR pair, which is likely to simplify more.
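      // For example, on i32, if only bit 3 of x can be set, then
      // (srl (ctlz x), 5) becomes (xor (srl x, 3), 1).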
      unsigned ShAmt = UnknownBits.countTrailingZeros();
      SDValue Op = N0.getOperand(0);

      if (ShAmt) {
        SDLoc DL(N0);
        Op = DAG.getNode(ISD::SRL, DL, VT, Op,
                         DAG.getConstant(ShAmt, DL,
                                         getShiftAmountTy(Op.getValueType())));
        AddToWorklist(Op.getNode());
      }

      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT,
                         Op, DAG.getConstant(1, DL, VT));
    }
  }

  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
    if (NewOp1.getNode())
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
  }

  // fold operands of srl based on knowledge that the low bits are not
  // demanded.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  if (N1C && !N1C->isOpaque()) {
    SDValue NewSRL = visitShiftByConstant(N, N1C);
    if (NewSRL.getNode())
      return NewSRL;
  }

  // Attempt to convert a srl of a load into a narrower zero-extending load.
  SDValue NarrowLoad = ReduceLoadWidth(N);
  if (NarrowLoad.getNode())
    return NarrowLoad;

  // Here is a common situation. We want to optimize:
  //
  //   %a = ...
  //   %b = and i32 %a, 2
  //   %c = srl i32 %b, 1
  //   brcond i32 %c ...
  //
  // into
  //
  //   %a = ...
  //   %b = and %a, 2
  //   %c = setcc eq %b, 0
  //   brcond %c ...
  //
  // However, after the source operand of SRL is optimized into AND, the SRL
  // itself may not be optimized further. Look for it and add the BRCOND into
  // the worklist.
  if (N->hasOneUse()) {
    SDNode *Use = *N->use_begin();
    if (Use->getOpcode() == ISD::BRCOND)
      AddToWorklist(Use);
    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
      // Also look past the truncate.
      Use = *Use->use_begin();
      if (Use->getOpcode() == ISD::BRCOND)
        AddToWorklist(Use);
    }
  }

  return SDValue();
}
SDValue DAGCombiner::visitBSWAP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (bswap c1) -> c2
  if (isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
  // fold (bswap (bswap x)) -> x
  if (N0.getOpcode() == ISD::BSWAP)
    return N0->getOperand(0);
  return SDValue();
}

SDValue DAGCombiner::visitCTLZ(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctlz c1) -> c2
  if (isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
  return SDValue();
}

SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctlz_zero_undef c1) -> c2
  if (isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
  return SDValue();
}

SDValue DAGCombiner::visitCTTZ(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (cttz c1) -> c2
  if (isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
  return SDValue();
}

SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (cttz_zero_undef c1) -> c2
  if (isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
  return SDValue();
}

SDValue DAGCombiner::visitCTPOP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctpop c1) -> c2
  if (isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
  return SDValue();
}
/// \brief Generate Min/Max node
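/// For example, (select (setolt LHS, RHS), LHS, RHS) becomes
/// (fminnum LHS, RHS) when FMINNUM is legal for the type.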
static SDValue combineMinNumMaxNum(SDLoc DL, EVT VT, SDValue LHS, SDValue RHS,
                                   SDValue True, SDValue False,
                                   ISD::CondCode CC, const TargetLowering &TLI,
                                   SelectionDAG &DAG) {
  if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
    return SDValue();

  switch (CC) {
  case ISD::SETOLT:
  case ISD::SETOLE:
  case ISD::SETLT:
  case ISD::SETLE:
  case ISD::SETULT:
  case ISD::SETULE: {
    unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
    if (TLI.isOperationLegal(Opcode, VT))
      return DAG.getNode(Opcode, DL, VT, LHS, RHS);
    return SDValue();
  }
  case ISD::SETOGT:
  case ISD::SETOGE:
  case ISD::SETGT:
  case ISD::SETGE:
  case ISD::SETUGT:
  case ISD::SETUGE: {
    unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
    if (TLI.isOperationLegal(Opcode, VT))
      return DAG.getNode(Opcode, DL, VT, LHS, RHS);
    return SDValue();
  }
  default:
    return SDValue();
  }
}
SDValue DAGCombiner::visitSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  EVT VT0 = N0.getValueType();

  // fold (select C, X, X) -> X
  if (N1 == N2)
    return N1;
  if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
    // fold (select true, X, Y) -> X
    // fold (select false, X, Y) -> Y
    return !N0C->isNullValue() ? N1 : N2;
  }
  // fold (select C, 1, X) -> (or C, X)
  if (VT == MVT::i1 && isOneConstant(N1))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
  // fold (select C, 0, 1) -> (xor C, 1)
  // We can't do this reliably if integer based booleans have different contents
  // to floating point based booleans. This is because we can't tell whether we
  // have an integer-based boolean or a floating-point-based boolean unless we
  // can find the SETCC that produced it and inspect its operands. This is
  // fairly easy if C is the SETCC node, but it can potentially be
  // undiscoverable (or not reasonably discoverable). For example, it could be
  // in another basic block or it could require searching a complicated
  // expression.
  if (VT.isInteger() &&
      (VT0 == MVT::i1 || (VT0.isInteger() &&
                          TLI.getBooleanContents(false, false) ==
                              TLI.getBooleanContents(false, true) &&
                          TLI.getBooleanContents(false, false) ==
                              TargetLowering::ZeroOrOneBooleanContent)) &&
      isNullConstant(N1) && isOneConstant(N2)) {
    SDValue XORNode;
    if (VT == VT0) {
      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT0,
                         N0, DAG.getConstant(1, DL, VT0));
    }
    SDLoc DL0(N0);
    XORNode = DAG.getNode(ISD::XOR, DL0, VT0,
                          N0, DAG.getConstant(1, DL0, VT0));
    AddToWorklist(XORNode.getNode());
    if (VT.bitsGT(VT0))
      return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode);
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode);
  }
  // fold (select C, 0, X) -> (and (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2);
  }
  // fold (select C, X, 1) -> (or (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1);
  }
  // fold (select C, X, 0) -> (and C, X)
  if (VT == MVT::i1 && isNullConstant(N2))
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
  // fold (select X, X, Y) -> (or X, Y)
  // fold (select X, 1, Y) -> (or X, Y)
  if (VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
  // fold (select X, Y, X) -> (and X, Y)
  // fold (select X, Y, 0) -> (and X, Y)
  if (VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0);  // Don't revisit N.

  // fold selects based on a setcc into other things, such as min/max/abs
  if (N0.getOpcode() == ISD::SETCC) {
    // select x, y (fcmp lt x, y) -> fminnum x, y
    // select x, y (fcmp gt x, y) -> fmaxnum x, y
    //
    // This is OK if we don't care about what happens if either operand is a
    // NaN.
    //

    // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
    // no signed zeros as well as no nans.
    const TargetOptions &Options = DAG.getTarget().Options;
    if (Options.UnsafeFPMath &&
        VT.isFloatingPoint() && N0.hasOneUse() &&
        DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
      ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();

      SDValue FMinMax =
          combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1),
                              N1, N2, CC, TLI, DAG);
      if (FMinMax)
        return FMinMax;
    }

    if ((!LegalOperations &&
         TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
        TLI.isOperationLegal(ISD::SELECT_CC, VT))
      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
                         N0.getOperand(0), N0.getOperand(1),
                         N1, N2, N0.getOperand(2));
    return SimplifySelect(SDLoc(N), N0, N1, N2);
  }

  if (VT0 == MVT::i1) {
    if (TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
      // select (and Cond0, Cond1), X, Y
      //   -> select Cond0, (select Cond1, X, Y), Y
      if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
        SDValue Cond0 = N0->getOperand(0);
        SDValue Cond1 = N0->getOperand(1);
        SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
                                          N1.getValueType(), Cond1, N1, N2);
        return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0,
                           InnerSelect, N2);
      }
      // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
      if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
        SDValue Cond0 = N0->getOperand(0);
        SDValue Cond1 = N0->getOperand(1);
        SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
                                          N1.getValueType(), Cond1, N1, N2);
        return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1,
                           InnerSelect);
      }
    }

    // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
    if (N1->getOpcode() == ISD::SELECT) {
      SDValue N1_0 = N1->getOperand(0);
      SDValue N1_1 = N1->getOperand(1);
      SDValue N1_2 = N1->getOperand(2);
      if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
        // Create the actual and node if we can generate good code for it.
        if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
          SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(),
                                    N0, N1_0);
          return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And,
                             N1_1, N2);
        }
        // Otherwise see if we can optimize the "and" to a better pattern.
        if (SDValue Combined = visitANDLike(N0, N1_0, N))
          return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
                             N1_1, N2);
      }
    }
    // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
    if (N2->getOpcode() == ISD::SELECT) {
      SDValue N2_0 = N2->getOperand(0);
      SDValue N2_1 = N2->getOperand(1);
      SDValue N2_2 = N2->getOperand(2);
      if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
        // Create the actual or node if we can generate good code for it.
        if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
          SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(),
                                   N0, N2_0);
          return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or,
                             N1, N2_2);
        }
        // Otherwise see if we can optimize to a better pattern.
        if (SDValue Combined = visitORLike(N0, N2_0, N))
          return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
                             N1, N2_2);
      }
    }
  }

  return SDValue();
}
static
std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));

  // Split the inputs.
  SDValue Lo, Hi, LL, LH, RL, RH;
  std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
  std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);

  Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
  Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));

  return std::make_pair(Lo, Hi);
}
// This function assumes all the vselect's arguments are CONCAT_VECTOR
// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
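// For example, with Cond = <0, 0, -1, -1>, LHS = (concat A, B) and
// RHS = (concat C, D), the result is (concat C, B).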
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
  SDLoc dl(N);
  SDValue Cond = N->getOperand(0);
  SDValue LHS = N->getOperand(1);
  SDValue RHS = N->getOperand(2);
  EVT VT = N->getValueType(0);
  int NumElems = VT.getVectorNumElements();
  assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
         RHS.getOpcode() == ISD::CONCAT_VECTORS &&
         Cond.getOpcode() == ISD::BUILD_VECTOR);

  // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
  // binary ones here.
  if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
    return SDValue();

  // We're sure we have an even number of elements due to the
  // concat_vectors we have as arguments to vselect.
  // Skip BV elements until we find one that's not an UNDEF.
  // After we find a non-UNDEF element, keep looping until we get to half the
  // length of the BV and check that all the non-undef nodes are the same.
  ConstantSDNode *BottomHalf = nullptr;
  for (int i = 0; i < NumElems / 2; ++i) {
    if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF)
      continue;

    if (BottomHalf == nullptr)
      BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
    else if (Cond->getOperand(i).getNode() != BottomHalf)
      return SDValue();
  }

  // Do the same for the second half of the BuildVector
  ConstantSDNode *TopHalf = nullptr;
  for (int i = NumElems / 2; i < NumElems; ++i) {
    if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF)
      continue;

    if (TopHalf == nullptr)
      TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
    else if (Cond->getOperand(i).getNode() != TopHalf)
      return SDValue();
  }

  assert(TopHalf && BottomHalf &&
         "One half of the selector was all UNDEFs and the other was all the "
         "same value. This should have been addressed before this function.");
  return DAG.getNode(
      ISD::CONCAT_VECTORS, dl, VT,
      BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
      TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
}
SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
  if (Level >= AfterLegalizeTypes)
    return SDValue();

  MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
  SDValue Mask = MSC->getMask();
  SDValue Data = MSC->getValue();
  SDLoc DL(N);

  // If the MSCATTER data type requires splitting and the mask is provided by a
  // SETCC, then split both nodes and their operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).
  if (Mask.getOpcode() != ISD::SETCC)
    return SDValue();

  // Check if any splitting is required.
  if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
      TargetLowering::TypeSplitVector)
    return SDValue();

  SDValue MaskLo, MaskHi, Lo, Hi;
  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);

  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));

  SDValue Chain = MSC->getChain();

  EVT MemoryVT = MSC->getMemoryVT();
  unsigned Alignment = MSC->getOriginalAlignment();

  EVT LoMemVT, HiMemVT;
  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);

  SDValue DataLo, DataHi;
  std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);

  SDValue BasePtr = MSC->getBasePtr();
  SDValue IndexLo, IndexHi;
  std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);

  MachineMemOperand *MMO = DAG.getMachineFunction().
    getMachineMemOperand(MSC->getPointerInfo(),
                         MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
                         Alignment, MSC->getAAInfo(), MSC->getRanges());

  SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo };
  Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
                            DL, OpsLo, MMO);

  SDValue OpsHi[] = { Chain, DataHi, MaskHi, BasePtr, IndexHi };
  Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
                            DL, OpsHi, MMO);

  AddToWorklist(Lo.getNode());
  AddToWorklist(Hi.getNode());

  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
}
SDValue DAGCombiner::visitMSTORE(SDNode *N) {
  if (Level >= AfterLegalizeTypes)
    return SDValue();

  MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
  SDValue Mask = MST->getMask();
  SDValue Data = MST->getValue();
  SDLoc DL(N);

  // If the MSTORE data type requires splitting and the mask is provided by a
  // SETCC, then split both nodes and their operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).
  if (Mask.getOpcode() == ISD::SETCC) {
    // Check if any splitting is required.
    if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
        TargetLowering::TypeSplitVector)
      return SDValue();

    SDValue MaskLo, MaskHi, Lo, Hi;
    std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);

    EVT LoVT, HiVT;
    std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MST->getValueType(0));

    SDValue Chain = MST->getChain();
    SDValue Ptr = MST->getBasePtr();

    EVT MemoryVT = MST->getMemoryVT();
    unsigned Alignment = MST->getOriginalAlignment();

    // If the alignment is equal to the vector size, use half of it for the
    // second half of the split store.
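    // For example, a 32-byte masked store of v8i32 with 32-byte alignment is
    // split into two 16-byte stores; the upper half is only 16-byte aligned.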
    unsigned SecondHalfAlignment =
      (Alignment == Data->getValueType(0).getSizeInBits()/8) ?
         Alignment/2 : Alignment;

    EVT LoMemVT, HiMemVT;
    std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);

    SDValue DataLo, DataHi;
    std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);

    MachineMemOperand *MMO = DAG.getMachineFunction().
      getMachineMemOperand(MST->getPointerInfo(),
                           MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
                           Alignment, MST->getAAInfo(), MST->getRanges());

    Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
                            MST->isTruncatingStore());

    unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
    Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
                      DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));

    MMO = DAG.getMachineFunction().
      getMachineMemOperand(MST->getPointerInfo(),
                           MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
                           SecondHalfAlignment, MST->getAAInfo(),
                           MST->getRanges());

    Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
                            MST->isTruncatingStore());

    AddToWorklist(Lo.getNode());
    AddToWorklist(Hi.getNode());

    return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
  }
  return SDValue();
}
SDValue DAGCombiner::visitMGATHER(SDNode *N) {
  if (Level >= AfterLegalizeTypes)
    return SDValue();

  MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
  SDValue Mask = MGT->getMask();
  SDLoc DL(N);

  // If the MGATHER result requires splitting and the mask is provided by a
  // SETCC, then split both nodes and their operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).
  if (Mask.getOpcode() != ISD::SETCC)
    return SDValue();

  EVT VT = N->getValueType(0);

  // Check if any splitting is required.
  if (TLI.getTypeAction(*DAG.getContext(), VT) !=
      TargetLowering::TypeSplitVector)
    return SDValue();

  SDValue MaskLo, MaskHi, Lo, Hi;
  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);

  SDValue Src0 = MGT->getValue();
  SDValue Src0Lo, Src0Hi;
  std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);

  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);

  SDValue Chain = MGT->getChain();
  EVT MemoryVT = MGT->getMemoryVT();
  unsigned Alignment = MGT->getOriginalAlignment();

  EVT LoMemVT, HiMemVT;
  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);

  SDValue BasePtr = MGT->getBasePtr();
  SDValue Index = MGT->getIndex();
  SDValue IndexLo, IndexHi;
  std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);

  MachineMemOperand *MMO = DAG.getMachineFunction().
    getMachineMemOperand(MGT->getPointerInfo(),
                         MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
                         Alignment, MGT->getAAInfo(), MGT->getRanges());

  SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo };
  Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
                           MMO);

  SDValue OpsHi[] = { Chain, Src0Hi, MaskHi, BasePtr, IndexHi };
  Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
                           MMO);

  AddToWorklist(Lo.getNode());
  AddToWorklist(Hi.getNode());

  // Build a factor node to remember that this load is independent of the
  // other one.
  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
                      Hi.getValue(1));

  // Legalized the chain result - switch anything that used the old chain to
  // use the new one.
  DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);

  SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);

  SDValue RetOps[] = { GatherRes, Chain };
  return DAG.getMergeValues(RetOps, DL);
}
SDValue DAGCombiner::visitMLOAD(SDNode *N) {
  if (Level >= AfterLegalizeTypes)
    return SDValue();

  MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
  SDValue Mask = MLD->getMask();
  SDLoc DL(N);

  // If the MLOAD result requires splitting and the mask is provided by a
  // SETCC, then split both nodes and their operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).
  if (Mask.getOpcode() == ISD::SETCC) {
    EVT VT = N->getValueType(0);

    // Check if any splitting is required.
    if (TLI.getTypeAction(*DAG.getContext(), VT) !=
        TargetLowering::TypeSplitVector)
      return SDValue();

    SDValue MaskLo, MaskHi, Lo, Hi;
    std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);

    SDValue Src0 = MLD->getSrc0();
    SDValue Src0Lo, Src0Hi;
    std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);

    EVT LoVT, HiVT;
    std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));

    SDValue Chain = MLD->getChain();
    SDValue Ptr = MLD->getBasePtr();
    EVT MemoryVT = MLD->getMemoryVT();
    unsigned Alignment = MLD->getOriginalAlignment();

    // If the alignment is equal to the vector size, use half of it for the
    // second half of the split load.
    unsigned SecondHalfAlignment =
      (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
         Alignment/2 : Alignment;

    EVT LoMemVT, HiMemVT;
    std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);

    MachineMemOperand *MMO = DAG.getMachineFunction().
      getMachineMemOperand(MLD->getPointerInfo(),
                           MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
                           Alignment, MLD->getAAInfo(), MLD->getRanges());

    Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
                           ISD::NON_EXTLOAD);

    unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
    Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
                      DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));

    MMO = DAG.getMachineFunction().
      getMachineMemOperand(MLD->getPointerInfo(),
                           MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
                           SecondHalfAlignment, MLD->getAAInfo(),
                           MLD->getRanges());

    Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
                           ISD::NON_EXTLOAD);

    AddToWorklist(Lo.getNode());
    AddToWorklist(Hi.getNode());

    // Build a factor node to remember that this load is independent of the
    // other one.
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
                        Hi.getValue(1));

    // Legalized the chain result - switch anything that used the old chain to
    // use the new one.
    DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);

    SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);

    SDValue RetOps[] = { LoadRes, Chain };
    return DAG.getMergeValues(RetOps, DL);
  }
  return SDValue();
}
SDValue DAGCombiner::visitVSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDLoc DL(N);

  // Canonicalize integer abs.
  // vselect (setg[te] X,  0),  X, -X ->
  // vselect (setgt    X, -1),  X, -X ->
  // vselect (setl[te] X,  0), -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    bool isAbs = false;
    bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());

    if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
        N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
    else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
             N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());

    if (isAbs) {
      EVT VT = LHS.getValueType();
      SDValue Shift = DAG.getNode(
          ISD::SRA, DL, VT, LHS,
          DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, DL, VT));
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
      AddToWorklist(Shift.getNode());
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
    }
  }

  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0);  // Don't revisit N.

  // If the VSELECT result requires splitting and the mask is provided by a
  // SETCC, then split both nodes and their operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).
  if (N0.getOpcode() == ISD::SETCC) {
    EVT VT = N->getValueType(0);

    // Check if any splitting is required.
    if (TLI.getTypeAction(*DAG.getContext(), VT) !=
        TargetLowering::TypeSplitVector)
      return SDValue();

    SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH;
    std::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
    std::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
    std::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);

    Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL);
    Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH);

    // Add the new VSELECT nodes to the work list in case they need to be split
    // again.
    AddToWorklist(Lo.getNode());
    AddToWorklist(Hi.getNode());

    return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
  }

  // Fold (vselect (build_vector all_ones), N1, N2) -> N1
  if (ISD::isBuildVectorAllOnes(N0.getNode()))
    return N1;
  // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
  if (ISD::isBuildVectorAllZeros(N0.getNode()))
    return N2;

  // The ConvertSelectToConcatVector function is assuming both the above
  // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
  // and addressed.
  if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
      N2.getOpcode() == ISD::CONCAT_VECTORS &&
      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
    SDValue CV = ConvertSelectToConcatVector(N, DAG);
    if (CV.getNode())
      return CV;
  }

  return SDValue();
}
SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDValue N3 = N->getOperand(3);
  SDValue N4 = N->getOperand(4);
  ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();

  // fold select_cc lhs, rhs, x, x, cc -> x
  if (N2 == N3)
    return N2;

  // Determine if the condition we're dealing with is constant
  SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
                              N0, N1, CC, SDLoc(N), false);
  if (SCC.getNode()) {
    AddToWorklist(SCC.getNode());

    if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
      if (!SCCC->isNullValue())
        return N2;    // cond always true -> true val
      else
        return N3;    // cond always false -> false val
    } else if (SCC->getOpcode() == ISD::UNDEF) {
      // When the condition is UNDEF, just return the first operand. This is
      // coherent with DAG creation: no setcc node is created in this case.
      return N2;
    } else if (SCC.getOpcode() == ISD::SETCC) {
      // Fold to a simpler select_cc
      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
                         SCC.getOperand(0), SCC.getOperand(1), N2, N3,
                         SCC.getOperand(2));
    }
  }

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N2, N3))
    return SDValue(N, 0);  // Don't revisit N.

  // fold select_cc into other things, such as min/max/abs
  return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
}
SDValue DAGCombiner::visitSETCC(SDNode *N) {
  return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
                       cast<CondCodeSDNode>(N->getOperand(2))->get(),
                       SDLoc(N));
}
/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
/// a build_vector of constants.
/// This function is called by the DAGCombiner when visiting sext/zext/aext
/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
/// Vector extends are not folded if operations are legal; this is to
/// avoid introducing illegal build_vector dag nodes.
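/// For example, (zext:v4i32 (build_vector:v4i16 1, 2, 3, 4)) becomes a
/// v4i32 build_vector of the zero-extended constants 1, 2, 3, 4.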
static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
                                         SelectionDAG &DAG, bool LegalTypes,
                                         bool LegalOperations) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
         Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
         && "Expected EXTEND dag node in input!");

  // fold (sext c1) -> c1
  // fold (zext c1) -> c1
  // fold (aext c1) -> c1
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();

  // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
  // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
  // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
  EVT SVT = VT.getScalarType();
  if (!(VT.isVector() &&
      (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
    return nullptr;

  // We can fold this node into a build_vector.
  unsigned VTBits = SVT.getSizeInBits();
  unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits();
  SmallVector<SDValue, 8> Elts;
  unsigned NumElts = VT.getVectorNumElements();
  SDLoc DL(N);

  for (unsigned i=0; i != NumElts; ++i) {
    SDValue Op = N0->getOperand(i);
    if (Op->getOpcode() == ISD::UNDEF) {
      Elts.push_back(DAG.getUNDEF(SVT));
      continue;
    }

    SDLoc DL(Op);
    // Get the constant value and if needed trunc it to the size of the type.
    // Nodes like build_vector might have constants wider than the scalar type.
    APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
    if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
      Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
    else
      Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
  }

  return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts).getNode();
}
// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
// transformation. Returns true if extensions are possible and the above
// mentioned transformation is profitable.
  4963. static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
  4964. unsigned ExtOpc,
  4965. SmallVectorImpl<SDNode *> &ExtendNodes,
  4966. const TargetLowering &TLI) {
  4967. bool HasCopyToRegUses = false;
  4968. bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
  4969. for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
  4970. UE = N0.getNode()->use_end();
  4971. UI != UE; ++UI) {
  4972. SDNode *User = *UI;
  4973. if (User == N)
  4974. continue;
  4975. if (UI.getUse().getResNo() != N0.getResNo())
  4976. continue;
  4977. // FIXME: Only extend SETCC N, N and SETCC N, c for now.
  4978. if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
  4979. ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
  4980. if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
  4981. // Sign bits will be lost after a zext.
  4982. return false;
  4983. bool Add = false;
  4984. for (unsigned i = 0; i != 2; ++i) {
  4985. SDValue UseOp = User->getOperand(i);
  4986. if (UseOp == N0)
  4987. continue;
  4988. if (!isa<ConstantSDNode>(UseOp))
  4989. return false;
  4990. Add = true;
  4991. }
  4992. if (Add)
  4993. ExtendNodes.push_back(User);
  4994. continue;
  4995. }
  4996. // If truncates aren't free and there are users we can't
  4997. // extend, it isn't worthwhile.
  4998. if (!isTruncFree)
  4999. return false;
  5000. // Remember if this value is live-out.
  5001. if (User->getOpcode() == ISD::CopyToReg)
  5002. HasCopyToRegUses = true;
  5003. }
  5004. if (HasCopyToRegUses) {
  5005. bool BothLiveOut = false;
  5006. for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
  5007. UI != UE; ++UI) {
  5008. SDUse &Use = UI.getUse();
  5009. if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
  5010. BothLiveOut = true;
  5011. break;
  5012. }
  5013. }
  5014. if (BothLiveOut)
  5015. // Both unextended and extended values are live out. There had better be
  5016. // a good reason for the transformation.
  5017. return ExtendNodes.size();
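      // Note: size() converts to bool here, so the transformation is only
      // judged worthwhile when at least one setcc use was collected above
      // and will be rewritten to use the extended value.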
  }
  return true;
}
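
/// Rewrite the setcc uses recorded by ExtendUsesToFormExtLoad so that they
/// compare the extended value instead of the narrow one: an operand equal to
/// Trunc is replaced by ExtLoad, and any other (constant) operand is wrapped
/// in an ExtType node, e.g. (setcc (trunc (extload x)), c, cc) becomes
/// (setcc (extload x), (ext c), cc).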
void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                                  SDValue Trunc, SDValue ExtLoad, SDLoc DL,
                                  ISD::NodeType ExtType) {
  // Extend SetCC uses if necessary.
  for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
    SDNode *SetCC = SetCCs[i];
    SmallVector<SDValue, 4> Ops;

    for (unsigned j = 0; j != 2; ++j) {
      SDValue SOp = SetCC->getOperand(j);
      if (SOp == Trunc)
        Ops.push_back(ExtLoad);
      else
        Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
    }

    Ops.push_back(SetCC->getOperand(2));
    CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
  }
}

// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT DstVT = N->getValueType(0);
  EVT SrcVT = N0.getValueType();

  assert((N->getOpcode() == ISD::SIGN_EXTEND ||
          N->getOpcode() == ISD::ZERO_EXTEND) &&
         "Unexpected node type (not an extend)!");

  // fold (sext (load x)) to multiple smaller sextloads; same for zext.
  // For example, on a target with legal v4i32, but illegal v8i32, turn:
  //   (v8i32 (sext (v8i16 (load x))))
  // into:
  //   (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                          (v4i32 (sextload (x + 16)))))
  // Where uses of the original load, i.e.:
  //   (v8i16 (load x))
  // are replaced with:
  //   (v8i16 (truncate
  //     (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                            (v4i32 (sextload (x + 16)))))))
  //
  // This combine is only applicable to illegal, but splittable, vectors.
  // All legal types, and illegal non-vector types, are handled elsewhere.
  // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
  //
  if (N0->getOpcode() != ISD::LOAD)
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
      !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
      !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
    return SDValue();

  SmallVector<SDNode *, 4> SetCCs;
  if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI))
    return SDValue();

  ISD::LoadExtType ExtType =
      N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;

  // Try to split the vector types to get down to legal types.
  EVT SplitSrcVT = SrcVT;
  EVT SplitDstVT = DstVT;
  while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
         SplitSrcVT.getVectorNumElements() > 1) {
    SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
    SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
  }
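  // Each GetSplitDestVTs step halves the element count, e.g. the v8i16/v8i32
  // pair above becomes v4i16/v4i32, then v2i16/v2i32, stopping once the
  // extending load is legal or custom (or the vector cannot be split further).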

  if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
    return SDValue();

  SDLoc DL(N);
  const unsigned NumSplits =
      DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
  const unsigned Stride = SplitSrcVT.getStoreSize();
  SmallVector<SDValue, 4> Loads;
  SmallVector<SDValue, 4> Chains;

  SDValue BasePtr = LN0->getBasePtr();
  for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
    const unsigned Offset = Idx * Stride;
    const unsigned Align = MinAlign(LN0->getAlignment(), Offset);

    SDValue SplitLoad = DAG.getExtLoad(
        ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr,
        LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT,
        LN0->isVolatile(), LN0->isNonTemporal(), LN0->isInvariant(),
        Align, LN0->getAAInfo());

    BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
                          DAG.getConstant(Stride, DL, BasePtr.getValueType()));

    Loads.push_back(SplitLoad.getValue(0));
    Chains.push_back(SplitLoad.getValue(1));
  }
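  // Merge the chains of the split loads with a TokenFactor so later memory
  // operations depend on all of them, and concatenate the loaded halves to
  // rebuild the wide extended vector.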
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);

  CombineTo(N, NewValue);

  // Replace uses of the original load (before extension)
  // with a truncate of the concatenated sextloaded vectors.
  SDValue Trunc =
      DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
  CombineTo(N0.getNode(), Trunc, NewChain);
  ExtendSetCCUses(SetCCs, Trunc, NewValue, DL,
                  (ISD::NodeType)N->getOpcode());
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}

SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (sext (sext x)) -> (sext x)
  // fold (sext (aext x)) -> (sext x)
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT,
                       N0.getOperand(0));

  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (sext (truncate (load x))) -> (sext (smaller load x))
    // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }

    // See if the value being truncated is already sign extended. If so, just
    // eliminate the trunc/sext pair.
    SDValue Op = N0.getOperand(0);
    unsigned OpBits   = Op.getValueType().getScalarType().getSizeInBits();
    unsigned MidBits  = N0.getValueType().getScalarType().getSizeInBits();
    unsigned DestBits = VT.getScalarType().getSizeInBits();
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
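    // e.g. i32 0xFFFFFF80 has 25 sign bits (bits [31:7] all match the sign
    // bit), so it survives a trunc-to-i8 + sext-back-to-i32 round trip.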
    if (OpBits == DestBits) {
      // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
      // bits, it is already ready.
      if (NumSignBits > DestBits-MidBits)
        return Op;
    } else if (OpBits < DestBits) {
      // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
      // bits, just sext from i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, Op);
    } else {
      // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
      // bits, just truncate to i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
    }

    // fold (sext (truncate x)) -> (sextinreg x).
    if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
                                                 N0.getValueType())) {
      if (OpBits < DestBits)
        Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
      else if (OpBits > DestBits)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, Op,
                         DAG.getValueType(N0.getValueType()));
    }
  }

  // fold (sext (load x)) -> (sext (truncate (sextload x)))
  // Only generate vector extloads when 1) they're legal, and 2) they are
  // deemed desirable by the target.
  if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      ((!LegalOperations && !VT.isVector() &&
        !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
    if (VT.isVector())
      DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                      ISD::SIGN_EXTEND);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }
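  // (When the load has extra uses, the CombineTo above rewires them to
  // (truncate (sextload x)), so the original load disappears and only one
  // memory access remains.)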

  // fold (sext (load x)) to multiple smaller sextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
  // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
  if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    if ((!LegalOperations && !LN0->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), MemVT,
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                            N0.getValueType(), ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (sext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (sextload x), (sext cst))
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
    if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse())
        DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0),
                                          ISD::SIGN_EXTEND, SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT,
                                         LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.sext(VT.getSizeInBits());
        SDLoc DL(N);
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
                                    SDLoc(N0.getOperand(0)),
                                    N0.getOperand(0).getValueType(), ExtLoad);
        CombineTo(N, And);
        CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
        ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
                        ISD::SIGN_EXTEND);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    EVT N0VT = N0.getOperand(0).getValueType();
    // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations &&
        TLI.getBooleanContents(N0VT) ==
            TargetLowering::ZeroOrNegativeOneBooleanContent) {
      // On some architectures (such as SSE/NEON/etc) the SETCC result type is
      // of the same size as the compared operands. Only optimize sext(setcc())
      // if this is the case.
      EVT SVT = getSetCCResultType(N0VT);

      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter).  Check to see that they are the same size.  If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == SVT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                            N0.getOperand(1),
                            cast<CondCodeSDNode>(N0.getOperand(2))->get());

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend.
      EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
      if (SVT == MatchingVectorType) {
        SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType,
                                      N0.getOperand(0), N0.getOperand(1),
                                      cast<CondCodeSDNode>(N0.getOperand(2))->get());
        return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT);
      }
    }

    // sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0)
    unsigned ElementWidth = VT.getScalarType().getSizeInBits();
    SDLoc DL(N);
    SDValue NegOne =
        DAG.getConstant(APInt::getAllOnesValue(ElementWidth), DL, VT);
    SDValue SCC =
        SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1),
                         NegOne, DAG.getConstant(0, DL, VT),
                         cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
    if (SCC.getNode()) return SCC;

    if (!VT.isVector()) {
      EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType());
      if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, SetCCVT)) {
        SDLoc DL(N);
        ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
        SDValue SetCC = DAG.getSetCC(DL, SetCCVT,
                                     N0.getOperand(0), N0.getOperand(1), CC);
        return DAG.getSelect(DL, VT, SetCC,
                             NegOne, DAG.getConstant(0, DL, VT));
      }
    }
  }

  // fold (sext x) -> (zext x) if the sign bit is known zero.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
      DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0);

  return SDValue();
}

// isTruncateOf - If N is a truncate of some other value, return true, record
// the value being truncated in Op and which of Op's bits are zero in
// KnownZero. This function computes KnownZero to avoid a duplicated call to
// computeKnownBits in the caller.
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
                         APInt &KnownZero) {
  APInt KnownOne;
  if (N->getOpcode() == ISD::TRUNCATE) {
    Op = N->getOperand(0);
    DAG.computeKnownBits(Op, KnownZero, KnownOne);
    return true;
  }

  if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
      cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
    return false;
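  // An i1 (setne X, 0) behaves like a truncate of X to i1 when X is known to
  // be either 0 or 1; the KnownZero test below verifies that property.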
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  assert(Op0.getValueType() == Op1.getValueType());

  if (isNullConstant(Op0))
    Op = Op1;
  else if (isNullConstant(Op1))
    Op = Op0;
  else
    return false;

  DAG.computeKnownBits(Op, KnownZero, KnownOne);

  if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue())
    return false;

  return true;
}

SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (zext (zext x)) -> (zext x)
  // fold (zext (aext x)) -> (zext x)
  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
                       N0.getOperand(0));

  // fold (zext (truncate x)) -> (zext x) or
  //      (zext (truncate x)) -> (truncate x)
  // This is valid when the truncated bits of x are already zero.
  // FIXME: We should extend this to work for vectors too.
  SDValue Op;
  APInt KnownZero;
  if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) {
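    // Op is the pre-truncate value. TruncatedBits covers the bit range
    // [width(N0), min(width(Op), width(VT))); if all of those bits are known
    // zero (e.g. x:i32 with its top 16 bits clear in (zext i32 (trunc i16 x))),
    // the zext(trunc) pair is redundant.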
    APInt TruncatedBits =
      (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
      APInt(Op.getValueSizeInBits(), 0) :
      APInt::getBitsSet(Op.getValueSizeInBits(),
                        N0.getValueSizeInBits(),
                        std::min(Op.getValueSizeInBits(),
                                 VT.getSizeInBits()));
    if (TruncatedBits == (KnownZero & TruncatedBits)) {
      if (VT.bitsGT(Op.getValueType()))
        return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op);
      if (VT.bitsLT(Op.getValueType()))
        return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);

      return Op;
    }
  }

  // fold (zext (truncate (load x))) -> (zext (smaller load x))
  // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (zext (truncate x)) -> (and x, mask)
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
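    // e.g. (zext i32 (trunc i8 x:i32)) becomes (and x, 255): the AND clears
    // exactly the bits the truncate discarded.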
    // fold (zext (truncate (load x))) -> (zext (smaller load x))
    // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }

    SDValue Op = N0.getOperand(0);
    if (Op.getValueType().bitsLT(VT)) {
      Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op);
      AddToWorklist(Op.getNode());
    } else if (Op.getValueType().bitsGT(VT)) {
      Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
      AddToWorklist(Op.getNode());
    }
    return DAG.getZeroExtendInReg(Op, SDLoc(N),
                                  N0.getValueType().getScalarType());
  }

  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
  // if either of the casts is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                           N0.getValueType()) ||
       !TLI.isZExtFree(N0.getValueType(), VT))) {
    SDValue X = N0.getOperand(0).getOperand(0);
    if (X.getValueType().bitsLT(VT)) {
      X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(X), VT, X);
    } else if (X.getValueType().bitsGT(VT)) {
      X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
    }
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    SDLoc DL(N);
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // fold (zext (load x)) -> (zext (truncate (zextload x)))
  // Only generate vector extloads when 1) they're legal, and 2) they are
  // deemed desirable by the target.
  if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      ((!LegalOperations && !VT.isVector() &&
        !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
    if (VT.isVector())
      DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                      ISD::ZERO_EXTEND);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (zext (load x)) to multiple smaller zextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // fold (zext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (zextload x), (zext cst))
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
    if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse())
        DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0),
                                          ISD::ZERO_EXTEND, SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
                                         LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.zext(VT.getSizeInBits());
        SDLoc DL(N);
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
                                    SDLoc(N0.getOperand(0)),
                                    N0.getOperand(0).getValueType(), ExtLoad);
        CombineTo(N, And);
        CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
        ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
                        ISD::ZERO_EXTEND);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
  // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
  if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    if ((!LegalOperations && !LN0->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), MemVT,
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(),
                            ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    if (!LegalOperations && VT.isVector() &&
        N0.getValueType().getVectorElementType() == MVT::i1) {
      EVT N0VT = N0.getOperand(0).getValueType();
      if (getSetCCResultType(N0VT) == N0.getValueType())
        return SDValue();

      // zext(setcc) -> (and (vsetcc), (1, 1, ...)) for vectors.
      // Only do this before legalize for now.
      EVT EltVT = VT.getVectorElementType();
      SDLoc DL(N);
      SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(),
                                    DAG.getConstant(1, DL, EltVT));
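      // For either common boolean convention (0/1 or 0/-1), a true lane has
      // bit 0 set, so masking the vsetcc result with a splat of 1 produces
      // the 0/1 lanes that zero-extending an i1 vector must yield.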
      if (VT.getSizeInBits() == N0VT.getSizeInBits())
        // We know that the # elements of the results is the same as the
        // # elements of the compare (and the # elements of the compare result
        // for that matter).  Check to see that they are the same size.  If so,
        // we know that the element size of the zext'd result matches the
        // element size of the compare operands.
        return DAG.getNode(ISD::AND, DL, VT,
                           DAG.getSetCC(DL, VT, N0.getOperand(0),
                                        N0.getOperand(1),
                                        cast<CondCodeSDNode>(N0.getOperand(2))->get()),
                           DAG.getNode(ISD::BUILD_VECTOR, DL, VT,
                                       OneOps));

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend.
      EVT MatchingElementType =
          EVT::getIntegerVT(*DAG.getContext(),
                            N0VT.getScalarType().getSizeInBits());
      EVT MatchingVectorType =
          EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
                           N0VT.getVectorNumElements());
      SDValue VsetCC =
          DAG.getSetCC(DL, MatchingVectorType, N0.getOperand(0),
                       N0.getOperand(1),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
      return DAG.getNode(ISD::AND, DL, VT,
                         DAG.getSExtOrTrunc(VsetCC, DL, VT),
                         DAG.getNode(ISD::BUILD_VECTOR, DL, VT, OneOps));
    }

    // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    SDValue SCC =
        SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1),
                         DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT),
                         cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
    if (SCC.getNode()) return SCC;
  }

  // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
      isa<ConstantSDNode>(N0.getOperand(1)) &&
      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
      N0.hasOneUse()) {
    SDValue ShAmt = N0.getOperand(1);
    unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
    if (N0.getOpcode() == ISD::SHL) {
      SDValue InnerZExt = N0.getOperand(0);
      // If the original shl may be shifting out bits, do not perform this
      // transformation.
      unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() -
        InnerZExt.getOperand(0).getValueType().getSizeInBits();
      if (ShAmtVal > KnownZeroBits)
        return SDValue();
    }
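    // e.g. with x:i8 zext'd to i32, the top 24 bits are known zero, so a shl
    // by 24 or less cannot discard set bits and the zext can be hoisted out.
    // SRL needs no such check: it only shifts zeros into the high bits here.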
    SDLoc DL(N);

    // Ensure that the shift amount is wide enough for the shifted value.
    if (VT.getSizeInBits() >= 256)
      ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);

    return DAG.getNode(N0.getOpcode(), DL, VT,
                       DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
                       ShAmt);
  }

  return SDValue();
}

SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (aext (aext x)) -> (aext x)
  // fold (aext (zext x)) -> (zext x)
  // fold (aext (sext x)) -> (sext x)
  if (N0.getOpcode() == ISD::ANY_EXTEND  ||
      N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND)
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));

  // fold (aext (truncate (load x))) -> (aext (smaller load x))
  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (truncate x))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    SDValue TruncOp = N0.getOperand(0);
    if (TruncOp.getValueType() == VT)
      return TruncOp; // x iff x size == zext size.
    if (TruncOp.getValueType().bitsGT(VT))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, TruncOp);
    return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, TruncOp);
  }

  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
  // if the trunc is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                          N0.getValueType())) {
    SDValue X = N0.getOperand(0).getOperand(0);
    if (X.getValueType().bitsLT(VT)) {
      X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, X);
    } else if (X.getValueType().bitsGT(VT)) {
      X = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X);
    }
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    SDLoc DL(N);
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // fold (aext (load x)) -> (aext (truncate (extload x)))
  // None of the supported targets knows how to perform load and any_ext
  // on vectors in one instruction.  We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                      ISD::ANY_EXTEND);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
  // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
  if (N0.getOpcode() == ISD::LOAD &&
      !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    ISD::LoadExtType ExtType = LN0->getExtensionType();
    EVT MemVT = LN0->getMemoryVT();
    if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
                                       VT, LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                            N0.getValueType(), ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    // For vectors:
    //   aext(setcc) -> vsetcc
    //   aext(setcc) -> truncate(vsetcc)
    //   aext(setcc) -> aext(vsetcc)
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations) {
      EVT N0VT = N0.getOperand(0).getValueType();
      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter).  Check to see that they are the same size.  If so,
      // we know that the element size of the extended result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == N0VT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                            N0.getOperand(1),
                            cast<CondCodeSDNode>(N0.getOperand(2))->get());
      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/any extend.
      else {
        EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
        SDValue VsetCC =
            DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
                         N0.getOperand(1),
                         cast<CondCodeSDNode>(N0.getOperand(2))->get());
        return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
      }
    }

    // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    SDValue SCC =
        SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1),
                         DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT),
                         cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
    if (SCC.getNode())
      return SCC;
  }

  return SDValue();
}

/// See if the specified operand can be simplified with the knowledge that only
/// the bits specified by Mask are used.  If so, return the simpler operand,
/// otherwise return a null SDValue.
SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
  switch (V.getOpcode()) {
  default: break;
  case ISD::Constant: {
    const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
    assert(CV && "Const value should be ConstSDNode.");
    const APInt &CVal = CV->getAPIntValue();
    APInt NewVal = CVal & Mask;
    if (NewVal != CVal)
      return DAG.getConstant(NewVal, SDLoc(V), V.getValueType());
    break;
  }
  case ISD::OR:
  case ISD::XOR:
    // If the LHS or RHS don't contribute bits to the or, drop them.
    if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
      return V.getOperand(1);
    if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
      return V.getOperand(0);
    break;
  case ISD::SRL:
    // Only look at single-use SRLs.
    if (!V.getNode()->hasOneUse())
      break;
    if (ConstantSDNode *RHSC = getAsNonOpaqueConstant(V.getOperand(1))) {
      // See if we can recursively simplify the LHS.
      unsigned Amt = RHSC->getZExtValue();
      // Watch out for shift count overflow though.
      if (Amt >= Mask.getBitWidth()) break;
      APInt NewMask = Mask << Amt;
      SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask);
      if (SimplifyLHS.getNode())
        return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(),
                           SimplifyLHS, V.getOperand(1));
    }
  }
  return SDValue();
}

/// If the result of a wider load is shifted right by N bits and then truncated
/// to a narrower type, where N is a multiple of the bit width of the narrower
/// type, transform it to a narrower load from address + N / (bits of new
/// type). If the result is to be extended, also fold the extension to form an
/// extending load.
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
  unsigned Opc = N->getOpcode();

  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT ExtVT = VT;

  // This transformation isn't valid for vector loads.
  if (VT.isVector())
    return SDValue();

  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
  // extended to VT.
  if (Opc == ISD::SIGN_EXTEND_INREG) {
    ExtType = ISD::SEXTLOAD;
    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  } else if (Opc == ISD::SRL) {
    // Another special-case: SRL is basically zero-extending a narrower value.
    ExtType = ISD::ZEXTLOAD;
    N0 = SDValue(N, 0);
    ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01) return SDValue();
    ExtVT = EVT::getIntegerVT(*DAG.getContext(),
                              VT.getSizeInBits() - N01->getZExtValue());
  }
  if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT))
    return SDValue();

  unsigned EVTBits = ExtVT.getSizeInBits();

  // Do not generate loads of non-round integer types since these can
  // be expensive (and would be wrong if the type is not byte sized).
  if (!ExtVT.isRound())
    return SDValue();

  unsigned ShAmt = 0;
  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShAmt = N01->getZExtValue();
      // Is the shift amount a multiple of the size of VT?
      if ((ShAmt & (EVTBits-1)) == 0) {
        N0 = N0.getOperand(0);
        // Is the load width a multiple of the size of VT?
        if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
          return SDValue();
      }

      // At this point, we must have a load or else we can't do the transform.
      if (!isa<LoadSDNode>(N0)) return SDValue();

      // Because a SRL must be assumed to *need* to zero-extend the high bits
      // (as opposed to anyext the high bits), we can't combine the zextload
      // lowering of SRL and an sextload.
      if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
        return SDValue();

      // If the shift amount is larger than the input type then we're not
      // accessing any of the loaded bytes.  If the load was a zextload/extload
      // then the result of the shift+trunc is zero/undef (handled elsewhere).
      if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
        return SDValue();
    }
  }

  // If the load is shifted left (and the result isn't shifted back right),
  // we can fold the truncate through the shift.
  unsigned ShLeftAmt = 0;
  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShLeftAmt = N01->getZExtValue();
      N0 = N0.getOperand(0);
    }
  }

  // If we haven't found a load, we can't narrow it.  Don't transform one with
  // multiple uses, this would require adding a new load.
  if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
    return SDValue();

  // Don't change the width of a volatile load.
  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  if (LN0->isVolatile())
    return SDValue();

  // Verify that we are actually reducing a load width here.
  if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
    return SDValue();

  // For the transform to be legal, the load must produce only two values
  // (the value loaded and the chain).  Don't transform a pre-increment
  // load, for example, which produces an extra value.  Otherwise the
  // transformation is not equivalent, and the downstream logic to replace
  // uses gets things wrong.
  if (LN0->getNumValues() > 2)
    return SDValue();

  // If the load that we're shrinking is an extload and we're not just
  // discarding the extension we can't simply shrink the load. Bail.
  // TODO: It would be possible to merge the extensions in some cases.
  if (LN0->getExtensionType() != ISD::NON_EXTLOAD &&
      LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
    return SDValue();

  if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT))
    return SDValue();

  EVT PtrType = N0.getOperand(1).getValueType();

  if (PtrType == MVT::Untyped || PtrType.isExtended())
    // It's not possible to generate a constant of extended or untyped type.
    return SDValue();

  // For big endian targets, we need to adjust the offset to the pointer to
  // load the correct bytes.
  if (DAG.getDataLayout().isBigEndian()) {
    unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
    ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
  }
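  // e.g. narrowing a big-endian 64-bit load to its low 32 bits (ShAmt == 0):
  // those bytes sit at the highest addresses, so ShAmt becomes
  // 64 - 32 - 0 = 32, giving the byte offset of 4 computed below.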
  uint64_t PtrOff = ShAmt / 8;
  unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
  SDLoc DL(LN0);
  SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
                               PtrType, LN0->getBasePtr(),
                               DAG.getConstant(PtrOff, DL, PtrType));
  AddToWorklist(NewPtr.getNode());

  SDValue Load;
  if (ExtType == ISD::NON_EXTLOAD)
    Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
                       LN0->getPointerInfo().getWithOffset(PtrOff),
                       LN0->isVolatile(), LN0->isNonTemporal(),
                       LN0->isInvariant(), NewAlign, LN0->getAAInfo());
  else
    Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
                          LN0->getPointerInfo().getWithOffset(PtrOff),
                          ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
                          LN0->isInvariant(), NewAlign, LN0->getAAInfo());

  // Replace the old load's chain with the new load's chain.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));

  // Shift the result left, if we've swallowed a left shift.
  SDValue Result = Load;
  if (ShLeftAmt != 0) {
    EVT ShImmTy = getShiftAmountTy(Result.getValueType());
    if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
      ShImmTy = VT;
    // If the shift amount is as large as the result size (but, presumably,
    // no larger than the source) then the useful bits of the result are
    // zero; we can't simply return the shortened shift, because the result
    // of that operation is undefined.
    SDLoc DL(N0);
    if (ShLeftAmt >= VT.getSizeInBits())
      Result = DAG.getConstant(0, DL, VT);
    else
      Result = DAG.getNode(ISD::SHL, DL, VT,
                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
  }

  // Return the new loaded value.
  return Result;
}

SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT EVT = cast<VTSDNode>(N1)->getVT();
  unsigned VTBits = VT.getScalarType().getSizeInBits();
  unsigned EVTBits = EVT.getScalarType().getSizeInBits();
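  // Conceptually, (sext_in_reg x, EVT) is (sra (shl x, VTBits-EVTBits),
  // VTBits-EVTBits): bit EVTBits-1 of x is copied into all higher bits.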

  // fold (sext_in_reg c1) -> c1
  if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);

  // If the input is already sign extended, just drop the extension.
  if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
    return N0;

  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                       N0.getOperand(0), N1);

  // fold (sext_in_reg (sext x)) -> (sext x)
  // fold (sext_in_reg (aext x)) -> (sext x)
  // if x is small enough.
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
  }

  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
  if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
    return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT);

  // fold operands of sext_in_reg based on knowledge that the top bits are not
  // demanded.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (sext_in_reg (load x)) -> (smaller sextload x)
  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
  SDValue NarrowLoad = ReduceLoadWidth(N);
  if (NarrowLoad.getNode())
    return NarrowLoad;

  // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
  // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
  if (N0.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
      if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
        // We can turn this into an SRA iff the input to the SRL is already
        // sign extended enough.
        unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
        if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
          return DAG.getNode(ISD::SRA, SDLoc(N), VT,
                             N0.getOperand(0), N0.getOperand(1));
      }
  }

  // fold (sext_inreg (extload x)) -> (sextload x)
  if (ISD::isEXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), EVT,
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    AddToWorklist(ExtLoad.getNode());
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }
  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
  if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse() &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), EVT,
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
  if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
    SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                       N0.getOperand(1), false);
    if (BSwap.getNode())
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         BSwap, N1);
  }

  // Fold a sext_inreg of a build_vector of ConstantSDNodes or undefs
  // into a build_vector.
  if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
    SmallVector<SDValue, 8> Elts;
    unsigned NumElts = N0->getNumOperands();
    unsigned ShAmt = VTBits - EVTBits;

    for (unsigned i = 0; i != NumElts; ++i) {
      SDValue Op = N0->getOperand(i);
      if (Op->getOpcode() == ISD::UNDEF) {
        Elts.push_back(Op);
        continue;
      }

      ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op);
      const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue());
      Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(),
                                     SDLoc(Op), Op.getValueType()));
    }

    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Elts);
  }

  return SDValue();
}

SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getUNDEF(VT);

  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  return SDValue();
}
  6062. SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
  6063. SDValue N0 = N->getOperand(0);
  6064. EVT VT = N->getValueType(0);
  6065. bool isLE = DAG.getDataLayout().isLittleEndian();
  6066. // noop truncate
  6067. if (N0.getValueType() == N->getValueType(0))
  6068. return N0;
  6069. // fold (truncate c1) -> c1
  6070. if (isConstantIntBuildVectorOrConstantInt(N0))
  6071. return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
  6072. // fold (truncate (truncate x)) -> (truncate x)
  6073. if (N0.getOpcode() == ISD::TRUNCATE)
  6074. return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
  6075. // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
  6076. if (N0.getOpcode() == ISD::ZERO_EXTEND ||
  6077. N0.getOpcode() == ISD::SIGN_EXTEND ||
  6078. N0.getOpcode() == ISD::ANY_EXTEND) {
  6079. if (N0.getOperand(0).getValueType().bitsLT(VT))
  6080. // if the source is smaller than the dest, we still need an extend
  6081. return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
  6082. N0.getOperand(0));
  6083. if (N0.getOperand(0).getValueType().bitsGT(VT))
  6084. // if the source is larger than the dest, than we just need the truncate
  6085. return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
  6086. // if the source and dest are the same type, we can drop both the extend
  6087. // and the truncate.
  6088. return N0.getOperand(0);
  6089. }
  6090. // Fold extract-and-trunc into a narrow extract. For example:
  6091. // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
  6092. // i32 y = TRUNCATE(i64 x)
  6093. // -- becomes --
  6094. // v16i8 b = BITCAST (v2i64 val)
  6095. // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
  6096. //
  6097. // Note: We only run this optimization after type legalization (which often
  6098. // creates this pattern) and before operation legalization after which
  6099. // we need to be more careful about the vector instructions that we generate.
  6100. if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
  6101. LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
  6102. EVT VecTy = N0.getOperand(0).getValueType();
  6103. EVT ExTy = N0.getValueType();
  6104. EVT TrTy = N->getValueType(0);
  6105. unsigned NumElem = VecTy.getVectorNumElements();
  6106. unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
  6107. EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
  6108. assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
  6109. SDValue EltNo = N0->getOperand(1);
  6110. if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
  6111. int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
  6112. EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
  6113. int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
  6114. SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N),
  6115. NVT, N0.getOperand(0));
  6116. SDLoc DL(N);
  6117. return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
  6118. DL, TrTy, V,
  6119. DAG.getConstant(Index, DL, IndexTy));
  6120. }
  6121. }
  6122. // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
  6123. if (N0.getOpcode() == ISD::SELECT) {
  6124. EVT SrcVT = N0.getValueType();
  6125. if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
  6126. TLI.isTruncateFree(SrcVT, VT)) {
  6127. SDLoc SL(N0);
  6128. SDValue Cond = N0.getOperand(0);
  6129. SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
  6130. SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
  6131. return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
  6132. }
  6133. }
  6134. // Fold a series of buildvector, bitcast, and truncate if possible.
  6135. // For example fold
  6136. // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
  6137. // (2xi32 (buildvector x, y)).
  6138. if (Level == AfterLegalizeVectorOps && VT.isVector() &&
  6139. N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
  6140. N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
  6141. N0.getOperand(0).hasOneUse()) {
  6142. SDValue BuildVect = N0.getOperand(0);
  6143. EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
  6144. EVT TruncVecEltTy = VT.getVectorElementType();
  6145. // Check that the element types match.
  6146. if (BuildVectEltTy == TruncVecEltTy) {
  6147. // Now we only need to compute the offset of the truncated elements.
  6148. unsigned BuildVecNumElts = BuildVect.getNumOperands();
  6149. unsigned TruncVecNumElts = VT.getVectorNumElements();
  6150. unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
  6151. assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
  6152. "Invalid number of elements");
  6153. SmallVector<SDValue, 8> Opnds;
  6154. for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
  6155. Opnds.push_back(BuildVect.getOperand(i));
  6156. return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
  6157. }
  6158. }

  // See if we can simplify the input to this truncate through knowledge that
  // only the low bits are being used.
  // For example "trunc (or (shl x, 8), y)" -> trunc y
  // Currently we only perform this optimization on scalars because vectors
  // may have different active low bits.
  if (!VT.isVector()) {
    SDValue Shorter =
      GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
                                               VT.getSizeInBits()));
    if (Shorter.getNode())
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
  }

  // fold (truncate (load x)) -> (smaller load x)
  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
  if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
    SDValue Reduced = ReduceLoadWidth(N);
    if (Reduced.getNode())
      return Reduced;

    // Handle the case where the load remains an extending load even
    // after truncation.
    if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      if (!LN0->isVolatile() &&
          LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
        SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
                                         VT, LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
        return NewLoad;
      }
    }
  }
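
  // For example, (i32 trunc (i64 sextload i16 [p])) still extends from the
  // 16-bit memory type, so it can be rebuilt as (i32 sextload i16 [p]).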

  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...),
  // where ... are all 'undef'.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
    SmallVector<EVT, 8> VTs;
    SDValue V;
    unsigned Idx = 0;
    unsigned NumDefs = 0;

    for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
      SDValue X = N0.getOperand(i);
      if (X.getOpcode() != ISD::UNDEF) {
        V = X;
        Idx = i;
        NumDefs++;
      }
      // Stop if more than one member is non-undef.
      if (NumDefs > 1)
        break;
      VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
                                     VT.getVectorElementType(),
                                     X.getValueType().getVectorNumElements()));
    }

    if (NumDefs == 0)
      return DAG.getUNDEF(VT);

    if (NumDefs == 1) {
      assert(V.getNode() && "The single defined operand is empty!");
      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
        if (i != Idx) {
          Opnds.push_back(DAG.getUNDEF(VTs[i]));
          continue;
        }
        SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
        AddToWorklist(NV.getNode());
        Opnds.push_back(NV);
      }
      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
    }
  }
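
  // For example, (v4i32 trunc (v4i64 concat undef, X)) with X : v2i64 becomes
  // (v4i32 concat (v2i32 undef), (v2i32 trunc X)).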

  // Simplify the operands using demanded-bits information.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}

static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
  SDValue Elt = N->getOperand(i);
  if (Elt.getOpcode() != ISD::MERGE_VALUES)
    return Elt.getNode();
  return Elt.getOperand(Elt.getResNo()).getNode();
}

/// build_pair (load, load) -> load
/// if load locations are consecutive.
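/// For example, on a little-endian target with [p] and [p+4] adjacent:
///   i64 (build_pair (i32 load [p]), (i32 load [p+4])) -> i64 load [p],
/// provided the wider load does not require stricter alignment.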
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
  assert(N->getOpcode() == ISD::BUILD_PAIR);

  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
      LD1->getAddressSpace() != LD2->getAddressSpace())
    return SDValue();
  EVT LD1VT = LD1->getValueType(0);

  if (ISD::isNON_EXTLoad(LD2) &&
      LD2->hasOneUse() &&
      // If both are volatile this would reduce the number of volatile loads.
      // If one is volatile it might be ok, but play conservative and bail out.
      !LD1->isVolatile() &&
      !LD2->isVolatile() &&
      DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
    unsigned Align = LD1->getAlignment();
    unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
        VT.getTypeForEVT(*DAG.getContext()));

    if (NewAlign <= Align &&
        (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
      return DAG.getLoad(VT, SDLoc(N), LD1->getChain(),
                         LD1->getBasePtr(), LD1->getPointerInfo(),
                         false, false, false, Align);
  }

  return SDValue();
}

SDValue DAGCombiner::visitBITCAST(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
  // Only do this before legalize, since afterward the target may be depending
  // on the bitconvert.
  // First check to see if this is all constant.
  if (!LegalTypes &&
      N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
      VT.isVector()) {
    bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();

    EVT DestEltVT = N->getValueType(0).getVectorElementType();
    assert(!DestEltVT.isVector() &&
           "Element type of vector ValueType must not be vector!");
    if (isSimple)
      return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
  }

  // If the input is a constant, let getNode fold it.
  if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
    // If we can't allow illegal operations, we need to check that this is just
    // an fp -> int or int -> fp conversion and that the resulting operation
    // will be legal.
    if (!LegalOperations ||
        (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
         TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
        (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
         TLI.isOperationLegal(ISD::Constant, VT)))
      return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0);
  }

  // (conv (conv x, t1), t2) -> (conv x, t2)
  if (N0.getOpcode() == ISD::BITCAST)
    return DAG.getNode(ISD::BITCAST, SDLoc(N), VT,
                       N0.getOperand(0));

  // fold (conv (load x)) -> (load (conv*)x)
  // If the resultant load doesn't need a higher alignment than the original!
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      // Do not change the width of a volatile load.
      !cast<LoadSDNode>(N0)->isVolatile() &&
      // Do not remove the cast if the types differ in endian layout.
      TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
      TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
      (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
      TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    unsigned Align = DAG.getDataLayout().getABITypeAlignment(
        VT.getTypeForEVT(*DAG.getContext()));
    unsigned OrigAlign = LN0->getAlignment();

    if (Align <= OrigAlign) {
      SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(),
                                 LN0->getBasePtr(), LN0->getPointerInfo(),
                                 LN0->isVolatile(), LN0->isNonTemporal(),
                                 LN0->isInvariant(), OrigAlign,
                                 LN0->getAAInfo());
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
      return Load;
    }
  }
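
  // For example, (i64 bitcast (f64 load [p])) -> (i64 load [p]) when i64 has
  // no stricter ABI alignment than the original f64 load.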

  // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
  // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
  // This often reduces constant pool loads.
  if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
       (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
      N0.getNode()->hasOneUse() && VT.isInteger() &&
      !VT.isVector() && !N0.getValueType().isVector()) {
    SDValue NewConv = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT,
                                  N0.getOperand(0));
    AddToWorklist(NewConv.getNode());

    SDLoc DL(N);
    APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
    if (N0.getOpcode() == ISD::FNEG)
      return DAG.getNode(ISD::XOR, DL, VT,
                         NewConv, DAG.getConstant(SignBit, DL, VT));
    assert(N0.getOpcode() == ISD::FABS);
    return DAG.getNode(ISD::AND, DL, VT,
                       NewConv, DAG.getConstant(~SignBit, DL, VT));
  }
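
  // For example, (i32 bitcast (fneg f32 x)) becomes
  // (xor (i32 bitcast x), 0x80000000), flipping only the IEEE sign bit.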

  // fold (bitconvert (fcopysign cst, x)) ->
  //         (or (and (bitconvert x), sign), (and cst, (not sign)))
  // Note that we don't handle (copysign x, cst) because this can always be
  // folded to an fneg or fabs.
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
      isa<ConstantFPSDNode>(N0.getOperand(0)) &&
      VT.isInteger() && !VT.isVector()) {
    unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
    EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
    if (isTypeLegal(IntXVT)) {
      SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0),
                              IntXVT, N0.getOperand(1));
      AddToWorklist(X.getNode());

      // If X has a different width than the result/lhs, sext it or truncate it.
      unsigned VTWidth = VT.getSizeInBits();
      if (OrigXWidth < VTWidth) {
        X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
        AddToWorklist(X.getNode());
      } else if (OrigXWidth > VTWidth) {
        // To get the sign bit in the right place, we have to shift it right
        // before truncating.
        SDLoc DL(X);
        X = DAG.getNode(ISD::SRL, DL,
                        X.getValueType(), X,
                        DAG.getConstant(OrigXWidth-VTWidth, DL,
                                        X.getValueType()));
        AddToWorklist(X.getNode());
        X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
        AddToWorklist(X.getNode());
      }

      APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
      X = DAG.getNode(ISD::AND, SDLoc(X), VT,
                      X, DAG.getConstant(SignBit, SDLoc(X), VT));
      AddToWorklist(X.getNode());

      SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0),
                                VT, N0.getOperand(0));
      Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
                        Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
      AddToWorklist(Cst.getNode());

      return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
    }
  }
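
  // For example, with f32 operands and an i32 result, this produces
  //   (or (and (i32 bitcast x), 0x80000000),
  //       (and (i32 bitcast cst), 0x7fffffff)).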

  // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
  if (N0.getOpcode() == ISD::BUILD_PAIR) {
    SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT);
    if (CombineLD.getNode())
      return CombineLD;
  }

  // Remove double bitcasts from shuffles - this is often a legacy of
  // XformToShuffleWithZero being used to combine bitmaskings (of
  // float vectors bitcast to integer vectors) into shuffles.
  // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
  if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
      N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
      VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
      !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
    ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);

    // If an operand is a bitcast, peek through if it casts the original VT.
    // If an operand is UNDEF or a constant, just bitcast back to the
    // original VT.
    auto PeekThroughBitcast = [&](SDValue Op) {
      if (Op.getOpcode() == ISD::BITCAST &&
          Op.getOperand(0)->getValueType(0) == VT)
        return SDValue(Op.getOperand(0));
      if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
          ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
        return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
      return SDValue();
    };

    SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
    SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
    if (!(SV0 && SV1))
      return SDValue();

    int MaskScale =
        VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
    SmallVector<int, 8> NewMask;
    for (int M : SVN->getMask())
      for (int i = 0; i != MaskScale; ++i)
        NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
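
    // For example, bitcasting a v2i64 shuffle with mask <1,0> to v4i32 gives
    // MaskScale 2 and NewMask <2,3,0,1>.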
    bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
    if (!LegalMask) {
      std::swap(SV0, SV1);
      ShuffleVectorSDNode::commuteMask(NewMask);
      LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
    }

    if (LegalMask)
      return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
  }

  return SDValue();
}

SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
  EVT VT = N->getValueType(0);
  return CombineConsecutiveLoads(N, VT);
}

/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
/// operands. DstEltVT indicates the destination element value type.
SDValue DAGCombiner::
ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
  EVT SrcEltVT = BV->getValueType(0).getVectorElementType();

  // If this is already the right type, we're done.
  if (SrcEltVT == DstEltVT) return SDValue(BV, 0);

  unsigned SrcBitSize = SrcEltVT.getSizeInBits();
  unsigned DstBitSize = DstEltVT.getSizeInBits();

  // If this is a conversion of N elements of one type to N elements of another
  // type, convert each element. This handles FP<->INT cases.
  if (SrcBitSize == DstBitSize) {
    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                              BV->getValueType(0).getVectorNumElements());

    // Due to the FP element handling below calling this routine recursively,
    // we can end up with a scalar-to-vector node here.
    if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
      return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
                         DAG.getNode(ISD::BITCAST, SDLoc(BV),
                                     DstEltVT, BV->getOperand(0)));

    SmallVector<SDValue, 8> Ops;
    for (SDValue Op : BV->op_values()) {
      // If the vector element type is not legal, the BUILD_VECTOR operands
      // are promoted and implicitly truncated. Make that explicit here.
      if (Op.getValueType() != SrcEltVT)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
      Ops.push_back(DAG.getNode(ISD::BITCAST, SDLoc(BV),
                                DstEltVT, Op));
      AddToWorklist(Ops.back().getNode());
    }
    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
  }

  // Otherwise, we're growing or shrinking the elements. To avoid having to
  // handle annoying details of growing/shrinking FP values, we convert them to
  // int first.
  if (SrcEltVT.isFloatingPoint()) {
    // Convert the input float vector to an int vector where the elements are
    // the same size.
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
    BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
    SrcEltVT = IntVT;
  }

  // Now we know the input is an integer vector. If the output is an FP type,
  // convert to integer first, then to FP of the right size.
  if (DstEltVT.isFloatingPoint()) {
    EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
    SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();

    // Next, convert to FP elements of the same size.
    return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
  }

  SDLoc DL(BV);

  // Okay, we know the src/dst types are both integers of differing types.
  // Handle growing first.
  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
  if (SrcBitSize < DstBitSize) {
    unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;

    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = BV->getNumOperands(); i != e;
         i += NumInputsPerOutput) {
      bool isLE = DAG.getDataLayout().isLittleEndian();
      APInt NewBits = APInt(DstBitSize, 0);
      bool EltIsUndef = true;
      for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
        // Shift the previously computed bits over.
        NewBits <<= SrcBitSize;
        SDValue Op = BV->getOperand(i + (isLE ? (NumInputsPerOutput-j-1) : j));
        if (Op.getOpcode() == ISD::UNDEF) continue;
        EltIsUndef = false;

        NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
                   zextOrTrunc(SrcBitSize).zext(DstBitSize);
      }

      if (EltIsUndef)
        Ops.push_back(DAG.getUNDEF(DstEltVT));
      else
        Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
    }

    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
    return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
  }
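
  // For example, growing v4i16 <1, 2, 3, 4> to v2i32 on a little-endian
  // target packs each pair with the lower-addressed i16 in the low half,
  // giving <0x00020001, 0x00040003>.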

  // Finally, this must be the case where we are shrinking elements: each input
  // turns into multiple outputs.
  unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                            NumOutputsPerInput*BV->getNumOperands());
  SmallVector<SDValue, 8> Ops;

  for (const SDValue &Op : BV->op_values()) {
    if (Op.getOpcode() == ISD::UNDEF) {
      Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
      continue;
    }

    APInt OpVal = cast<ConstantSDNode>(Op)->
                  getAPIntValue().zextOrTrunc(SrcBitSize);

    for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
      APInt ThisVal = OpVal.trunc(DstBitSize);
      Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
      OpVal = OpVal.lshr(DstBitSize);
    }

    // For big endian targets, swap the order of the pieces of each element.
    if (DAG.getDataLayout().isBigEndian())
      std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
  }
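
  // For example, shrinking v2i32 <0x00010002, 0x00030004> to v4i16 emits the
  // pieces low-first, giving <0x0002, 0x0001, 0x0004, 0x0003> on little-endian
  // targets; big-endian targets reverse each pair.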
  return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
}

/// Try to perform FMA combining on a given FADD node.
SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  const TargetOptions &Options = DAG.getTarget().Options;
  bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
                       Options.UnsafeFPMath);

  // Floating-point multiply-add with intermediate rounding.
  bool HasFMAD = (LegalOperations &&
                  TLI.isOperationLegal(ISD::FMAD, VT));

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA = ((!LegalOperations ||
                  TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
                 TLI.isFMAFasterThanFMulAndFAdd(VT) &&
                 UnsafeFPMath);

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
  bool LookThroughFPExt = TLI.isFPExtFree(VT);

  // fold (fadd (fmul x, y), z) -> (fma x, y, z)
  if (N0.getOpcode() == ISD::FMUL &&
      (Aggressive || N0->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N0.getOperand(0), N0.getOperand(1), N1);
  }

  // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
  // Note: Commutes FADD operands.
  if (N1.getOpcode() == ISD::FMUL &&
      (Aggressive || N1->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N1.getOperand(0), N1.getOperand(1), N0);
  }

  // Look through FP_EXTEND nodes to do more combining.
  if (UnsafeFPMath && LookThroughFPExt) {
    // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (N00.getOpcode() == ISD::FMUL)
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N00.getOperand(0)),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N00.getOperand(1)), N1);
    }

    // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
    // Note: Commutes FADD operands.
    if (N1.getOpcode() == ISD::FP_EXTEND) {
      SDValue N10 = N1.getOperand(0);
      if (N10.getOpcode() == ISD::FMUL)
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N10.getOperand(0)),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N10.getOperand(1)), N0);
    }
  }

  // More folding opportunities when target permits.
  if ((UnsafeFPMath || HasFMAD) && Aggressive) {
    // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
    if (N0.getOpcode() == PreferredFusedOpcode &&
        N0.getOperand(2).getOpcode() == ISD::FMUL) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N0.getOperand(0), N0.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N0.getOperand(2).getOperand(0),
                                     N0.getOperand(2).getOperand(1),
                                     N1));
    }

    // fold (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z, (fma u, v, x))
    if (N1->getOpcode() == PreferredFusedOpcode &&
        N1.getOperand(2).getOpcode() == ISD::FMUL) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N1.getOperand(0), N1.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N1.getOperand(2).getOperand(0),
                                     N1.getOperand(2).getOperand(1),
                                     N0));
    }

    if (UnsafeFPMath && LookThroughFPExt) {
      // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
      //   -> (fma x, y, (fma (fpext u), (fpext v), z))
      auto FoldFAddFMAFPExtFMul = [&] (
          SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
        return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
                                       Z));
      };
      if (N0.getOpcode() == PreferredFusedOpcode) {
        SDValue N02 = N0.getOperand(2);
        if (N02.getOpcode() == ISD::FP_EXTEND) {
          SDValue N020 = N02.getOperand(0);
          if (N020.getOpcode() == ISD::FMUL)
            return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
                                        N020.getOperand(0), N020.getOperand(1),
                                        N1);
        }
      }

      // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
      //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
      // FIXME: This turns two single-precision and one double-precision
      // operation into two double-precision operations, which might not be
      // interesting for all targets, especially GPUs.
      auto FoldFAddFPExtFMAFMul = [&] (
          SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
                                       Z));
      };
      if (N0.getOpcode() == ISD::FP_EXTEND) {
        SDValue N00 = N0.getOperand(0);
        if (N00.getOpcode() == PreferredFusedOpcode) {
          SDValue N002 = N00.getOperand(2);
          if (N002.getOpcode() == ISD::FMUL)
            return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
                                        N002.getOperand(0), N002.getOperand(1),
                                        N1);
        }
      }

      // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
      //   -> (fma y, z, (fma (fpext u), (fpext v), x))
      if (N1.getOpcode() == PreferredFusedOpcode) {
        SDValue N12 = N1.getOperand(2);
        if (N12.getOpcode() == ISD::FP_EXTEND) {
          SDValue N120 = N12.getOperand(0);
          if (N120.getOpcode() == ISD::FMUL)
            return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
                                        N120.getOperand(0), N120.getOperand(1),
                                        N0);
        }
      }

      // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
      //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
      // FIXME: This turns two single-precision and one double-precision
      // operation into two double-precision operations, which might not be
      // interesting for all targets, especially GPUs.
      if (N1.getOpcode() == ISD::FP_EXTEND) {
        SDValue N10 = N1.getOperand(0);
        if (N10.getOpcode() == PreferredFusedOpcode) {
          SDValue N102 = N10.getOperand(2);
          if (N102.getOpcode() == ISD::FMUL)
            return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
                                        N102.getOperand(0), N102.getOperand(1),
                                        N0);
        }
      }
    }
  }

  return SDValue();
}

/// Try to perform FMA combining on a given FSUB node.
SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  const TargetOptions &Options = DAG.getTarget().Options;
  bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
                       Options.UnsafeFPMath);

  // Floating-point multiply-add with intermediate rounding.
  bool HasFMAD = (LegalOperations &&
                  TLI.isOperationLegal(ISD::FMAD, VT));

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA = ((!LegalOperations ||
                  TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
                 TLI.isFMAFasterThanFMulAndFAdd(VT) &&
                 UnsafeFPMath);

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
  bool LookThroughFPExt = TLI.isFPExtFree(VT);

  // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
  if (N0.getOpcode() == ISD::FMUL &&
      (Aggressive || N0->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N0.getOperand(0), N0.getOperand(1),
                       DAG.getNode(ISD::FNEG, SL, VT, N1));
  }

  // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
  // Note: Commutes FSUB operands.
  if (N1.getOpcode() == ISD::FMUL &&
      (Aggressive || N1->hasOneUse()))
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       DAG.getNode(ISD::FNEG, SL, VT,
                                   N1.getOperand(0)),
                       N1.getOperand(1), N0);

  // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
  if (N0.getOpcode() == ISD::FNEG &&
      N0.getOperand(0).getOpcode() == ISD::FMUL &&
      (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
    SDValue N00 = N0.getOperand(0).getOperand(0);
    SDValue N01 = N0.getOperand(0).getOperand(1);
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
                       DAG.getNode(ISD::FNEG, SL, VT, N1));
  }

  // Look through FP_EXTEND nodes to do more combining.
  if (UnsafeFPMath && LookThroughFPExt) {
    // fold (fsub (fpext (fmul x, y)), z)
    //   -> (fma (fpext x), (fpext y), (fneg z))
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (N00.getOpcode() == ISD::FMUL)
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N00.getOperand(0)),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N00.getOperand(1)),
                           DAG.getNode(ISD::FNEG, SL, VT, N1));
    }

    // fold (fsub x, (fpext (fmul y, z)))
    //   -> (fma (fneg (fpext y)), (fpext z), x)
    // Note: Commutes FSUB operands.
    if (N1.getOpcode() == ISD::FP_EXTEND) {
      SDValue N10 = N1.getOperand(0);
      if (N10.getOpcode() == ISD::FMUL)
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FNEG, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N10.getOperand(0))),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N10.getOperand(1)),
                           N0);
    }

    // fold (fsub (fpext (fneg (fmul x, y))), z)
    //   -> (fneg (fma (fpext x), (fpext y), z))
    // Note: This could be removed with appropriate canonicalization of the
    // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
    // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
    // us from implementing the canonicalization in visitFSUB.
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (N00.getOpcode() == ISD::FNEG) {
        SDValue N000 = N00.getOperand(0);
        if (N000.getOpcode() == ISD::FMUL) {
          return DAG.getNode(ISD::FNEG, SL, VT,
                             DAG.getNode(PreferredFusedOpcode, SL, VT,
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N000.getOperand(0)),
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N000.getOperand(1)),
                                         N1));
        }
      }
    }

    // fold (fsub (fneg (fpext (fmul x, y))), z)
    //   -> (fneg (fma (fpext x), (fpext y), z))
    // Note: This could be removed with appropriate canonicalization of the
    // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
    // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
    // us from implementing the canonicalization in visitFSUB.
    if (N0.getOpcode() == ISD::FNEG) {
      SDValue N00 = N0.getOperand(0);
      if (N00.getOpcode() == ISD::FP_EXTEND) {
        SDValue N000 = N00.getOperand(0);
        if (N000.getOpcode() == ISD::FMUL) {
          return DAG.getNode(ISD::FNEG, SL, VT,
                             DAG.getNode(PreferredFusedOpcode, SL, VT,
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N000.getOperand(0)),
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N000.getOperand(1)),
                                         N1));
        }
      }
    }
  }

  // More folding opportunities when target permits.
  if ((UnsafeFPMath || HasFMAD) && Aggressive) {
    // fold (fsub (fma x, y, (fmul u, v)), z)
    //   -> (fma x, y, (fma u, v, (fneg z)))
    if (N0.getOpcode() == PreferredFusedOpcode &&
        N0.getOperand(2).getOpcode() == ISD::FMUL) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N0.getOperand(0), N0.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N0.getOperand(2).getOperand(0),
                                     N0.getOperand(2).getOperand(1),
                                     DAG.getNode(ISD::FNEG, SL, VT,
                                                 N1)));
    }

    // fold (fsub x, (fma y, z, (fmul u, v)))
    //   -> (fma (fneg y), z, (fma (fneg u), v, x))
    if (N1.getOpcode() == PreferredFusedOpcode &&
        N1.getOperand(2).getOpcode() == ISD::FMUL) {
      SDValue N20 = N1.getOperand(2).getOperand(0);
      SDValue N21 = N1.getOperand(2).getOperand(1);
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FNEG, SL, VT,
                                     N1.getOperand(0)),
                         N1.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     DAG.getNode(ISD::FNEG, SL, VT, N20),
                                     N21, N0));
    }

    if (UnsafeFPMath && LookThroughFPExt) {
      // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
      //   -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
      if (N0.getOpcode() == PreferredFusedOpcode) {
        SDValue N02 = N0.getOperand(2);
        if (N02.getOpcode() == ISD::FP_EXTEND) {
          SDValue N020 = N02.getOperand(0);
          if (N020.getOpcode() == ISD::FMUL)
            return DAG.getNode(PreferredFusedOpcode, SL, VT,
                               N0.getOperand(0), N0.getOperand(1),
                               DAG.getNode(PreferredFusedOpcode, SL, VT,
                                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                       N020.getOperand(0)),
                                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                       N020.getOperand(1)),
                                           DAG.getNode(ISD::FNEG, SL, VT,
                                                       N1)));
        }
      }

      // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
      //   -> (fma (fpext x), (fpext y),
      //           (fma (fpext u), (fpext v), (fneg z)))
      // FIXME: This turns two single-precision and one double-precision
      // operation into two double-precision operations, which might not be
      // interesting for all targets, especially GPUs.
      if (N0.getOpcode() == ISD::FP_EXTEND) {
        SDValue N00 = N0.getOperand(0);
        if (N00.getOpcode() == PreferredFusedOpcode) {
          SDValue N002 = N00.getOperand(2);
          if (N002.getOpcode() == ISD::FMUL)
            return DAG.getNode(PreferredFusedOpcode, SL, VT,
                               DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                           N00.getOperand(0)),
                               DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                           N00.getOperand(1)),
                               DAG.getNode(PreferredFusedOpcode, SL, VT,
                                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                       N002.getOperand(0)),
                                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                       N002.getOperand(1)),
                                           DAG.getNode(ISD::FNEG, SL, VT,
                                                       N1)));
        }
      }

      // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
      //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
      if (N1.getOpcode() == PreferredFusedOpcode &&
          N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
        SDValue N120 = N1.getOperand(2).getOperand(0);
        if (N120.getOpcode() == ISD::FMUL) {
          SDValue N1200 = N120.getOperand(0);
          SDValue N1201 = N120.getOperand(1);
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
                             N1.getOperand(1),
                             DAG.getNode(PreferredFusedOpcode, SL, VT,
                                         DAG.getNode(ISD::FNEG, SL, VT,
                                                     DAG.getNode(ISD::FP_EXTEND, SL,
                                                                 VT, N1200)),
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N1201),
                                         N0));
        }
      }

      // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
      //   -> (fma (fneg (fpext y)), (fpext z),
      //           (fma (fneg (fpext u)), (fpext v), x))
      // FIXME: This turns two single-precision and one double-precision
      // operation into two double-precision operations, which might not be
      // interesting for all targets, especially GPUs.
      if (N1.getOpcode() == ISD::FP_EXTEND &&
          N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
        SDValue N100 = N1.getOperand(0).getOperand(0);
        SDValue N101 = N1.getOperand(0).getOperand(1);
        SDValue N102 = N1.getOperand(0).getOperand(2);
        if (N102.getOpcode() == ISD::FMUL) {
          SDValue N1020 = N102.getOperand(0);
          SDValue N1021 = N102.getOperand(1);
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             DAG.getNode(ISD::FNEG, SL, VT,
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N100)),
                             DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
                             DAG.getNode(PreferredFusedOpcode, SL, VT,
                                         DAG.getNode(ISD::FNEG, SL, VT,
                                                     DAG.getNode(ISD::FP_EXTEND, SL,
                                                                 VT, N1020)),
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N1021),
                                         N0));
        }
      }
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitFADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fadd c1, c2) -> c1 + c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FADD, DL, VT, N0, N1);

  // canonicalize constant to RHS
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FADD, DL, VT, N1, N0);

  // fold (fadd A, (fneg B)) -> (fsub A, B)
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
    return DAG.getNode(ISD::FSUB, DL, VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations));

  // fold (fadd (fneg A), B) -> (fsub B, A)
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
    return DAG.getNode(ISD::FSUB, DL, VT, N1,
                       GetNegatedExpression(N0, DAG, LegalOperations));

  // If 'unsafe math' is enabled, fold lots of things.
  if (Options.UnsafeFPMath) {
    // No FP constant should be created after legalization as the instruction
    // selection pass has a hard time dealing with FP constants.
    bool AllowNewConst = (Level < AfterLegalizeDAG);

    // fold (fadd A, 0) -> A
    if (N1CFP && N1CFP->isZero())
      return N0;

    // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
    if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
        isa<ConstantFPSDNode>(N0.getOperand(1)))
      return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
                         DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1));

    // If allowed, fold (fadd (fneg x), x) -> 0.0
    if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
      return DAG.getConstantFP(0.0, DL, VT);

    // If allowed, fold (fadd x, (fneg x)) -> 0.0
    if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
      return DAG.getConstantFP(0.0, DL, VT);

    // We can fold chains of FADD's of the same value into multiplications.
    // This transform is not safe in general because we are reducing the number
    // of rounding steps.
    if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
      if (N0.getOpcode() == ISD::FMUL) {
        ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
        ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));

        // (fadd (fmul x, c), x) -> (fmul x, c+1)
        if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0),
                                       DAG.getConstantFP(1.0, DL, VT));
          return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
        }

        // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
        if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
            N1.getOperand(0) == N1.getOperand(1) &&
            N0.getOperand(0) == N1.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0),
                                       DAG.getConstantFP(2.0, DL, VT));
          return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
        }
      }

      if (N1.getOpcode() == ISD::FMUL) {
        ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
        ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1));

        // (fadd x, (fmul x, c)) -> (fmul x, c+1)
        if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0),
                                       DAG.getConstantFP(1.0, DL, VT));
          return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
        }

        // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
        if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
            N0.getOperand(0) == N0.getOperand(1) &&
            N1.getOperand(0) == N0.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0),
                                       DAG.getConstantFP(2.0, DL, VT));
          return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
        }
      }

      if (N0.getOpcode() == ISD::FADD && AllowNewConst) {
        ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
        // (fadd (fadd x, x), x) -> (fmul x, 3.0)
        if (!CFP && N0.getOperand(0) == N0.getOperand(1) &&
            (N0.getOperand(0) == N1)) {
          return DAG.getNode(ISD::FMUL, DL, VT,
                             N1, DAG.getConstantFP(3.0, DL, VT));
        }
      }

      if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
        ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
        // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
        if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
            N1.getOperand(0) == N0) {
          return DAG.getNode(ISD::FMUL, DL, VT,
                             N0, DAG.getConstantFP(3.0, DL, VT));
        }
      }

      // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
      if (AllowNewConst &&
          N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
          N0.getOperand(0) == N0.getOperand(1) &&
          N1.getOperand(0) == N1.getOperand(1) &&
          N0.getOperand(0) == N1.getOperand(0)) {
        return DAG.getNode(ISD::FMUL, DL, VT,
                           N0.getOperand(0), DAG.getConstantFP(4.0, DL, VT));
      }
    }
  } // enable-unsafe-fp-math

  // FADD -> FMA combines:
  SDValue Fused = visitFADDForFMACombine(N);
  if (Fused) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  return SDValue();
}

SDValue DAGCombiner::visitFSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc dl(N);
  const TargetOptions &Options = DAG.getTarget().Options;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fsub c1, c2) -> c1-c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FSUB, dl, VT, N0, N1);

  // fold (fsub A, (fneg B)) -> (fadd A, B)
  if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
    return DAG.getNode(ISD::FADD, dl, VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations));

  // If 'unsafe math' is enabled, fold lots of things.
  if (Options.UnsafeFPMath) {
    // (fsub A, 0) -> A
    if (N1CFP && N1CFP->isZero())
      return N0;

    // (fsub 0, B) -> -B
    if (N0CFP && N0CFP->isZero()) {
      if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
        return GetNegatedExpression(N1, DAG, LegalOperations);
      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
        return DAG.getNode(ISD::FNEG, dl, VT, N1);
    }

    // (fsub x, x) -> 0.0
    if (N0 == N1)
      return DAG.getConstantFP(0.0f, dl, VT);

    // (fsub x, (fadd x, y)) -> (fneg y)
    // (fsub x, (fadd y, x)) -> (fneg y)
    if (N1.getOpcode() == ISD::FADD) {
      SDValue N10 = N1->getOperand(0);
      SDValue N11 = N1->getOperand(1);

      if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))
        return GetNegatedExpression(N11, DAG, LegalOperations);

      if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
        return GetNegatedExpression(N10, DAG, LegalOperations);
    }
  }

  // FSUB -> FMA combines:
  SDValue Fused = visitFSUBForFMACombine(N);
  if (Fused) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  return SDValue();
}

SDValue DAGCombiner::visitFMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;

  // fold vector ops
  if (VT.isVector()) {
    // This just handles C1 * C2 for vectors. Other vector folds are below.
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;
  }

  // fold (fmul c1, c2) -> c1*c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FMUL, DL, VT, N0, N1);

  // canonicalize constant to RHS
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
      !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);

  // fold (fmul A, 1.0) -> A
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return N0;

  if (Options.UnsafeFPMath) {
    // fold (fmul A, 0) -> 0
    if (N1CFP && N1CFP->isZero())
      return N1;

    // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
    if (N0.getOpcode() == ISD::FMUL) {
      // Fold scalars or any vector constants (not just splats).
      // This fold is done in general by InstCombine, but extra fmul insts
      // may have been generated during lowering.
      SDValue N00 = N0.getOperand(0);
      SDValue N01 = N0.getOperand(1);
      auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
      auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
      auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);

      // Check 1: Make sure that the first operand of the inner multiply is NOT
      // a constant. Otherwise, we may induce infinite looping.
      if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
        // Check 2: Make sure that the second operand of the inner multiply and
        // the second operand of the outer multiply are constants.
        if ((N1CFP && isConstOrConstSplatFP(N01)) ||
            (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
          SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
          return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
        }
      }
    }

    // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c))
    // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
    // during an early run of DAGCombiner it can prevent folding with fmuls
    // inserted during lowering.
    if (N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1)) {
      const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
      SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
      return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
    }
  }

  // fold (fmul X, 2.0) -> (fadd X, X)
  if (N1CFP && N1CFP->isExactlyValue(+2.0))
    return DAG.getNode(ISD::FADD, DL, VT, N0, N0);

  // fold (fmul X, -1.0) -> (fneg X)
  if (N1CFP && N1CFP->isExactlyValue(-1.0))
    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
      return DAG.getNode(ISD::FNEG, DL, VT, N0);

  // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FMUL, DL, VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations));
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitFMA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc dl(N);
  const TargetOptions &Options = DAG.getTarget().Options;

  // Constant fold FMA.
  if (isa<ConstantFPSDNode>(N0) &&
      isa<ConstantFPSDNode>(N1) &&
      isa<ConstantFPSDNode>(N2)) {
    return DAG.getNode(ISD::FMA, dl, VT, N0, N1, N2);
  }

  if (Options.UnsafeFPMath) {
    if (N0CFP && N0CFP->isZero())
      return N2;
    if (N1CFP && N1CFP->isZero())
      return N2;
  }
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);

  // Canonicalize (fma c, x, y) -> (fma x, c, y)
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);

  // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
  if (Options.UnsafeFPMath && N1CFP &&
      N2.getOpcode() == ISD::FMUL &&
      N0 == N2.getOperand(0) &&
      N2.getOperand(1).getOpcode() == ISD::ConstantFP) {
    return DAG.getNode(ISD::FMUL, dl, VT, N0,
                       DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1)));
  }

  // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
  if (Options.UnsafeFPMath &&
      N0.getOpcode() == ISD::FMUL && N1CFP &&
      N0.getOperand(1).getOpcode() == ISD::ConstantFP) {
    return DAG.getNode(ISD::FMA, dl, VT,
                       N0.getOperand(0),
                       DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)),
                       N2);
  }

  // (fma x, 1, y) -> (fadd x, y)
  // (fma x, -1, y) -> (fadd (fneg x), y)
  if (N1CFP) {
    if (N1CFP->isExactlyValue(1.0))
      return DAG.getNode(ISD::FADD, dl, VT, N0, N2);

    if (N1CFP->isExactlyValue(-1.0) &&
        (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
      SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0);
      AddToWorklist(RHSNeg.getNode());
      return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg);
    }
  }

  // (fma x, c, x) -> (fmul x, (c+1))
  if (Options.UnsafeFPMath && N1CFP && N0 == N2)
    return DAG.getNode(ISD::FMUL, dl, VT, N0,
                       DAG.getNode(ISD::FADD, dl, VT,
                                   N1, DAG.getConstantFP(1.0, dl, VT)));

  // (fma x, c, (fneg x)) -> (fmul x, (c-1))
  if (Options.UnsafeFPMath && N1CFP &&
      N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0)
    return DAG.getNode(ISD::FMUL, dl, VT, N0,
                       DAG.getNode(ISD::FADD, dl, VT,
                                   N1, DAG.getConstantFP(-1.0, dl, VT)));

  return SDValue();
}

SDValue DAGCombiner::visitFDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fdiv c1, c2) -> c1/c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);

  if (Options.UnsafeFPMath) {
    // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
    if (N1CFP) {
      // Compute the reciprocal 1.0 / c2.
      APFloat N1APF = N1CFP->getValueAPF();
      APFloat Recip(N1APF.getSemantics(), 1); // 1.0
      APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
      // Only do the transform if the reciprocal is a legal fp immediate that
      // isn't too nasty (eg NaN, denormal, ...).
      if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
          (!LegalOperations ||
           // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
           // backend)... we should handle this gracefully after Legalize.
           // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
           TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
           TLI.isFPImmLegal(Recip, VT)))
        return DAG.getNode(ISD::FMUL, DL, VT, N0,
                           DAG.getConstantFP(Recip, DL, VT));
    }
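
    // For example, (fdiv X, 4.0) -> (fmul X, 0.25), since 1.0/4.0 is exact.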

    // If this FDIV is part of a reciprocal square root, it may be folded
    // into a target-specific square root estimate instruction.
    if (N1.getOpcode() == ISD::FSQRT) {
      if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0))) {
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
      }
    } else if (N1.getOpcode() == ISD::FP_EXTEND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
      }
    } else if (N1.getOpcode() == ISD::FP_ROUND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
      }
    } else if (N1.getOpcode() == ISD::FMUL) {
      // Look through an FMUL. Even though this won't remove the FDIV directly,
      // it's still worthwhile to get rid of the FSQRT if possible.
      SDValue SqrtOp;
      SDValue OtherOp;
      if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(0);
        OtherOp = N1.getOperand(1);
      } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(1);
        OtherOp = N1.getOperand(0);
      }
      if (SqrtOp.getNode()) {
        // We found a FSQRT, so try to make this fold:
        // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
        if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0))) {
          RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp);
          AddToWorklist(RV.getNode());
          return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
        }
      }
    }

    // Fold into a reciprocal estimate and multiply instead of a real divide.
    if (SDValue RV = BuildReciprocalEstimate(N1)) {
      AddToWorklist(RV.getNode());
      return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
    }
  }

  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations));
    }
  }

  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
  // reciprocal.
  // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
  // Notice that this is not always beneficial. One reason is that different
  // targets may have different costs for FDIV and FMUL, so sometimes the cost
  // of two FDIVs may be lower than the cost of one FDIV and two FMULs. Another
  // reason is that the critical path is increased from "one FDIV" to "one
  // FDIV + one FMUL".
  if (Options.UnsafeFPMath) {
    // Skip if current node is a reciprocal.
    if (N0CFP && N0CFP->isExactlyValue(1.0))
      return SDValue();

    // Find all FDIV users of the same divisor.
    // Use a set because duplicates may be present in the user list.
    SetVector<SDNode *> Users;
    for (auto *U : N1->uses())
      if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1)
        Users.insert(U);

    if (TLI.combineRepeatedFPDivisors(Users.size())) {
      SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
      // FIXME: This optimization requires some level of fast-math, so the
      // created reciprocal node should at least have the 'allowReciprocal'
      // fast-math-flag set.
      SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1);

      // Dividend / Divisor -> Dividend * Reciprocal
      for (auto *U : Users) {
        SDValue Dividend = U->getOperand(0);
        if (Dividend != FPOne) {
          SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
                                        Reciprocal);
          CombineTo(U, NewNode);
        } else if (U != Reciprocal.getNode()) {
          // In the absence of fast-math-flags, this user node is always the
          // same node as Reciprocal, but with FMF they may be different nodes.
          CombineTo(U, Reciprocal);
        }
      }
      return SDValue(N, 0); // N was replaced.
    }
  }

  return SDValue();
}
SDValue DAGCombiner::visitFREM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);

  // fold (frem c1, c2) -> fmod(c1,c2)
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1);

  return SDValue();
}
SDValue DAGCombiner::visitFSQRT(SDNode *N) {
  if (!DAG.getTarget().Options.UnsafeFPMath || TLI.isFsqrtCheap())
    return SDValue();

  // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)
  SDValue RV = BuildRsqrtEstimate(N->getOperand(0));
  if (!RV)
    return SDValue();

  EVT VT = RV.getValueType();
  SDLoc DL(N);
  RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV);
  AddToWorklist(RV.getNode());

  // Unfortunately, RV is now NaN if the input was exactly 0.
  // Select out this case and force the answer to 0.
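  // (For X == 0 we compute 0 * (1/sqrt(0)) = 0 * inf = NaN, whereas
  // fsqrt(0) is 0.)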
  SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
  EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, N->getOperand(0), Zero, ISD::SETEQ);
  AddToWorklist(ZeroCmp.getNode());
  AddToWorklist(RV.getNode());

  return DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
                     ZeroCmp, Zero, RV);
}
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);

  if (N0CFP && N1CFP) // Constant fold
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);

  if (N1CFP) {
    const APFloat &V = N1CFP->getValueAPF();
    // copysign(x, c1) -> fabs(x)       iff ispos(c1)
    // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
    if (!V.isNegative()) {
      if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
        return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
    } else {
      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
        return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
                           DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
    }
  }

  // copysign(fabs(x), y) -> copysign(x, y)
  // copysign(fneg(x), y) -> copysign(x, y)
  // copysign(copysign(x,z), y) -> copysign(x, y)
  if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
      N0.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                       N0.getOperand(0), N1);

  // copysign(x, fabs(y)) -> fabs(x)
  if (N1.getOpcode() == ISD::FABS)
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);

  // copysign(x, copysign(y,z)) -> copysign(x, z)
  if (N1.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                       N0, N1.getOperand(1));

  // copysign(x, fp_extend(y)) -> copysign(x, y)
  // copysign(x, fp_round(y)) -> copysign(x, y)
  if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND)
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                       N0, N1.getOperand(0));

  return SDValue();
}
SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  // fold (sint_to_fp c1) -> c1fp
  if (isConstantIntBuildVectorOrConstantInt(N0) &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
    return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);

  // If the input is a legal type, and SINT_TO_FP is not legal on this target,
  // but UINT_TO_FP is legal on this target, try to convert.
  if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
      TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
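    // E.g., if N0 is (zext i16 x to i32), its sign bit is zero, so the signed
    // and unsigned conversions produce the same value.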
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
  }

  // The next optimizations are desirable only if SELECT_CC can be lowered.
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
    if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
        !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0), N0.getOperand(1),
          DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }

    // fold (sint_to_fp (zext (setcc x, y, cc))) ->
    //      (select_cc x, y, 1.0, 0.0, cc)
    if (N0.getOpcode() == ISD::ZERO_EXTEND &&
        N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
          DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(0).getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }
  }

  return SDValue();
}
SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  // fold (uint_to_fp c1) -> c1fp
  if (isConstantIntBuildVectorOrConstantInt(N0) &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
    return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);

  // If the input is a legal type, and UINT_TO_FP is not legal on this target,
  // but SINT_TO_FP is legal on this target, try to convert.
  if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
      TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
  }

  // The next optimizations are desirable only if SELECT_CC can be lowered.
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0, cc)
    if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0), N0.getOperand(1),
          DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }
  }

  return SDValue();
}
// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
    return SDValue();

  SDValue Src = N0.getOperand(0);
  EVT SrcVT = Src.getValueType();
  bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
  bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;

  // We can safely assume the conversion won't overflow the output range,
  // because (for example) (uint8_t)18293.f is undefined behavior.

  // Since we can assume the conversion won't overflow, our decision as to
  // whether the input will fit in the float should depend on the minimum
  // of the input range and output range.

  // This means this is also safe for a signed input and unsigned output, since
  // a negative input would lead to undefined behavior.
  unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
  unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
  unsigned ActualSize = std::min(InputSize, OutputSize);
  const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());

  // We can only fold away the float conversion if the input range can be
  // represented exactly in the float range.
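  // E.g., with f32 the significand holds 24 bits, so a signed i32 round trip
  // (31 usable value bits) cannot be folded, while a signed i16 round trip
  // (15 usable value bits) can.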
  if (APFloat::semanticsPrecision(sem) >= ActualSize) {
    if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
      unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
                                                       : ISD::ZERO_EXTEND;
      return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
    }
    if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
    if (SrcVT == VT)
      return Src;
    return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Src);
  }
  return SDValue();
}
SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (fp_to_sint c1fp) -> c1
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);

  return FoldIntToFPToInt(N, DAG);
}

SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (fp_to_uint c1fp) -> c1
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);

  return FoldIntToFPToInt(N, DAG);
}
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_round c1fp) -> c1fp
  if (N0CFP)
    return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);

  // fold (fp_round (fp_extend x)) -> x
  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
    return N0.getOperand(0);

  // fold (fp_round (fp_round x)) -> (fp_round x)
  if (N0.getOpcode() == ISD::FP_ROUND) {
    const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
    const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1;
    // If the first fp_round isn't a value preserving truncation, it might
    // introduce a tie in the second fp_round that wouldn't occur in the
    // single-step fp_round we want to fold to.
    // In other words, double rounding isn't the same as rounding once.
    // Also, this is a value preserving truncation iff both fp_round's are.
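    // (Decimal analogy, assuming round-half-up: 0.46 rounded to one digit is
    // 0.5, and rounding that to an integer gives 1, while rounding 0.46
    // directly to an integer gives 0.)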
    if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
      SDLoc DL(N);
      return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
                         DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
    }
  }

  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
                              N0.getOperand(0), N1);
    AddToWorklist(Tmp.getNode());
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                       Tmp, N0.getOperand(1));
  }

  return SDValue();
}
SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);

  // fold (fp_round_inreg c1fp) -> c1fp
  if (N0CFP && isTypeLegal(EVT)) {
    SDLoc DL(N);
    SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
    return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
  }

  return SDValue();
}
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() &&
      N->use_begin()->getOpcode() == ISD::FP_ROUND)
    return SDValue();

  // fold (fp_extend c1fp) -> c1fp
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);

  // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
  if (N0.getOpcode() == ISD::FP16_TO_FP &&
      TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
    return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));

  // Turn fp_extend(fp_round(X, 1)) -> X since the fp_round doesn't affect the
  // value of X.
  if (N0.getOpcode() == ISD::FP_ROUND
      && N0.getNode()->getConstantOperandVal(1) == 1) {
    SDValue In = N0.getOperand(0);
    if (In.getValueType() == VT) return In;
    if (VT.bitsLT(In.getValueType()))
      return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
                         In, N0.getOperand(1));
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
  }

  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), N0.getValueType(),
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(),
              DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
                          N0.getValueType(), ExtLoad,
                          DAG.getIntPtrConstant(1, SDLoc(N0))),
              ExtLoad.getValue(1));
    return SDValue(N, 0); // Return N so it doesn't get rechecked!
  }

  return SDValue();
}
SDValue DAGCombiner::visitFCEIL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (fceil c1) -> fceil(c1)
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);

  return SDValue();
}

SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ftrunc c1) -> ftrunc(c1)
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);

  return SDValue();
}

SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ffloor c1) -> ffloor(c1)
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);

  return SDValue();
}
// FIXME: FNEG and FABS have a lot in common; refactor.
SDValue DAGCombiner::visitFNEG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Constant fold FNEG.
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);

  if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
                         &DAG.getTarget().Options))
    return GetNegatedExpression(N0, DAG, LegalOperations);

  // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
  // constant pool values.
  if (!TLI.isFNegFree(VT) &&
      N0.getOpcode() == ISD::BITCAST &&
      N0.getNode()->hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x80... per scalar element
        // and splat it.
        SignMask = APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x80...
        SignMask = APInt::getSignBit(IntVT.getSizeInBits());
      }
      SDLoc DL0(N0);
      Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
                        DAG.getConstant(SignMask, DL0, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int);
    }
  }

  // (fneg (fmul c, x)) -> (fmul -c, x)
  if (N0.getOpcode() == ISD::FMUL &&
      (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
    ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
    if (CFP1) {
      APFloat CVal = CFP1->getValueAPF();
      CVal.changeSign();
      if (Level >= AfterLegalizeDAG &&
          (TLI.isFPImmLegal(CVal, N->getValueType(0)) ||
           TLI.isOperationLegal(ISD::ConstantFP, N->getValueType(0))))
        return DAG.getNode(
            ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
            DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)));
    }
  }

  return SDValue();
}
SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);

  if (N0CFP && N1CFP) {
    const APFloat &C0 = N0CFP->getValueAPF();
    const APFloat &C1 = N1CFP->getValueAPF();
    return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), N->getValueType(0));
  }

  if (N0CFP) {
    EVT VT = N->getValueType(0);
    // Canonicalize to constant on RHS.
    return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
  }

  return SDValue();
}

SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);

  if (N0CFP && N1CFP) {
    const APFloat &C0 = N0CFP->getValueAPF();
    const APFloat &C1 = N1CFP->getValueAPF();
    return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), N->getValueType(0));
  }

  if (N0CFP) {
    EVT VT = N->getValueType(0);
    // Canonicalize to constant on RHS.
    return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
  }

  return SDValue();
}
SDValue DAGCombiner::visitFABS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (fabs c1) -> fabs(c1)
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);

  // fold (fabs (fabs x)) -> (fabs x)
  if (N0.getOpcode() == ISD::FABS)
    return N->getOperand(0);

  // fold (fabs (fneg x)) -> (fabs x)
  // fold (fabs (fcopysign x, y)) -> (fabs x)
  if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));

  // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
  // constant pool values.
  if (!TLI.isFAbsFree(VT) &&
      N0.getOpcode() == ISD::BITCAST &&
      N0.getNode()->hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x7f... per scalar element
        // and splat it.
        SignMask = ~APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x7f...
        SignMask = ~APInt::getSignBit(IntVT.getSizeInBits());
      }
      SDLoc DL(N0);
      Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
                        DAG.getConstant(SignMask, DL, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int);
    }
  }

  return SDValue();
}
SDValue DAGCombiner::visitBRCOND(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);

  // If N is a constant we could fold this into a fallthrough or unconditional
  // branch. However that doesn't happen very often in normal code, because
  // Instcombine/SimplifyCFG should have handled the available opportunities.
  // If we did this folding here, it would be necessary to update the
  // MachineBasicBlock CFG, which is awkward.

  // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
  // on the target.
  if (N1.getOpcode() == ISD::SETCC &&
      TLI.isOperationLegalOrCustom(ISD::BR_CC,
                                   N1.getOperand(0).getValueType())) {
    return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
                       Chain, N1.getOperand(2),
                       N1.getOperand(0), N1.getOperand(1), N2);
  }

  if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
      ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
       (N1.getOperand(0).hasOneUse() &&
        N1.getOperand(0).getOpcode() == ISD::SRL))) {
    SDNode *Trunc = nullptr;
    if (N1.getOpcode() == ISD::TRUNCATE) {
      // Look past the truncate.
      Trunc = N1.getNode();
      N1 = N1.getOperand(0);
    }

    // Match this pattern so that we can generate simpler code:
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = srl i32 %b, 1
    //   brcond i32 %c ...
    //
    // into
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = setcc eq %b, 0
    //   brcond %c ...
    //
    // This applies only when the AND constant value has one bit set and the
    // SRL constant is equal to the log2 of the AND constant. The back-end is
    // smart enough to convert the result into a TEST/JMP sequence.
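    // For instance, with an AND constant of 2 and an SRL amount of 1 (the
    // log2 of 2), the branch reduces to testing whether bit 1 of %a is set.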
    SDValue Op0 = N1.getOperand(0);
    SDValue Op1 = N1.getOperand(1);

    if (Op0.getOpcode() == ISD::AND &&
        Op1.getOpcode() == ISD::Constant) {
      SDValue AndOp1 = Op0.getOperand(1);

      if (AndOp1.getOpcode() == ISD::Constant) {
        const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();

        if (AndConst.isPowerOf2() &&
            cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
          SDLoc DL(N);
          SDValue SetCC =
            DAG.getSetCC(DL,
                         getSetCCResultType(Op0.getValueType()),
                         Op0, DAG.getConstant(0, DL, Op0.getValueType()),
                         ISD::SETNE);

          SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL,
                                          MVT::Other, Chain, SetCC, N2);
          // Don't add the new BRCond into the worklist or else SimplifySelectCC
          // will convert it back to (X & C1) >> C2.
          CombineTo(N, NewBRCond, false);
          // Truncate is dead.
          if (Trunc)
            deleteAndRecombine(Trunc);
          // Replace the uses of SRL with SETCC
          WorklistRemover DeadNodes(*this);
          DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
          deleteAndRecombine(N1.getNode());
          return SDValue(N, 0); // Return N so it doesn't get rechecked!
        }
      }
    }

    if (Trunc)
      // Restore N1 if the above transformation doesn't match.
      N1 = N->getOperand(1);
  }

  // Transform br(xor(x, y)) -> br(x != y)
  // Transform br(xor(xor(x,y), 1)) -> br (x == y)
  if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
    SDNode *TheXor = N1.getNode();
    SDValue Op0 = TheXor->getOperand(0);
    SDValue Op1 = TheXor->getOperand(1);
    if (Op0.getOpcode() == Op1.getOpcode()) {
      // Avoid missing important xor optimizations.
      SDValue Tmp = visitXOR(TheXor);
      if (Tmp.getNode()) {
        if (Tmp.getNode() != TheXor) {
          DEBUG(dbgs() << "\nReplacing.8 ";
                TheXor->dump(&DAG);
                dbgs() << "\nWith: ";
                Tmp.getNode()->dump(&DAG);
                dbgs() << '\n');
          WorklistRemover DeadNodes(*this);
          DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
          deleteAndRecombine(TheXor);
          return DAG.getNode(ISD::BRCOND, SDLoc(N),
                             MVT::Other, Chain, Tmp, N2);
        }

        // visitXOR has changed XOR's operands or replaced the XOR completely,
        // bail out.
        return SDValue(N, 0);
      }
    }

    if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
      bool Equal = false;
      if (isOneConstant(Op0) && Op0.hasOneUse() &&
          Op0.getOpcode() == ISD::XOR) {
        TheXor = Op0.getNode();
        Equal = true;
      }

      EVT SetCCVT = N1.getValueType();
      if (LegalTypes)
        SetCCVT = getSetCCResultType(SetCCVT);
      SDValue SetCC = DAG.getSetCC(SDLoc(TheXor),
                                   SetCCVT,
                                   Op0, Op1,
                                   Equal ? ISD::SETEQ : ISD::SETNE);
      // Replace the uses of XOR with SETCC
      WorklistRemover DeadNodes(*this);
      DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
      deleteAndRecombine(N1.getNode());
      return DAG.getNode(ISD::BRCOND, SDLoc(N),
                         MVT::Other, Chain, SetCC, N2);
    }
  }

  return SDValue();
}
// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
//
SDValue DAGCombiner::visitBR_CC(SDNode *N) {
  CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
  SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);

  // If N is a constant we could fold this into a fallthrough or unconditional
  // branch. However that doesn't happen very often in normal code, because
  // Instcombine/SimplifyCFG should have handled the available opportunities.
  // If we did this folding here, it would be necessary to update the
  // MachineBasicBlock CFG, which is awkward.

  // Use SimplifySetCC to simplify SETCC's.
  SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
                               CondLHS, CondRHS, CC->get(), SDLoc(N),
                               false);
  if (Simp.getNode()) AddToWorklist(Simp.getNode());

  // fold to a simpler setcc
  if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
    return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
                       N->getOperand(0), Simp.getOperand(2),
                       Simp.getOperand(0), Simp.getOperand(1),
                       N->getOperand(4));

  return SDValue();
}
/// Return true if 'Use' is a load or a store that uses N as its base pointer
/// and that N may be folded in the load / store addressing mode.
static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
                                    SelectionDAG &DAG,
                                    const TargetLowering &TLI) {
  EVT VT;
  unsigned AS;

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
    if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
      return false;
    VT = LD->getMemoryVT();
    AS = LD->getAddressSpace();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
    if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
      return false;
    VT = ST->getMemoryVT();
    AS = ST->getAddressSpace();
  } else
    return false;

  TargetLowering::AddrMode AM;
  if (N->getOpcode() == ISD::ADD) {
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (Offset)
      // [reg +/- imm]
      AM.BaseOffs = Offset->getSExtValue();
    else
      // [reg +/- reg]
      AM.Scale = 1;
  } else if (N->getOpcode() == ISD::SUB) {
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (Offset)
      // [reg +/- imm]
      AM.BaseOffs = -Offset->getSExtValue();
    else
      // [reg +/- reg]
      AM.Scale = 1;
  } else
    return false;

  return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
                                   VT.getTypeForEVT(*DAG.getContext()), AS);
}
/// Try turning a load/store into a pre-indexed load/store when the base
/// pointer is an add or subtract and it has other uses besides the load/store.
/// After the transformation, the new indexed load/store has effectively folded
/// the add/subtract in and all of its other uses are redirected to the
/// new load/store.
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  if (Level < AfterLegalizeDAG)
    return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  // out. There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr.getNode()->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;

  // Backends without true r+i pre-indexed forms may need to pass a
  // constant base with a variable offset so that constant coercion
  // will work with the patterns in canonical form.
  bool Swapped = false;
  if (isa<ConstantSDNode>(BasePtr)) {
    std::swap(BasePtr, Offset);
    Swapped = true;
  }

  // Don't create an indexed load / store with zero offset.
  if (isNullConstant(Offset))
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1. Preinc'ing a frame index would require copying the stack pointer
  // (plus the implicit offset) to a register to preinc anyway.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.
  if (!isLoad) {
    SDValue Val = cast<StoreSDNode>(N)->getValue();
    if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
      return false;
  }

  // If the offset is a constant, there may be other adds of constants that
  // can be folded with this one. We should do this to avoid having to keep
  // a copy of the original base pointer.
  SmallVector<SDNode *, 16> OtherUses;
  if (isa<ConstantSDNode>(Offset))
    for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
                              UE = BasePtr.getNode()->use_end();
         UI != UE; ++UI) {
      SDUse &Use = UI.getUse();
      // Skip the use that is Ptr and uses of other results from BasePtr's
      // node (important for nodes that return multiple results).
      if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
        continue;

      if (Use.getUser()->isPredecessorOf(N))
        continue;

      if (Use.getUser()->getOpcode() != ISD::ADD &&
          Use.getUser()->getOpcode() != ISD::SUB) {
        OtherUses.clear();
        break;
      }

      SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
      if (!isa<ConstantSDNode>(Op1)) {
        OtherUses.clear();
        break;
      }

      // FIXME: In some cases, we can be smarter about this.
      if (Op1.getValueType() != Offset.getValueType()) {
        OtherUses.clear();
        break;
      }

      OtherUses.push_back(Use.getUser());
    }

  if (Swapped)
    std::swap(BasePtr, Offset);

  // Now check for #3 and #4.
  bool RealUse = false;

  // Caches for hasPredecessorHelper
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;

  for (SDNode *Use : Ptr.getNode()->uses()) {
    if (Use == N)
      continue;
    if (N->hasPredecessorHelper(Use, Visited, Worklist))
      return false;

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.
    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
      RealUse = true;
  }

  if (!RealUse)
    return false;

  SDValue Result;
  if (isLoad)
    Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
  else
    Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                 BasePtr, Offset, AM);
  ++PreIndexedNodes;
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.4 ";
        N->dump(&DAG);
        dbgs() << "\nWith: ";
        Result.getNode()->dump(&DAG);
        dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  if (isLoad) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  deleteAndRecombine(N);

  if (Swapped)
    std::swap(BasePtr, Offset);

  // Replace other uses of BasePtr that can be updated to use Ptr
  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
    unsigned OffsetIdx = 1;
    if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
      OffsetIdx = 0;
    assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
           BasePtr.getNode() && "Expected BasePtr operand");

    // We need to replace ptr0 in the following expression:
    //   x0 * offset0 + y0 * ptr0 = t0
    // knowing that
    //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
    //
    // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
    // indexed load/store and the expression that needs to be re-written.
    //
    // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
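    // For instance, if the other use is (add ptr0, 16) and the pre-inc
    // offset1 is 4 (so x0 = x1 = y0 = y1 = 1), then t0 = (16 - 4) + t1,
    // i.e. (add t1, 12).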
    ConstantSDNode *CN =
      cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
    int X0, X1, Y0, Y1;
    APInt Offset0 = CN->getAPIntValue();
    APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();

    X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
    Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
    X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
    Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;

    unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;

    APInt CNV = Offset0;
    if (X0 < 0) CNV = -CNV;
    if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
    else CNV = CNV - Offset1;

    SDLoc DL(OtherUses[i]);

    // We can now generate the new expression.
    SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
    SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);

    SDValue NewUse = DAG.getNode(Opcode,
                                 DL,
                                 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
    DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
    deleteAndRecombine(OtherUses[i]);
  }

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
  deleteAndRecombine(Ptr.getNode());

  return true;
}
/// Try to combine a load/store with an add/sub of the base pointer node into a
/// post-indexed load/store. The transformation effectively folds the
/// add/subtract into the new indexed load/store, and all of its uses are
/// redirected to the new load/store.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  if (Level < AfterLegalizeDAG)
    return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  if (Ptr.getNode()->hasOneUse())
    return false;

  for (SDNode *Op : Ptr.getNode()->uses()) {
    if (Op == N ||
        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
      continue;

    SDValue BasePtr;
    SDValue Offset;
    ISD::MemIndexedMode AM = ISD::UNINDEXED;
    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
      // Don't create an indexed load / store with zero offset.
      if (isNullConstant(Offset))
        continue;

      // Try turning it into a post-indexed load / store except when
      // 1) All uses are load / store ops that use it as base ptr (and
      //    it may be folded as addressing mode).
      // 2) Op must be independent of N, i.e. Op is neither a predecessor
      //    nor a successor of N. Otherwise, if Op is folded that would
      //    create a cycle.

      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
        continue;

      // Check for #1.
      bool TryNext = false;
      for (SDNode *Use : BasePtr.getNode()->uses()) {
        if (Use == Ptr.getNode())
          continue;

        // If all the uses are load / store addresses, then don't do the
        // transformation.
        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
          bool RealUse = false;
          for (SDNode *UseUse : Use->uses()) {
            if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
              RealUse = true;
          }

          if (!RealUse) {
            TryNext = true;
            break;
          }
        }
      }

      if (TryNext)
        continue;

      // Check for #2
      if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
        SDValue Result = isLoad
          ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                               BasePtr, Offset, AM)
          : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
        ++PostIndexedNodes;
        ++NodesCombined;
        DEBUG(dbgs() << "\nReplacing.5 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Result.getNode()->dump(&DAG);
              dbgs() << '\n');
        WorklistRemover DeadNodes(*this);
        if (isLoad) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
        } else {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
        }

        // Finally, since the node is now dead, remove it from the graph.
        deleteAndRecombine(N);

        // Replace the uses of Use with uses of the updated base value.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                      Result.getValue(isLoad ? 1 : 0));
        deleteAndRecombine(Op);
        return true;
      }
    }
  }

  return false;
}
/// \brief Return the base-pointer arithmetic from an indexed \p LD.
SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  assert(AM != ISD::UNINDEXED);
  SDValue BP = LD->getOperand(1);
  SDValue Inc = LD->getOperand(2);

  // Some backends use TargetConstants for load offsets, but don't expect
  // TargetConstants in general ADD nodes. We can convert these constants into
  // regular Constants (if the constant is not opaque).
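  // E.g., a pre-increment load whose offset is a TargetConstant of 4 is split
  // into (add BasePtr, Constant<4>) once the Constant has been rebuilt here.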
  assert((Inc.getOpcode() != ISD::TargetConstant ||
          !cast<ConstantSDNode>(Inc)->isOpaque()) &&
         "Cannot split out indexing using opaque target constants");
  if (Inc.getOpcode() == ISD::TargetConstant) {
    ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
    Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
                          ConstInc->getValueType(0));
  }

  unsigned Opc =
      (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
  return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
}
SDValue DAGCombiner::visitLOAD(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  if (!LD->isVolatile()) {
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3         = add v2, c
        // Now we replace use of chain2 with chain1. This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        DEBUG(dbgs() << "\nReplacing.6 ";
              N->dump(&DAG);
              dbgs() << "\nWith chain: ";
              Chain.getNode()->dump(&DAG);
              dbgs() << "\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);

        if (N->use_empty())
          deleteAndRecombine(N);

        return SDValue(N, 0); // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");

      // If this load has an opaque TargetConstant offset, then we cannot split
      // the indexing into an add/sub directly (that TargetConstant may not be
      // valid for a different type of node, and we cannot convert an opaque
      // target constant into a regular constant).
      bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
                       cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();

      if (!N->hasAnyUseOfValue(0) &&
          ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        SDValue Index;
        if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
          Index = SplitIndexingFromLoad(LD);
          // Try to fold the base pointer arithmetic into subsequent loads and
          // stores.
          AddUsersToWorklist(N);
        } else
          Index = DAG.getUNDEF(N->getValueType(1));
        DEBUG(dbgs() << "\nReplacing.7 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Undef.getNode()->dump(&DAG);
              dbgs() << " and 2 other values\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
        deleteAndRecombine(N);
        return SDValue(N, 0); // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  // TODO: Handle store large -> read small portion.
  // TODO: Handle TRUNCSTORE/LOADEXT
  if (ISD::isNormalLoad(N) && !LD->isVolatile()) {
    if (ISD::isNON_TRUNCStore(Chain.getNode())) {
      StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
      if (PrevST->getBasePtr() == Ptr &&
          PrevST->getValue().getValueType() == N->getValueType(0))
        return CombineTo(N, Chain.getOperand(1), Chain);
    }
  }

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > LD->getMemOperand()->getBaseAlignment()) {
        SDValue NewLoad =
            DAG.getExtLoad(LD->getExtensionType(), SDLoc(N),
                           LD->getValueType(0),
                           Chain, Ptr, LD->getPointerInfo(),
                           LD->getMemoryVT(),
                           LD->isVolatile(), LD->isNonTemporal(),
                           LD->isInvariant(), Align, LD->getAAInfo());
        if (NewLoad.getNode() != N)
          return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
      }
    }
  }

  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
                                                  : DAG.getSubtarget().useAA();
#ifndef NDEBUG
  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;
#endif
  if (UseAA && LD->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to avoid dependency.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
                               BetterChain, Ptr, LD->getMemOperand());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getMemoryVT(),
                                  LD->getMemOperand());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Make sure the new and old chains are cleaned up.
      AddToWorklist(Token.getNode());

      // Replace uses with load result and token factor. Don't add users
      // to work list.
      return CombineTo(N, ReplLoad.getValue(0), Token, false);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Try to slice up N to more direct loads if the slices are mapped to
  // different register banks or pairing can take place.
  if (SliceUpLoad(N))
    return SDValue(N, 0);

  return SDValue();
}
namespace {
/// \brief Helper structure used to slice a load in smaller loads.
/// Basically a slice is obtained from the following sequence:
/// Origin = load Ty1, Base
/// Shift = srl Ty1 Origin, CstTy Amount
/// Inst = trunc Shift to Ty2
///
/// Then, it will be rewritten into:
/// Slice = load SliceTy, Base + SliceOffset
/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
///
/// SliceTy is deduced from the number of bits that are actually used to
/// build Inst.
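/// E.g., on a little-endian target,
///   Inst = trunc (srl (load i64, Base), 32) to i32
/// becomes
///   Slice = load i32, Base + 4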
struct LoadedSlice {
  /// \brief Helper structure used to compute the cost of a slice.
  struct Cost {
    /// Are we optimizing for code size.
    bool ForCodeSize;
    /// Various costs.
    unsigned Loads;
    unsigned Truncates;
    unsigned CrossRegisterBanksCopies;
    unsigned ZExts;
    unsigned Shift;

    Cost(bool ForCodeSize = false)
        : ForCodeSize(ForCodeSize), Loads(0), Truncates(0),
          CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {}

    /// \brief Get the cost of one isolated slice.
    Cost(const LoadedSlice &LS, bool ForCodeSize = false)
        : ForCodeSize(ForCodeSize), Loads(1), Truncates(0),
          CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {
      EVT TruncType = LS.Inst->getValueType(0);
      EVT LoadedType = LS.getLoadedType();
      if (TruncType != LoadedType &&
          !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
        ZExts = 1;
    }

    /// \brief Account for slicing gain in the current cost.
    /// Slicing provides a few gains, like removing a shift or a
    /// truncate. This method allows growing the cost of the original
    /// load with the gain from this slice.
    void addSliceGain(const LoadedSlice &LS) {
      // Each slice saves a truncate.
      const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
      if (!TLI.isTruncateFree(LS.Inst->getValueType(0),
                              LS.Inst->getOperand(0).getValueType()))
        ++Truncates;
      // If there is a shift amount, this slice gets rid of it.
      if (LS.Shift)
        ++Shift;
      // If this slice can merge a cross register bank copy, account for it.
      if (LS.canMergeExpensiveCrossRegisterBankCopy())
        ++CrossRegisterBanksCopies;
    }

    Cost &operator+=(const Cost &RHS) {
      Loads += RHS.Loads;
      Truncates += RHS.Truncates;
      CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
      ZExts += RHS.ZExts;
      Shift += RHS.Shift;
      return *this;
    }

    bool operator==(const Cost &RHS) const {
      return Loads == RHS.Loads && Truncates == RHS.Truncates &&
             CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
             ZExts == RHS.ZExts && Shift == RHS.Shift;
    }

    bool operator!=(const Cost &RHS) const { return !(*this == RHS); }

    bool operator<(const Cost &RHS) const {
      // Assume cross register banks copies are as expensive as loads.
      // FIXME: Do we want some more target hooks?
      unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
      unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
      // Unless we are optimizing for code size, consider the
      // expensive operation first.
      if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
        return ExpensiveOpsLHS < ExpensiveOpsRHS;
      return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
             (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
    }

    bool operator>(const Cost &RHS) const { return RHS < *this; }
    bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
    bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
  };

  // The last instruction that represents the slice. This should be a
  // truncate instruction.
  SDNode *Inst;
  // The original load instruction.
  LoadSDNode *Origin;
  // The right shift amount in bits from the original load.
  unsigned Shift;
  // The DAG from which Origin came from.
  // This is used to get some contextual information about legal types, etc.
  SelectionDAG *DAG;

  LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
              unsigned Shift = 0, SelectionDAG *DAG = nullptr)
      : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}

  /// \brief Get the bits used in a chunk of bits \p BitWidth large.
  /// \return Result is \p BitWidth and has used bits set to 1 and
  ///         not used bits set to 0.
  APInt getUsedBits() const {
    // Reproduce the trunc(lshr) sequence:
    // - Start from the truncated value.
    // - Zero extend to the desired bit width.
    // - Shift left.
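    // E.g., an i16 slice of an i64 load with Shift == 16 yields
    // UsedBits == 0x00000000FFFF0000.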
    assert(Origin && "No original load to compare against.");
    unsigned BitWidth = Origin->getValueSizeInBits(0);
    assert(Inst && "This slice is not bound to an instruction");
    assert(Inst->getValueSizeInBits(0) <= BitWidth &&
           "Extracted slice is bigger than the whole type!");
    APInt UsedBits(Inst->getValueSizeInBits(0), 0);
    UsedBits.setAllBits();
    UsedBits = UsedBits.zext(BitWidth);
    UsedBits <<= Shift;
    return UsedBits;
  }

  /// \brief Get the size of the slice to be loaded in bytes.
  unsigned getLoadedSize() const {
    unsigned SliceSize = getUsedBits().countPopulation();
    assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
    return SliceSize / 8;
  }

  /// \brief Get the type that will be loaded for this slice.
  /// Note: This may not be the final type for the slice.
  EVT getLoadedType() const {
    assert(DAG && "Missing context");
    LLVMContext &Ctxt = *DAG->getContext();
    return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
  }

  /// \brief Get the alignment of the load used for this slice.
  unsigned getAlignment() const {
    unsigned Alignment = Origin->getAlignment();
    unsigned Offset = getOffsetFromBase();
    if (Offset != 0)
      Alignment = MinAlign(Alignment, Alignment + Offset);
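    // E.g., a base alignment of 8 with Offset == 4 gives MinAlign(8, 12),
    // i.e. an alignment of 4.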
  8638. return Alignment;
  8639. }
  8640. /// \brief Check if this slice can be rewritten with legal operations.
  8641. bool isLegal() const {
  8642. // An invalid slice is not legal.
  8643. if (!Origin || !Inst || !DAG)
  8644. return false;
  8645. // Offsets are for indexed load only, we do not handle that.
  8646. if (Origin->getOffset().getOpcode() != ISD::UNDEF)
  8647. return false;
  8648. const TargetLowering &TLI = DAG->getTargetLoweringInfo();
  8649. // Check that the type is legal.
  8650. EVT SliceType = getLoadedType();
  8651. if (!TLI.isTypeLegal(SliceType))
  8652. return false;
  8653. // Check that the load is legal for this type.
  8654. if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
  8655. return false;
  8656. // Check that the offset can be computed.
  8657. // 1. Check its type.
  8658. EVT PtrType = Origin->getBasePtr().getValueType();
  8659. if (PtrType == MVT::Untyped || PtrType.isExtended())
  8660. return false;
  8661. // 2. Check that it fits in the immediate.
  8662. if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
  8663. return false;
  8664. // 3. Check that the computation is legal.
  8665. if (!TLI.isOperationLegal(ISD::ADD, PtrType))
  8666. return false;
  8667. // Check that the zext is legal if it needs one.
  8668. EVT TruncateType = Inst->getValueType(0);
  8669. if (TruncateType != SliceType &&
  8670. !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
  8671. return false;
  8672. return true;
  8673. }
  /// \brief Get the offset in bytes of this slice in the original chunk of
  /// bits.
  /// \pre DAG != nullptr.
  uint64_t getOffsetFromBase() const {
    assert(DAG && "Missing context.");
    bool IsBigEndian = DAG->getDataLayout().isBigEndian();
    assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
    uint64_t Offset = Shift / 8;
    unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
    assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
           "The size of the original loaded type is not a multiple of a"
           " byte.");
    // If Offset is bigger than TySizeInBytes, it means we are loading all
    // zeros. This should have been optimized before in the process.
    assert(TySizeInBytes > Offset &&
           "Invalid shift amount for given loaded size");
    if (IsBigEndian)
      Offset = TySizeInBytes - Offset - getLoadedSize();
    return Offset;
  }
  /// \brief Generate the sequence of instructions to load the slice
  /// represented by this object and redirect the uses of this slice to
  /// this new sequence of instructions.
  /// \pre this->Inst && this->Origin are valid Instructions and this
  /// object passed the legal check: LoadedSlice::isLegal returned true.
  /// \return The last instruction of the sequence used to load the slice.
  SDValue loadSlice() const {
    assert(Inst && Origin && "Unable to replace a non-existing slice.");
    const SDValue &OldBaseAddr = Origin->getBasePtr();
    SDValue BaseAddr = OldBaseAddr;
    // Get the offset in that chunk of bytes w.r.t. the endianness.
    int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
    assert(Offset >= 0 && "Offset too big to fit in int64_t!");
    if (Offset) {
      // BaseAddr = BaseAddr + Offset.
      EVT ArithType = BaseAddr.getValueType();
      SDLoc DL(Origin);
      BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
                              DAG->getConstant(Offset, DL, ArithType));
    }
    // Create the type of the loaded slice according to its size.
    EVT SliceType = getLoadedType();
    // Create the load for the slice.
    SDValue LastInst = DAG->getLoad(
        SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
        Origin->getPointerInfo().getWithOffset(Offset), Origin->isVolatile(),
        Origin->isNonTemporal(), Origin->isInvariant(), getAlignment());
    // If the final type is not the same as the loaded type, this means that
    // we have to pad with zero. Create a zero extend for that.
    EVT FinalType = Inst->getValueType(0);
    if (SliceType != FinalType)
      LastInst =
          DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
    return LastInst;
  }
  /// \brief Check if this slice can be merged with an expensive cross register
  /// bank copy. E.g.,
  /// i = load i32
  /// f = bitcast i32 i to float
  bool canMergeExpensiveCrossRegisterBankCopy() const {
    if (!Inst || !Inst->hasOneUse())
      return false;
    SDNode *Use = *Inst->use_begin();
    if (Use->getOpcode() != ISD::BITCAST)
      return false;
    assert(DAG && "Missing context");
    const TargetLowering &TLI = DAG->getTargetLoweringInfo();
    EVT ResVT = Use->getValueType(0);
    const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
    const TargetRegisterClass *ArgRC =
        TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
    if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;
    // At this point, we know that we perform a cross-register-bank copy.
    // Check if it is expensive.
    const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
    // Assume bitcasts are cheap when the two register classes share a common
    // sub class; the copy is expensive only when they do not.
    if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
      return false;
    // Check if it will be merged with the load.
    // 1. Check the alignment constraint.
    unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
        ResVT.getTypeForEVT(*DAG->getContext()));
    if (RequiredAlignment > getAlignment())
      return false;
    // 2. Check that the load is a legal operation for that type.
    if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;
    // 3. Check that we do not have a zext in the way.
    if (Inst->getValueType(0) != getLoadedType())
      return false;
    return true;
  }
};
} // end anonymous namespace
/// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
/// \p UsedBits looks like 0..0 1..1 0..0.
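/// Illustrative: 0x00FF00 is dense, whereas 0xFF00FF is not.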
static bool areUsedBitsDense(const APInt &UsedBits) {
  // If all the bits are one, this is dense!
  if (UsedBits.isAllOnesValue())
    return true;
  // Get rid of the unused bits on the right.
  APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
  // Get rid of the unused bits on the left.
  if (NarrowedUsedBits.countLeadingZeros())
    NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
  // Check that the chunk of bits is completely used.
  return NarrowedUsedBits.isAllOnesValue();
}
/// \brief Check whether or not \p First and \p Second are next to each other
/// in memory. This means that there is no hole between the bits loaded
/// by \p First and the bits loaded by \p Second.
static bool areSlicesNextToEachOther(const LoadedSlice &First,
                                     const LoadedSlice &Second) {
  assert(First.Origin == Second.Origin && First.Origin &&
         "Unable to match different memory origins.");
  APInt UsedBits = First.getUsedBits();
  assert((UsedBits & Second.getUsedBits()) == 0 &&
         "Slices are not supposed to overlap.");
  UsedBits |= Second.getUsedBits();
  return areUsedBitsDense(UsedBits);
}
/// \brief Adjust the \p GlobalLSCost according to the target
/// pairing capabilities and the layout of the slices.
/// \pre \p GlobalLSCost should account for at least as many loads as
/// there are in the slices in \p LoadedSlices.
static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
                                 LoadedSlice::Cost &GlobalLSCost) {
  unsigned NumberOfSlices = LoadedSlices.size();
  // If there are fewer than 2 elements, no pairing is possible.
  if (NumberOfSlices < 2)
    return;
  // Sort the slices so that elements that are likely to be next to each
  // other in memory are next to each other in the list.
  std::sort(LoadedSlices.begin(), LoadedSlices.end(),
            [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
    assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
    return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
  });
  const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
  // First (resp. Second) is the first (resp. second) potential candidate
  // to be placed in a paired load.
  const LoadedSlice *First = nullptr;
  const LoadedSlice *Second = nullptr;
  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
                // Set the beginning of the pair.
                First = Second) {
    Second = &LoadedSlices[CurrSlice];
    // If First is NULL, it means we start a new pair.
    // Get to the next slice.
    if (!First)
      continue;
    EVT LoadedType = First->getLoadedType();
    // If the types of the slices are different, we cannot pair them.
    if (LoadedType != Second->getLoadedType())
      continue;
    // Check if the target supplies paired loads for this type.
    unsigned RequiredAlignment = 0;
    if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
      // Move to the next pair; this type is hopeless.
      Second = nullptr;
      continue;
    }
    // Check if we meet the alignment requirement.
    if (RequiredAlignment > First->getAlignment())
      continue;
    // Check that both loads are next to each other in memory.
    if (!areSlicesNextToEachOther(*First, *Second))
      continue;
    assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
    --GlobalLSCost.Loads;
    // Move to the next pair.
    Second = nullptr;
  }
}
/// \brief Check the profitability of all involved LoadedSlice.
/// Currently, it is considered profitable if there are exactly two
/// involved slices (1) which are (2) next to each other in memory, and
/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
///
/// Note: The order of the elements in \p LoadedSlices may be modified, but not
/// the elements themselves.
///
/// FIXME: When the cost model is mature enough, we can relax
/// constraints (1) and (2).
static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
                                const APInt &UsedBits, bool ForCodeSize) {
  unsigned NumberOfSlices = LoadedSlices.size();
  if (StressLoadSlicing)
    return NumberOfSlices > 1;
  // Check (1).
  if (NumberOfSlices != 2)
    return false;
  // Check (2).
  if (!areUsedBitsDense(UsedBits))
    return false;
  // Check (3).
  LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
  // The original code has one big load.
  OrigCost.Loads = 1;
  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
    const LoadedSlice &LS = LoadedSlices[CurrSlice];
    // Accumulate the cost of all the slices.
    LoadedSlice::Cost SliceCost(LS, ForCodeSize);
    GlobalSlicingCost += SliceCost;
    // Account as cost in the original configuration the gain obtained
    // with the current slices.
    OrigCost.addSliceGain(LS);
  }
  // If the target supports paired load, adjust the cost accordingly.
  adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
  return OrigCost > GlobalSlicingCost;
}
/// \brief If the given load, \p LI, is used only by trunc or trunc(lshr)
/// operations, split it in the various pieces being extracted.
///
/// This sort of thing is introduced by SROA.
/// This slicing takes care not to insert overlapping loads.
/// \pre LI is a simple load (i.e., not an atomic or volatile load).
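/// Illustrative: an i32 load whose only uses are (i16 trunc) and
/// (i16 trunc (srl ..., 16)) may be split into two adjacent i16 loads,
/// provided the cost model deems the slicing profitable.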
bool DAGCombiner::SliceUpLoad(SDNode *N) {
  if (Level < AfterLegalizeDAG)
    return false;
  LoadSDNode *LD = cast<LoadSDNode>(N);
  if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
      !LD->getValueType(0).isInteger())
    return false;
  // Keep track of already used bits to detect overlapping values.
  // In that case, we will just abort the transformation.
  APInt UsedBits(LD->getValueSizeInBits(0), 0);
  SmallVector<LoadedSlice, 4> LoadedSlices;
  // Check if this load is used as several smaller chunks of bits.
  // Basically, look for uses in trunc or trunc(lshr) and record a new chain
  // of computation for each trunc.
  for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
       UI != UIEnd; ++UI) {
    // Skip the uses of the chain.
    if (UI.getUse().getResNo() != 0)
      continue;
    SDNode *User = *UI;
    unsigned Shift = 0;
    // Check if this is a trunc(lshr).
    if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
        isa<ConstantSDNode>(User->getOperand(1))) {
      Shift = cast<ConstantSDNode>(User->getOperand(1))->getZExtValue();
      User = *User->use_begin();
    }
    // At this point, User is a truncate iff we encountered trunc or
    // trunc(lshr).
    if (User->getOpcode() != ISD::TRUNCATE)
      return false;
    // The width of the type must be a power of 2 and at least 8 bits.
    // Otherwise the load cannot be represented in LLVM IR.
    // Moreover, if we shifted by an amount that is not a multiple of 8,
    // the slice would cross byte boundaries. We do not support that.
    unsigned Width = User->getValueSizeInBits(0);
    if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
      return false;
    // Build the slice for this chain of computations.
    LoadedSlice LS(User, LD, Shift, &DAG);
    APInt CurrentUsedBits = LS.getUsedBits();
    // Check if this slice overlaps with another.
    if ((CurrentUsedBits & UsedBits) != 0)
      return false;
    // Update the bits used globally.
    UsedBits |= CurrentUsedBits;
    // Check if the new slice would be legal.
    if (!LS.isLegal())
      return false;
    // Record the slice.
    LoadedSlices.push_back(LS);
  }
  // Abort slicing if it does not seem to be profitable.
  if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
    return false;
  ++SlicedLoads;
  // Rewrite each chain to use an independent load.
  // By construction, each chain can be represented by a unique load.
  // Prepare the argument for the new token factor for all the slices.
  SmallVector<SDValue, 8> ArgChains;
  for (SmallVectorImpl<LoadedSlice>::const_iterator
           LSIt = LoadedSlices.begin(),
           LSItEnd = LoadedSlices.end();
       LSIt != LSItEnd; ++LSIt) {
    SDValue SliceInst = LSIt->loadSlice();
    CombineTo(LSIt->Inst, SliceInst, true);
    if (SliceInst.getNode()->getOpcode() != ISD::LOAD)
      SliceInst = SliceInst.getOperand(0);
    assert(SliceInst->getOpcode() == ISD::LOAD &&
           "It takes more than a zext to get to the loaded slice!!");
    ArgChains.push_back(SliceInst.getValue(1));
  }
  SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
                              ArgChains);
  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
  return true;
}
/// Check to see if V is (and load (ptr), imm), where the load has specific
/// bytes cleared out. If so, return the byte size being masked out and the
/// shift amount.
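/// Illustrative: for an i32 V = (and (load P), 0xFFFF00FF), byte 1 is the
/// byte being cleared, so this returns {1 /*MaskedBytes*/, 1 /*ByteShift*/}.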
static std::pair<unsigned, unsigned>
CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
  std::pair<unsigned, unsigned> Result(0, 0);
  // Check for the structure we're looking for.
  if (V->getOpcode() != ISD::AND ||
      !isa<ConstantSDNode>(V->getOperand(1)) ||
      !ISD::isNormalLoad(V->getOperand(0).getNode()))
    return Result;
  // Check the chain and pointer.
  LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
  if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
  // The store should be chained directly to the load or be an operand of a
  // tokenfactor.
  if (LD == Chain.getNode())
    ; // ok.
  else if (Chain->getOpcode() != ISD::TokenFactor)
    return Result; // Fail.
  else {
    bool isOk = false;
    for (const SDValue &ChainOp : Chain->op_values())
      if (ChainOp.getNode() == LD) {
        isOk = true;
        break;
      }
    if (!isOk) return Result;
  }
  // This only handles simple types.
  if (V.getValueType() != MVT::i16 &&
      V.getValueType() != MVT::i32 &&
      V.getValueType() != MVT::i64)
    return Result;
  // Check the constant mask. Invert it so that the bits being masked out are
  // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
  // follow the sign bit for uniformity.
  uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
  unsigned NotMaskLZ = countLeadingZeros(NotMask);
  if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
  unsigned NotMaskTZ = countTrailingZeros(NotMask);
  if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
  if (NotMaskLZ == 64) return Result;  // All zero mask.
  // See if we have a continuous run of bits. If so, we have 0*1+0*
  if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
    return Result;
  // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
  if (V.getValueType() != MVT::i64 && NotMaskLZ)
    NotMaskLZ -= 64 - V.getValueSizeInBits();
  unsigned MaskedBytes = (V.getValueSizeInBits() - NotMaskLZ - NotMaskTZ) / 8;
  switch (MaskedBytes) {
  case 1:
  case 2:
  case 4: break;
  default: return Result;  // All one mask, or 5-byte mask.
  }
  // Verify that the first bit starts at a multiple of mask so that the access
  // is aligned the same as the access width.
  if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
  Result.first = MaskedBytes;
  Result.second = NotMaskTZ/8;
  return Result;
}
/// Check to see if IVal is something that provides a value as specified by
/// MaskInfo. If so, replace the specified store with a narrower store of
/// truncated IVal.
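/// Illustrative: with MaskInfo == {1, 1} on an i32 store, this emits an i8
/// store of (trunc (srl IVal, 8)) at Ptr + 1 on little-endian targets,
/// assuming IVal is known zero outside byte 1.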
static SDNode *
ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
                                SDValue IVal, StoreSDNode *St,
                                DAGCombiner *DC) {
  unsigned NumBytes = MaskInfo.first;
  unsigned ByteShift = MaskInfo.second;
  SelectionDAG &DAG = DC->getDAG();
  // Check to see if IVal is all zeros in the part being masked in by the 'or'
  // that uses this. If not, this is not a replacement.
  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
                                  ByteShift*8, (ByteShift+NumBytes)*8);
  if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
  // Check that it is legal on the target to do this. It is legal if the new
  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
  // legalization.
  MVT VT = MVT::getIntegerVT(NumBytes*8);
  if (!DC->isTypeLegal(VT))
    return nullptr;
  // Okay, we can do this! Replace the 'St' store with a store of IVal that is
  // shifted by ByteShift and truncated down to NumBytes.
  if (ByteShift) {
    SDLoc DL(IVal);
    IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
                       DAG.getConstant(ByteShift*8, DL,
                                    DC->getShiftAmountTy(IVal.getValueType())));
  }
  // Figure out the offset for the store and the alignment of the access.
  unsigned StOffset;
  unsigned NewAlign = St->getAlignment();
  if (DAG.getDataLayout().isLittleEndian())
    StOffset = ByteShift;
  else
    StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
  SDValue Ptr = St->getBasePtr();
  if (StOffset) {
    SDLoc DL(IVal);
    Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
                      Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
    NewAlign = MinAlign(NewAlign, StOffset);
  }
  // Truncate down to the new size.
  IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
  ++OpsNarrowed;
  return DAG.getStore(St->getChain(), SDLoc(St), IVal, Ptr,
                      St->getPointerInfo().getWithOffset(StOffset),
                      false, false, NewAlign).getNode();
}
/// Look for a sequence of load / op / store where op is one of 'or', 'xor',
/// and 'and' of immediates. If 'op' is only touching some of the loaded bits,
/// try narrowing the load and store if it would end up being a win for
/// performance or code size.
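/// Illustrative: "store (or (i32 load P), 0xFF00), P" only changes byte 1,
/// so on a little-endian target where i8 ops are legal and narrowing is
/// profitable it becomes an i8 load / or / store at P + 1.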
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
  StoreSDNode *ST = cast<StoreSDNode>(N);
  if (ST->isVolatile())
    return SDValue();
  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr = ST->getBasePtr();
  EVT VT = Value.getValueType();
  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
    return SDValue();
  unsigned Opc = Value.getOpcode();
  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
  // is a byte mask indicating a consecutive number of bytes, check to see if
  // Y is known to provide just those bytes. If so, we try to replace the
  // load + replace + store sequence with a single (narrower) store, which
  // makes the load dead.
  if (Opc == ISD::OR) {
    std::pair<unsigned, unsigned> MaskedLoad;
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                          Value.getOperand(1),
                                                          ST, this))
        return SDValue(NewST, 0);
    // Or is commutative, so try swapping X and Y.
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                          Value.getOperand(0),
                                                          ST, this))
        return SDValue(NewST, 0);
  }
  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
      Value.getOperand(1).getOpcode() != ISD::Constant)
    return SDValue();
  SDValue N0 = Value.getOperand(0);
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      Chain == SDValue(N0.getNode(), 1)) {
    LoadSDNode *LD = cast<LoadSDNode>(N0);
    if (LD->getBasePtr() != Ptr ||
        LD->getPointerInfo().getAddrSpace() !=
        ST->getPointerInfo().getAddrSpace())
      return SDValue();
    // Find the type to narrow the load / op / store to.
    SDValue N1 = Value.getOperand(1);
    unsigned BitWidth = N1.getValueSizeInBits();
    APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
    if (Opc == ISD::AND)
      Imm ^= APInt::getAllOnesValue(BitWidth);
    if (Imm == 0 || Imm.isAllOnesValue())
      return SDValue();
    unsigned ShAmt = Imm.countTrailingZeros();
    unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
    unsigned NewBW = NextPowerOf2(MSB - ShAmt);
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    // The narrowing should be profitable, the load/store operation should be
    // legal (or custom) and the store size should be equal to the NewVT width.
    while (NewBW < BitWidth &&
           (NewVT.getStoreSizeInBits() != NewBW ||
            !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
            !TLI.isNarrowingProfitable(VT, NewVT))) {
      NewBW = NextPowerOf2(NewBW);
      NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    }
    if (NewBW >= BitWidth)
      return SDValue();
    // If the changed lsb does not start at a type-bitwidth boundary,
    // start at the previous one.
    if (ShAmt % NewBW)
      ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
                                   std::min(BitWidth, ShAmt + NewBW));
    if ((Imm & Mask) == Imm) {
      APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
      if (Opc == ISD::AND)
        NewImm ^= APInt::getAllOnesValue(NewBW);
      uint64_t PtrOff = ShAmt / 8;
      // For big endian targets, we need to adjust the offset to the pointer to
      // load the correct bytes.
      if (DAG.getDataLayout().isBigEndian())
        PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
      unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
      Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
      if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
        return SDValue();
      SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
                                   Ptr.getValueType(), Ptr,
                                   DAG.getConstant(PtrOff, SDLoc(LD),
                                                   Ptr.getValueType()));
      SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0),
                                  LD->getChain(), NewPtr,
                                  LD->getPointerInfo().getWithOffset(PtrOff),
                                  LD->isVolatile(), LD->isNonTemporal(),
                                  LD->isInvariant(), NewAlign,
                                  LD->getAAInfo());
      SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
                                   DAG.getConstant(NewImm, SDLoc(Value),
                                                   NewVT));
      SDValue NewST = DAG.getStore(Chain, SDLoc(N),
                                   NewVal, NewPtr,
                                   ST->getPointerInfo().getWithOffset(PtrOff),
                                   false, false, NewAlign);
      AddToWorklist(NewPtr.getNode());
      AddToWorklist(NewLD.getNode());
      AddToWorklist(NewVal.getNode());
      WorklistRemover DeadNodes(*this);
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
      ++OpsNarrowed;
      return NewST;
    }
  }
  return SDValue();
}
/// For a given floating point load / store pair, if the load value isn't used
/// by any other operations, then consider transforming the pair to integer
/// load / store operations if the target deems the transformation profitable.
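/// Illustrative: an f32 copy such as (store (f32 load %p), %q) can become
/// (store (i32 load %p), %q), avoiding a round trip through the FP
/// register bank.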
SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
  StoreSDNode *ST = cast<StoreSDNode>(N);
  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
      Value.hasOneUse() &&
      Chain == SDValue(Value.getNode(), 1)) {
    LoadSDNode *LD = cast<LoadSDNode>(Value);
    EVT VT = LD->getMemoryVT();
    if (!VT.isFloatingPoint() ||
        VT != ST->getMemoryVT() ||
        LD->isNonTemporal() ||
        ST->isNonTemporal() ||
        LD->getPointerInfo().getAddrSpace() != 0 ||
        ST->getPointerInfo().getAddrSpace() != 0)
      return SDValue();
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
    if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
        !TLI.isOperationLegal(ISD::STORE, IntVT) ||
        !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
        !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
      return SDValue();
    unsigned LDAlign = LD->getAlignment();
    unsigned STAlign = ST->getAlignment();
    Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
    unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
    if (LDAlign < ABIAlign || STAlign < ABIAlign)
      return SDValue();
    SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value),
                                LD->getChain(), LD->getBasePtr(),
                                LD->getPointerInfo(),
                                false, false, false, LDAlign);
    SDValue NewST = DAG.getStore(NewLD.getValue(1), SDLoc(N),
                                 NewLD, ST->getBasePtr(),
                                 ST->getPointerInfo(),
                                 false, false, STAlign);
    AddToWorklist(NewLD.getNode());
    AddToWorklist(NewST.getNode());
    WorklistRemover DeadNodes(*this);
    DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
    ++LdStFP2Int;
    return NewST;
  }
  return SDValue();
}
namespace {
/// Helper struct to parse and store a memory address as base + index + offset.
/// We ignore sign extensions when it is safe to do so.
/// The following two expressions are not equivalent. To differentiate we need
/// to store whether there was a sign extension involved in the index
/// computation.
/// (load (i64 add (i64 copyfromreg %c)
///                (i64 signextend (add (i8 load %index)
///                                     (i8 1))))
/// vs
///
/// (load (i64 add (i64 copyfromreg %c)
///                (i64 signextend (i32 add (i32 signextend (i8 load %index))
///                                         (i32 1)))))
struct BaseIndexOffset {
  SDValue Base;
  SDValue Index;
  int64_t Offset;
  bool IsIndexSignExt;
  BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}
  BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
                  bool IsIndexSignExt) :
    Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}
  bool equalBaseIndex(const BaseIndexOffset &Other) {
    return Other.Base == Base && Other.Index == Index &&
           Other.IsIndexSignExt == IsIndexSignExt;
  }
  /// Parses the tree rooted at Ptr into base, index, and offset addresses.
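  /// Illustrative: (add %base, (add %index, 4)) parses to
  /// { Base = %base, Index = %index, Offset = 4, IsIndexSignExt = false }.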
  static BaseIndexOffset match(SDValue Ptr) {
    bool IsIndexSignExt = false;
    // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD
    // instruction, then it could be just the BASE or everything else we don't
    // know how to handle. Just use Ptr as BASE and give up.
    if (Ptr->getOpcode() != ISD::ADD)
      return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
    // We know that we have at least an ADD instruction. Try to pattern match
    // the simple case of BASE + OFFSET.
    if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
      int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
      return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset,
                             IsIndexSignExt);
    }
    // Inside a loop the current BASE pointer is calculated using an ADD and a
    // MUL instruction. In this case Ptr is the actual BASE pointer.
    // (i64 add (i64 %array_ptr)
    //          (i64 mul (i64 %induction_var)
    //                   (i64 %element_size)))
    if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
      return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
    // Look at Base + Index + Offset cases.
    SDValue Base = Ptr->getOperand(0);
    SDValue IndexOffset = Ptr->getOperand(1);
    // Skip signextends.
    if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
      IndexOffset = IndexOffset->getOperand(0);
      IsIndexSignExt = true;
    }
    // Either the case of Base + Index (no offset) or something else.
    if (IndexOffset->getOpcode() != ISD::ADD)
      return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt);
    // Now we have the case of Base + Index + offset.
    SDValue Index = IndexOffset->getOperand(0);
    SDValue Offset = IndexOffset->getOperand(1);
    if (!isa<ConstantSDNode>(Offset))
      return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
    // Ignore signextends.
    if (Index->getOpcode() == ISD::SIGN_EXTEND) {
      Index = Index->getOperand(0);
      IsIndexSignExt = true;
    } else IsIndexSignExt = false;
    int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
    return BaseIndexOffset(Base, Index, Off, IsIndexSignExt);
  }
};
} // namespace
SDValue DAGCombiner::getMergedConstantVectorStore(SelectionDAG &DAG,
                                                  SDLoc SL,
                                                  ArrayRef<MemOpLink> Stores,
                                                  EVT Ty) const {
  SmallVector<SDValue, 8> BuildVector;
  for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I)
    BuildVector.push_back(cast<StoreSDNode>(Stores[I].MemNode)->getValue());
  return DAG.getNode(ISD::BUILD_VECTOR, SL, Ty, BuildVector);
}
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
    SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
    unsigned NumElem, bool IsConstantSrc, bool UseVector) {
  // Make sure we have something to merge.
  if (NumElem < 2)
    return false;
  int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
  unsigned LatestNodeUsed = 0;
  for (unsigned i = 0; i < NumElem; ++i) {
    // Find a chain for the new wide-store operand. Notice that some
    // of the store nodes that we found may not be selected for inclusion
    // in the wide store. The chain we use needs to be the chain of the
    // latest store node which is *used* and replaced by the wide store.
    if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
      LatestNodeUsed = i;
  }
  // The latest Node in the DAG.
  LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
  SDLoc DL(StoreNodes[0].MemNode);
  SDValue StoredVal;
  if (UseVector) {
    // Find a legal type for the vector store.
    EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
    assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
    if (IsConstantSrc) {
      StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Ty);
    } else {
      SmallVector<SDValue, 8> Ops;
      for (unsigned i = 0; i < NumElem; ++i) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
        SDValue Val = St->getValue();
        // All of the operands of a BUILD_VECTOR must have the same type.
        if (Val.getValueType() != MemVT)
          return false;
        Ops.push_back(Val);
      }
      // Build the extracted vector elements back into a vector.
      StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops);
    }
  } else {
    // We should always use a vector store when merging extracted vector
    // elements, so this path implies a store of constants.
    assert(IsConstantSrc && "Merged vector elements should use vector store");
    unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
    APInt StoreInt(SizeInBits, 0);
    // Construct a single integer constant which is made of the smaller
    // constant inputs.
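    // Illustrative: merging two i16 stores of 0x1111 (lower address) and
    // 0x2222 packs to the i32 constant 0x22221111 on a little-endian target.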
    bool IsLE = DAG.getDataLayout().isLittleEndian();
    for (unsigned i = 0; i < NumElem; ++i) {
      unsigned Idx = IsLE ? (NumElem - 1 - i) : i;
      StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
      SDValue Val = St->getValue();
      StoreInt <<= ElementSizeBytes * 8;
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
        StoreInt |= C->getAPIntValue().zext(SizeInBits);
      } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
        StoreInt |= C->getValueAPF().bitcastToAPInt().zext(SizeInBits);
      } else {
        llvm_unreachable("Invalid constant element type");
      }
    }
    // Create the new Load and Store operations.
    EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
    StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
  }
  SDValue NewStore = DAG.getStore(LatestOp->getChain(), DL, StoredVal,
                                  FirstInChain->getBasePtr(),
                                  FirstInChain->getPointerInfo(),
                                  false, false,
                                  FirstInChain->getAlignment());
  // Replace the last store with the new store.
  CombineTo(LatestOp, NewStore);
  // Erase all other stores.
  for (unsigned i = 0; i < NumElem; ++i) {
    if (StoreNodes[i].MemNode == LatestOp)
      continue;
    StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
    // ReplaceAllUsesWith will replace all uses that existed when it was
    // called, but graph optimizations may cause new ones to appear. For
    // example, the case in pr14333 looks like
    //
    //   St's chain -> St -> another store -> X
    //
    // And the only difference from St to the other store is the chain.
    // When we change its chain to be St's chain they become identical,
    // get CSEed and the net result is that X is now a use of St.
    // Since we know that St is redundant, just iterate.
    while (!St->use_empty())
      DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
    deleteAndRecombine(St);
  }
  return true;
}
static bool allowableAlignment(const SelectionDAG &DAG,
                               const TargetLowering &TLI, EVT EVTTy,
                               unsigned AS, unsigned Align) {
  if (TLI.allowsMisalignedMemoryAccesses(EVTTy, AS, Align))
    return true;
  Type *Ty = EVTTy.getTypeForEVT(*DAG.getContext());
  unsigned ABIAlignment = DAG.getDataLayout().getPrefTypeAlignment(Ty);
  return (Align >= ABIAlignment);
}
void DAGCombiner::getStoreMergeAndAliasCandidates(
    StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
    SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes) {
  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr());
  // We must have a base and an offset.
  if (!BasePtr.Base.getNode())
    return;
  // Do not handle stores to undef base pointers.
  if (BasePtr.Base.getOpcode() == ISD::UNDEF)
    return;
  // Walk up the chain and look for nodes with offsets from the same
  // base pointer. Stop when reaching an instruction of a different kind
  // or one that has a different base pointer.
  EVT MemVT = St->getMemoryVT();
  unsigned Seq = 0;
  StoreSDNode *Index = St;
  while (Index) {
    // If the chain has more than one use, then we can't reorder the mem ops.
    if (Index != St && !SDValue(Index, 0)->hasOneUse())
      break;
    // Find the base pointer and offset for this memory node.
    BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr());
    // Check that the base pointer is the same as the original one.
    if (!Ptr.equalBaseIndex(BasePtr))
      break;
    // The memory operands must not be volatile.
    if (Index->isVolatile() || Index->isIndexed())
      break;
    // No truncation.
    if (StoreSDNode *St = dyn_cast<StoreSDNode>(Index))
      if (St->isTruncatingStore())
        break;
    // The stored memory type must be the same.
    if (Index->getMemoryVT() != MemVT)
      break;
    // We found a potential memory operand to merge.
    StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
    // Find the next memory operand in the chain. If the next operand in the
    // chain is a store then move up and continue the scan with the next
    // memory operand. If the next operand is a load save it and use alias
    // information to check if it interferes with anything.
    SDNode *NextInChain = Index->getChain().getNode();
    while (1) {
      if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
        // We found a store node. Use it for the next iteration.
        Index = STn;
        break;
      } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
        if (Ldn->isVolatile()) {
          Index = nullptr;
          break;
        }
        // Save the load node for later. Continue the scan.
        AliasLoadNodes.push_back(Ldn);
        NextInChain = Ldn->getChain().getNode();
        continue;
      } else {
        Index = nullptr;
        break;
      }
    }
  }
}
bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
  if (OptLevel == CodeGenOpt::None)
    return false;
  EVT MemVT = St->getMemoryVT();
  int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
  bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
      Attribute::NoImplicitFloat);
  // This function cannot currently deal with non-byte-sized memory sizes.
  if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
    return false;
  // Don't merge vectors into wider inputs.
  if (MemVT.isVector() || !MemVT.isSimple())
    return false;
  // Perform an early exit check. Do not bother looking at stored values that
  // are not constants, loads, or extracted vector elements.
  SDValue StoredVal = St->getValue();
  bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
  bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
                       isa<ConstantFPSDNode>(StoredVal);
  bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT);
  if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc)
    return false;
  // Only look at ends of store sequences.
  SDValue Chain = SDValue(St, 0);
  if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
    return false;
  // Save the LoadSDNodes that we find in the chain.
  // We need to make sure that these nodes do not interfere with
  // any of the store nodes.
  SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
  // Save the StoreSDNodes that we find in the chain.
  SmallVector<MemOpLink, 8> StoreNodes;
  getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes);
  // Check if there is anything to merge.
  if (StoreNodes.size() < 2)
    return false;
  // Sort the memory operands according to their distance from the
  // base pointer.
  std::sort(StoreNodes.begin(), StoreNodes.end(),
            [](MemOpLink LHS, MemOpLink RHS) {
    return LHS.OffsetFromBase < RHS.OffsetFromBase ||
           (LHS.OffsetFromBase == RHS.OffsetFromBase &&
            LHS.SequenceNum > RHS.SequenceNum);
  });
  // Scan the memory operations on the chain and find the first
  // non-consecutive store memory address.
  unsigned LastConsecutiveStore = 0;
  int64_t StartAddress = StoreNodes[0].OffsetFromBase;
  for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) {
    // Check that the addresses are consecutive starting from the second
    // element in the list of stores.
    if (i > 0) {
      int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
      if (CurrAddress - StartAddress != (ElementSizeBytes * i))
        break;
    }
    bool Alias = false;
    // Check if this store interferes with any of the loads that we found.
    for (unsigned ld = 0, lde = AliasLoadNodes.size(); ld < lde; ++ld)
      if (isAlias(AliasLoadNodes[ld], StoreNodes[i].MemNode)) {
        Alias = true;
        break;
      }
    // We found a load that aliases with this store. Stop the sequence.
    if (Alias)
      break;
    // Mark this node as useful.
    LastConsecutiveStore = i;
  }
  // The node with the lowest store address.
  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
  unsigned FirstStoreAS = FirstInChain->getAddressSpace();
  unsigned FirstStoreAlign = FirstInChain->getAlignment();
  // Store the constants into memory as one consecutive store.
  if (IsConstantSrc) {
    unsigned LastLegalType = 0;
    unsigned LastLegalVectorType = 0;
    bool NonZero = false;
    for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
      StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
      SDValue StoredVal = St->getValue();
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
        NonZero |= !C->isNullValue();
      } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) {
        NonZero |= !C->getConstantFPValue()->isNullValue();
      } else {
        // Non-constant.
        break;
      }
      // Find a legal type for the constant store.
      unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
      EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
      if (TLI.isTypeLegal(StoreTy) &&
          allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS,
                             FirstStoreAlign)) {
        LastLegalType = i+1;
      // Or check whether a truncstore is legal.
      } else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
                 TargetLowering::TypePromoteInteger) {
        EVT LegalizedStoredValueTy =
            TLI.getTypeToTransformTo(*DAG.getContext(),
                                     StoredVal.getValueType());
        if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
            allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS,
                               FirstStoreAlign)) {
          LastLegalType = i + 1;
        }
      }
      // Find a legal type for the vector store.
      EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
      if (TLI.isTypeLegal(Ty) &&
          allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign)) {
        LastLegalVectorType = i + 1;
      }
    }
    // We only use vectors if the constant is known to be zero or the target
    // allows it and the function is not marked with the noimplicitfloat
    // attribute.
    if (NoVectors) {
      LastLegalVectorType = 0;
    } else if (NonZero && !TLI.storeOfVectorConstantIsCheap(MemVT,
                                                            LastLegalVectorType,
                                                            FirstStoreAS)) {
      LastLegalVectorType = 0;
    }
    // Check if we found a legal integer type to store.
    if (LastLegalType == 0 && LastLegalVectorType == 0)
      return false;
    bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
    unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
    return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
                                           true, UseVector);
  }
  // When extracting multiple vector elements, try to store them
  // in one vector store rather than a sequence of scalar stores.
  if (IsExtractVecEltSrc) {
    unsigned NumElem = 0;
    for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
      StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
      SDValue StoredVal = St->getValue();
      // This restriction could be loosened.
      // Bail out if any stored values are not elements extracted from a
      // vector. It should be possible to handle mixed sources, but load
      // sources need more careful handling (see the block of code below that
      // handles consecutive loads).
      if (StoredVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
        return false;
      // Find a legal type for the vector store.
      EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
      if (TLI.isTypeLegal(Ty) &&
          allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign))
        NumElem = i + 1;
    }
    return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
                                           false, true);
  }
  // Below we handle the case of multiple consecutive stores that
  // come from multiple consecutive loads. We merge them into a single
  // wide load and a single wide store.
  // Look for load nodes which are used by the stored values.
  SmallVector<MemOpLink, 8> LoadNodes;
  // Find acceptable loads. Loads need to have the same chain (token factor),
  // must not be zext, volatile, indexed, and they must be consecutive.
  BaseIndexOffset LdBasePtr;
  for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
    StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
    LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
    if (!Ld) break;
    // Loads must only have one use.
    if (!Ld->hasNUsesOfValue(1, 0))
      break;
    // The memory operands must not be volatile.
    if (Ld->isVolatile() || Ld->isIndexed())
      break;
    // We do not accept ext loads.
    if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
      break;
    // The stored memory type must be the same.
    if (Ld->getMemoryVT() != MemVT)
      break;
    BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr());
    // If this is not the first ptr that we check.
    if (LdBasePtr.Base.getNode()) {
      // The base ptr must be the same.
      if (!LdPtr.equalBaseIndex(LdBasePtr))
        break;
    } else {
      // Check that all other base pointers are the same as this one.
      LdBasePtr = LdPtr;
    }
    // We found a potential memory operand to merge.
    LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0));
  }
  if (LoadNodes.size() < 2)
    return false;
  // If we have load/store pair instructions and we only have two values,
  // don't bother.
  unsigned RequiredAlignment;
  if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
      St->getAlignment() >= RequiredAlignment)
    return false;
  LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
  unsigned FirstLoadAS = FirstLoad->getAddressSpace();
  unsigned FirstLoadAlign = FirstLoad->getAlignment();
  // Scan the memory operations on the chain and find the first
  // non-consecutive load memory address. These variables hold the index in
  // the store node array.
  unsigned LastConsecutiveLoad = 0;
  // This variable refers to the size and not index in the array.
  unsigned LastLegalVectorType = 0;
  unsigned LastLegalIntegerType = 0;
  StartAddress = LoadNodes[0].OffsetFromBase;
  SDValue FirstChain = FirstLoad->getChain();
  for (unsigned i = 1; i < LoadNodes.size(); ++i) {
    // All loads must share the same chain.
    if (LoadNodes[i].MemNode->getChain() != FirstChain)
      break;
    int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
    if (CurrAddress - StartAddress != (ElementSizeBytes * i))
      break;
    LastConsecutiveLoad = i;
    // Find a legal type for the vector store.
    EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
    if (TLI.isTypeLegal(StoreTy) &&
        allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) &&
        allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign)) {
      LastLegalVectorType = i + 1;
    }
    // Find a legal type for the integer store.
    unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
    StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
    if (TLI.isTypeLegal(StoreTy) &&
        allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) &&
        allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign))
      LastLegalIntegerType = i + 1;
    // Or check whether a truncstore and extload is legal.
    else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
             TargetLowering::TypePromoteInteger) {
      EVT LegalizedStoredValueTy =
          TLI.getTypeToTransformTo(*DAG.getContext(), StoreTy);
      if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
          TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
          TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
          TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
          allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS,
                             FirstStoreAlign) &&
          allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstLoadAS,
                             FirstLoadAlign))
        LastLegalIntegerType = i+1;
    }
  }
  // Only use vector types if the vector type is larger than the integer type.
  // If they are the same, use integers.
  bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
  unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType);
  // We add +1 here because the LastXXX variables refer to a location (the
  // last legal index) while NumElem refers to a count of elements.
  unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1;
  NumElem = std::min(LastLegalType, NumElem);
  if (NumElem < 2)
    return false;
  // The latest Node in the DAG.
  unsigned LatestNodeUsed = 0;
  for (unsigned i = 1; i < NumElem; ++i) {
    // Find a chain for the new wide-store operand. Notice that some
    // of the store nodes that we found may not be selected for inclusion
    // in the wide store. The chain we use needs to be the chain of the
    // latest store node which is *used* and replaced by the wide store.
    if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
      LatestNodeUsed = i;
  }
  LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
  // Find if it is better to use vectors or integers to load and store
  // to memory.
  EVT JointMemOpVT;
  if (UseVectorTy) {
    JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
  } else {
    unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
    JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
  }
  SDLoc LoadDL(LoadNodes[0].MemNode);
  SDLoc StoreDL(StoreNodes[0].MemNode);
  SDValue NewLoad = DAG.getLoad(
      JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
      FirstLoad->getPointerInfo(), false, false, false, FirstLoadAlign);
  SDValue NewStore = DAG.getStore(
      LatestOp->getChain(), StoreDL, NewLoad, FirstInChain->getBasePtr(),
      FirstInChain->getPointerInfo(), false, false, FirstStoreAlign);
  // Replace one of the loads with the new load.
  LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
                                SDValue(NewLoad.getNode(), 1));
  // Remove the rest of the load chains.
  for (unsigned i = 1; i < NumElem; ++i) {
    // Replace all chain users of the old load nodes with the chain of the new
    // load node.
    LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
    DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain());
  }
  // Replace the last store with the new store.
  CombineTo(LatestOp, NewStore);
  // Erase all other stores.
  for (unsigned i = 0; i < NumElem; ++i) {
    // Remove all Store nodes.
    if (StoreNodes[i].MemNode == LatestOp)
      continue;
    StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
    DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
    deleteAndRecombine(St);
  }
  return true;
}
  9810. SDValue DAGCombiner::visitSTORE(SDNode *N) {
  9811. StoreSDNode *ST = cast<StoreSDNode>(N);
  9812. SDValue Chain = ST->getChain();
  9813. SDValue Value = ST->getValue();
  9814. SDValue Ptr = ST->getBasePtr();
  9815. // If this is a store of a bit convert, store the input value if the
  9816. // resultant store does not need a higher alignment than the original.
  9817. if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
  9818. ST->isUnindexed()) {
  9819. unsigned OrigAlign = ST->getAlignment();
  9820. EVT SVT = Value.getOperand(0).getValueType();
  9821. unsigned Align = DAG.getDataLayout().getABITypeAlignment(
  9822. SVT.getTypeForEVT(*DAG.getContext()));
  9823. if (Align <= OrigAlign &&
  9824. ((!LegalOperations && !ST->isVolatile()) ||
  9825. TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
  9826. return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0),
  9827. Ptr, ST->getPointerInfo(), ST->isVolatile(),
  9828. ST->isNonTemporal(), OrigAlign,
  9829. ST->getAAInfo());
  9830. }
  9831. // Turn 'store undef, Ptr' -> nothing.
  9832. if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed())
  9833. return Chain;

  // Turn 'store float 1.0, Ptr' -> 'store int 0x3F800000, Ptr'.
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
    // NOTE: If the original store is volatile, this transform must not increase
    // the number of stores.  For example, on x86-32 an f64 can be stored in one
    // processor operation but an i64 (which is not legal) requires two.  So the
    // transform should not be done in this case.
    if (Value.getOpcode() != ISD::TargetConstantFP) {
      SDValue Tmp;
      switch (CFP->getSimpleValueType(0).SimpleTy) {
      default: llvm_unreachable("Unknown FP type");
      case MVT::f16:    // We don't do this for these yet.
      case MVT::f80:
      case MVT::f128:
      case MVT::ppcf128:
        break;
      case MVT::f32:
        if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
            TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
          Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
                                    bitcastToAPInt().getZExtValue(),
                                SDLoc(CFP), MVT::i32);
          return DAG.getStore(Chain, SDLoc(N), Tmp,
                              Ptr, ST->getMemOperand());
        }
        break;
      case MVT::f64:
        if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
             !ST->isVolatile()) ||
            TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
          Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
                                    getZExtValue(), SDLoc(CFP), MVT::i64);
          return DAG.getStore(Chain, SDLoc(N), Tmp,
                              Ptr, ST->getMemOperand());
        }

        if (!ST->isVolatile() &&
            TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
          // Many FP stores are not made apparent until after legalize, e.g. for
          // argument passing.  Since this is so common, custom legalize the
          // 64-bit integer store into two 32-bit stores.
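          // For example (illustrative), on a 32-bit little-endian target:
          //   store f64 1.0, Ptr
          // becomes
          //   store i32 0x00000000, Ptr
          //   store i32 0x3FF00000, Ptr+4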
          uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
          SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
          SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
          if (DAG.getDataLayout().isBigEndian())
            std::swap(Lo, Hi);

          unsigned Alignment = ST->getAlignment();
          bool isVolatile = ST->isVolatile();
          bool isNonTemporal = ST->isNonTemporal();
          AAMDNodes AAInfo = ST->getAAInfo();

          SDLoc DL(N);

          SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo,
                                     Ptr, ST->getPointerInfo(),
                                     isVolatile, isNonTemporal,
                                     ST->getAlignment(), AAInfo);
          Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
                            DAG.getConstant(4, DL, Ptr.getValueType()));
          Alignment = MinAlign(Alignment, 4U);
          SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi,
                                     Ptr, ST->getPointerInfo().getWithOffset(4),
                                     isVolatile, isNonTemporal,
                                     Alignment, AAInfo);
          return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                             St0, St1);
        }

        break;
      }
    }
  }

  // Try to infer better alignment information than the store already has.
  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > ST->getAlignment()) {
        SDValue NewStore =
            DAG.getTruncStore(Chain, SDLoc(N), Value,
                              Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
                              ST->isVolatile(), ST->isNonTemporal(), Align,
                              ST->getAAInfo());
        if (NewStore.getNode() != N)
          return CombineTo(ST, NewStore, true);
      }
    }
  }

  // Try transforming a pair of floating-point load / store ops to integer
  // load / store ops.
  SDValue NewST = TransformFPLoadStorePair(N);
  if (NewST.getNode())
    return NewST;

  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
                                                  : DAG.getSubtarget().useAA();
#ifndef NDEBUG
  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;
#endif
  if (UseAA && ST->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplStore;

      // Replace the chain to avoid dependency.
      if (ST->isTruncatingStore()) {
        ReplStore = DAG.getTruncStore(BetterChain, SDLoc(N), Value, Ptr,
                                      ST->getMemoryVT(), ST->getMemOperand());
      } else {
        ReplStore = DAG.getStore(BetterChain, SDLoc(N), Value, Ptr,
                                 ST->getMemOperand());
      }

      // Create token to keep both nodes around.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplStore);

      // Make sure the new and old chains are cleaned up.
      AddToWorklist(Token.getNode());

      // Don't add users to work list.
      return CombineTo(N, Token, false);
    }
  }

  // Try transforming N to an indexed store.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // FIXME: is there such a thing as a truncating indexed store?
  if (ST->isTruncatingStore() && ST->isUnindexed() &&
      Value.getValueType().isInteger()) {
    // See if we can simplify the input to this truncstore with knowledge that
    // only the low bits are being used.  For example:
    //   "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
    SDValue Shorter =
        GetDemandedBits(Value,
                        APInt::getLowBitsSet(
                            Value.getValueType().getScalarType().getSizeInBits(),
                            ST->getMemoryVT().getScalarType().getSizeInBits()));
    AddToWorklist(Value.getNode());
    if (Shorter.getNode())
      return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
                               Ptr, ST->getMemoryVT(), ST->getMemOperand());

    // Otherwise, see if we can simplify the operation with
    // SimplifyDemandedBits, which only works if the value has a single use.
    if (SimplifyDemandedBits(Value,
                             APInt::getLowBitsSet(
                                 Value.getValueType().getScalarType().getSizeInBits(),
                                 ST->getMemoryVT().getScalarType().getSizeInBits())))
      return SDValue(N, 0);
  }

  // If this is a load followed by a store to the same location, then the store
  // is dead/noop.
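  // For example (illustrative):
  //   (store (i32 (load p)), p) -> the store is a no-op and can be dropped.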
  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
    if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
        ST->isUnindexed() && !ST->isVolatile() &&
        // There can't be any side effects between the load and store, such as
        // a call or store.
        Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
      // The store is dead, remove it.
      return Chain;
    }
  }

  // If this is a store preceded by a store of the same value to the same
  // location, then the second store is dead/noop.
  if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
    if (ST1->getBasePtr() == Ptr && ST->getMemoryVT() == ST1->getMemoryVT() &&
        ST1->getValue() == Value && ST->isUnindexed() && !ST->isVolatile() &&
        ST1->isUnindexed() && !ST1->isVolatile()) {
      // The store is dead, remove it.
      return Chain;
    }
  }

  // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
  // truncating store.  We can do this even if this is already a truncstore.
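  // For example (illustrative):
  //   (store (i16 (truncate i32 x)), p) -> (truncstore x, p, i16)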
  if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
      && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
      TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
                            ST->getMemoryVT())) {
    return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
                             Ptr, ST->getMemoryVT(), ST->getMemOperand());
  }

  // Only perform this optimization before the types are legal, because we
  // don't want to perform this optimization on every DAGCombine invocation.
  if (!LegalTypes) {
    bool EverChanged = false;

    do {
      // There can be multiple store sequences on the same chain.
      // Keep trying to merge store sequences until we are unable to do so
      // or until we merge the last store on the chain.
      bool Changed = MergeConsecutiveStores(ST);
      EverChanged |= Changed;
      if (!Changed) break;
    } while (ST->getOpcode() != ISD::DELETED_NODE);

    if (EverChanged)
      return SDValue(N, 0);
  }

  return ReduceLoadOpStoreWidth(N);
}

SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
  SDValue InVec = N->getOperand(0);
  SDValue InVal = N->getOperand(1);
  SDValue EltNo = N->getOperand(2);
  SDLoc dl(N);

  // If the inserted element is an UNDEF, just use the input vector.
  if (InVal.getOpcode() == ISD::UNDEF)
    return InVec;

  EVT VT = InVec.getValueType();

  // If we can't generate a legal BUILD_VECTOR, exit.
  if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
    return SDValue();

  // Check that we know which element is being inserted.
  if (!isa<ConstantSDNode>(EltNo))
    return SDValue();
  unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

  // Canonicalize insert_vector_elt dag nodes.
  // Example:
  //   (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
  //   -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
  //
  // Do this only if the child insert_vector node has one use; also
  // do this only if indices are both constants and Idx1 < Idx0.
  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
      && isa<ConstantSDNode>(InVec.getOperand(2))) {
    unsigned OtherElt =
        cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue();
    if (Elt < OtherElt) {
      // Swap nodes.
      SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VT,
                                  InVec.getOperand(0), InVal, EltNo);
      AddToWorklist(NewOp.getNode());
      return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
                         VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
    }
  }

  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
  // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
  // vector elements.
  SmallVector<SDValue, 8> Ops;
  // Do not combine these two vectors if the output vector will not replace
  // the input vector.
  if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
    Ops.append(InVec.getNode()->op_begin(),
               InVec.getNode()->op_end());
  } else if (InVec.getOpcode() == ISD::UNDEF) {
    unsigned NElts = VT.getVectorNumElements();
    Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
  } else {
    return SDValue();
  }

  // Insert the element.
  if (Elt < Ops.size()) {
    // All the operands of BUILD_VECTOR must have the same type;
    // we enforce that here.
    EVT OpVT = Ops[0].getValueType();
    if (InVal.getValueType() != OpVT)
      InVal = OpVT.bitsGT(InVal.getValueType()) ?
                  DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
                  DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
    Ops[Elt] = InVal;
  }

  // Return the new vector.
  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
}
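
// Convert (extract_vector_elt (load VecVT, $addr), Idx) into a narrowed
// scalar load from $addr + Idx * (element size), replacing all uses of the
// extract and of the original load's chain.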
SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
    SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
  EVT ResultVT = EVE->getValueType(0);
  EVT VecEltVT = InVecVT.getVectorElementType();
  unsigned Align = OriginalLoad->getAlignment();
  unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
      VecEltVT.getTypeForEVT(*DAG.getContext()));

  if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
    return SDValue();

  Align = NewAlign;

  SDValue NewPtr = OriginalLoad->getBasePtr();
  SDValue Offset;
  EVT PtrType = NewPtr.getValueType();
  MachinePointerInfo MPI;
  SDLoc DL(EVE);
  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
    int Elt = ConstEltNo->getZExtValue();
    unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
    Offset = DAG.getConstant(PtrOff, DL, PtrType);
    MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
  } else {
    Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
    Offset = DAG.getNode(
        ISD::MUL, DL, PtrType, Offset,
        DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
    MPI = OriginalLoad->getPointerInfo();
  }
  NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);

  // The replacement we need to do here is a little tricky: we need to
  // replace an extractelement of a load with a load.
  // Use ReplaceAllUsesOfValuesWith to do the replacement.
  // Note that this replacement assumes that the extractvalue is the only
  // use of the load; that's okay because we don't want to perform this
  // transformation in other cases anyway.
  SDValue Load;
  SDValue Chain;
  if (ResultVT.bitsGT(VecEltVT)) {
    // If the result type of vextract is wider than the load, then issue an
    // extending load instead.
    ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
                                                  VecEltVT)
                                   ? ISD::ZEXTLOAD
                                   : ISD::EXTLOAD;
    Load = DAG.getExtLoad(
        ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(), NewPtr, MPI,
        VecEltVT, OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
        OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
  } else {
    Load = DAG.getLoad(
        VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI,
        OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
        OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
    if (ResultVT.bitsLT(VecEltVT))
      Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
    else
      Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load);
  }
  WorklistRemover DeadNodes(*this);
  SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
  SDValue To[] = { Load, Chain };
  DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
  // Since we're explicitly calling ReplaceAllUses, add the new node to the
  // worklist explicitly as well.
  AddToWorklist(Load.getNode());
  AddUsersToWorklist(Load.getNode()); // Add users too
  // Make sure to revisit this node to clean it up; it will usually be dead.
  AddToWorklist(EVE);
  ++OpsNarrowed;
  return SDValue(EVE, 0);
}

SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
  // (vextract (scalar_to_vector val), 0) -> val
  SDValue InVec = N->getOperand(0);
  EVT VT = InVec.getValueType();
  EVT NVT = N->getValueType(0);

  if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    // Check if the result type doesn't match the inserted element type. A
    // SCALAR_TO_VECTOR may truncate the inserted element and the
    // EXTRACT_VECTOR_ELT may widen the extracted vector.
    SDValue InOp = InVec.getOperand(0);
    if (InOp.getValueType() != NVT) {
      assert(InOp.getValueType().isInteger() && NVT.isInteger());
      return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
    }
    return InOp;
  }

  SDValue EltNo = N->getOperand(1);
  bool ConstEltNo = isa<ConstantSDNode>(EltNo);

  // Transform: (EXTRACT_VECTOR_ELT (VECTOR_SHUFFLE)) -> EXTRACT_VECTOR_ELT.
  // We only perform this optimization before the op legalization phase because
  // we may introduce new vector instructions which are not backed by TD
  // patterns; for example, on AVX, extracting elements from a wide vector
  // without using extract_subvector.  However, if we can find an underlying
  // scalar value, then we can always use that.
  if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE && ConstEltNo) {
    int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
    int NumElem = VT.getVectorNumElements();
    ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
    // Find the new index to extract from.
    int OrigElt = SVOp->getMaskElt(Elt);

    // Extracting an undef index is undef.
    if (OrigElt == -1)
      return DAG.getUNDEF(NVT);

    // Select the right vector half to extract from.
    SDValue SVInVec;
    if (OrigElt < NumElem) {
      SVInVec = InVec->getOperand(0);
    } else {
      SVInVec = InVec->getOperand(1);
      OrigElt -= NumElem;
    }

    if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
      SDValue InOp = SVInVec.getOperand(OrigElt);
      if (InOp.getValueType() != NVT) {
        assert(InOp.getValueType().isInteger() && NVT.isInteger());
        InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
      }

      return InOp;
    }

    // FIXME: We should handle recursing on other vector shuffles and
    // scalar_to_vector here as well.

    if (!LegalOperations) {
      EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
                         DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
    }
  }

  bool BCNumEltsChanged = false;
  EVT ExtVT = VT.getVectorElementType();
  EVT LVT = ExtVT;

  // If the result of load has to be truncated, then it's not necessarily
  // profitable.
  if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
    return SDValue();

  if (InVec.getOpcode() == ISD::BITCAST) {
    // Don't duplicate a load with other uses.
    if (!InVec.hasOneUse())
      return SDValue();

    EVT BCVT = InVec.getOperand(0).getValueType();
    if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
      return SDValue();
    if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
      BCNumEltsChanged = true;
    InVec = InVec.getOperand(0);
    ExtVT = BCVT.getVectorElementType();
  }

  // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
  if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
      ISD::isNormalLoad(InVec.getNode()) &&
      !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
    SDValue Index = N->getOperand(1);
    if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec))
      return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
                                                           OrigLoad);
  }

  // Perform only after legalization to ensure build_vector / vector_shuffle
  // optimizations have already been done.
  if (!LegalOperations) return SDValue();

  // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
  if (ConstEltNo) {
    int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    LoadSDNode *LN0 = nullptr;
    const ShuffleVectorSDNode *SVN = nullptr;
    if (ISD::isNormalLoad(InVec.getNode())) {
      LN0 = cast<LoadSDNode>(InVec);
    } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
               InVec.getOperand(0).getValueType() == ExtVT &&
               ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
      // Don't duplicate a load with other uses.
      if (!InVec.hasOneUse())
        return SDValue();

      LN0 = cast<LoadSDNode>(InVec.getOperand(0));
    } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
      // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
      // =>
      // (load $addr+1*size)

      // Don't duplicate a load with other uses.
      if (!InVec.hasOneUse())
        return SDValue();

      // If the bit convert changed the number of elements, it is unsafe
      // to examine the mask.
      if (BCNumEltsChanged)
        return SDValue();

      // Select the input vector, guarding against out of range extract vector.
      unsigned NumElems = VT.getVectorNumElements();
      int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
      InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);

      if (InVec.getOpcode() == ISD::BITCAST) {
        // Don't duplicate a load with other uses.
        if (!InVec.hasOneUse())
          return SDValue();

        InVec = InVec.getOperand(0);
      }
      if (ISD::isNormalLoad(InVec.getNode())) {
        LN0 = cast<LoadSDNode>(InVec);
        Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
        EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
      }
    }

    // Make sure we found a non-volatile load and the extractelement is
    // the only use.
    if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
      return SDValue();

    // If Idx was -1 above, Elt is going to be -1, so just return undef.
    if (Elt == -1)
      return DAG.getUNDEF(LVT);

    return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
  }

  return SDValue();
}

// Simplify (build_vec (ext x)) to (bitcast (build_vec x)).
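// For example (illustrative, little-endian):
//   (v4i32 build_vector (zext i16 a), (zext i16 b), (zext i16 c), (zext i16 d))
// -> (v4i32 bitcast (v8i16 build_vector a, 0, b, 0, c, 0, d, 0))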
SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
  // We perform this optimization post type-legalization because
  // the type-legalizer often scalarizes integer-promoted vectors.
  // Performing this optimization before may create bit-casts which
  // will be type-legalized to complex code sequences.
  // We perform this optimization only before the operation legalizer because
  // we may introduce illegal operations.
  if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
    return SDValue();

  unsigned NumInScalars = N->getNumOperands();
  SDLoc dl(N);
  EVT VT = N->getValueType(0);

  // Check to see if this is a BUILD_VECTOR of a bunch of values
  // which come from any_extend or zero_extend nodes. If so, we can create
  // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
  // optimizations. We do not handle sign-extend because we can't fill the sign
  // using shuffles.
  EVT SourceType = MVT::Other;
  bool AllAnyExt = true;

  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);
    // Ignore undef inputs.
    if (In.getOpcode() == ISD::UNDEF) continue;

    bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
    bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;

    // Abort if the element is not an extension.
    if (!ZeroExt && !AnyExt) {
      SourceType = MVT::Other;
      break;
    }

    // The input is a ZeroExt or AnyExt. Check the original type.
    EVT InTy = In.getOperand(0).getValueType();

    // Check that all of the widened source types are the same.
    if (SourceType == MVT::Other)
      // First time.
      SourceType = InTy;
    else if (InTy != SourceType) {
      // Multiple incoming types. Abort.
      SourceType = MVT::Other;
      break;
    }

    // Check if all of the extends are ANY_EXTENDs.
    AllAnyExt &= AnyExt;
  }

  // In order to have valid types, all of the inputs must be extended from the
  // same source type and all of the inputs must be any or zero extend.
  // Scalar sizes must be a power of two.
  EVT OutScalarTy = VT.getScalarType();
  bool ValidTypes = SourceType != MVT::Other &&
                    isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
                    isPowerOf2_32(SourceType.getSizeInBits());

  // Create a new simpler BUILD_VECTOR sequence which other optimizations can
  // turn into a single shuffle instruction.
  if (!ValidTypes)
    return SDValue();
  bool isLE = DAG.getDataLayout().isLittleEndian();
  unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
  assert(ElemRatio > 1 && "Invalid element size ratio");
  SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
                               DAG.getConstant(0, SDLoc(N), SourceType);

  unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
  SmallVector<SDValue, 8> Ops(NewBVElems, Filler);

  // Populate the new build_vector.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Cast = N->getOperand(i);
    assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
            Cast.getOpcode() == ISD::ZERO_EXTEND ||
            Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode");
    SDValue In;
    if (Cast.getOpcode() == ISD::UNDEF)
      In = DAG.getUNDEF(SourceType);
    else
      In = Cast->getOperand(0);
    unsigned Index = isLE ? (i * ElemRatio) :
                            (i * ElemRatio + (ElemRatio - 1));

    assert(Index < Ops.size() && "Invalid index");
    Ops[Index] = In;
  }

  // The type of the new BUILD_VECTOR node.
  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
  assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
         "Invalid vector size");
  // Check if the new vector type is legal.
  if (!isTypeLegal(VecVT)) return SDValue();

  // Make the new BUILD_VECTOR.
  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops);

  // The new BUILD_VECTOR node has the potential to be further optimized.
  AddToWorklist(BV.getNode());
  // Bitcast to the desired type.
  return DAG.getNode(ISD::BITCAST, dl, VT, BV);
}
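
// Simplify (build_vec (int_to_fp x), (int_to_fp y), ...) into
// (int_to_fp (build_vec x, y, ...)).  For example (illustrative):
//   (v2f32 build_vector (sint_to_fp i32 a), (sint_to_fp i32 b))
// -> (v2f32 sint_to_fp (v2i32 build_vector a, b))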
SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
  EVT VT = N->getValueType(0);

  unsigned NumInScalars = N->getNumOperands();
  SDLoc dl(N);

  EVT SrcVT = MVT::Other;
  unsigned Opcode = ISD::DELETED_NODE;
  unsigned NumDefs = 0;

  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);
    unsigned Opc = In.getOpcode();

    if (Opc == ISD::UNDEF)
      continue;

    // If all scalar values are floats and converted from integers.
    if (Opcode == ISD::DELETED_NODE &&
        (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
      Opcode = Opc;
    }

    if (Opc != Opcode)
      return SDValue();

    EVT InVT = In.getOperand(0).getValueType();

    // If all scalar values are typed differently, bail out. It's chosen to
    // simplify BUILD_VECTOR of integer types.
    if (SrcVT == MVT::Other)
      SrcVT = InVT;
    if (SrcVT != InVT)
      return SDValue();
    NumDefs++;
  }

  // If the vector has just one element defined, it's not worth folding it into
  // a vectorized one.
  if (NumDefs < 2)
    return SDValue();

  assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
         && "Should only handle conversion from integer to float.");
  assert(SrcVT != MVT::Other && "Cannot determine source type!");

  EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
  if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
    return SDValue();

  // Just because the floating-point vector type is legal does not necessarily
  // mean that the corresponding integer vector type is.
  if (!isTypeLegal(NVT))
    return SDValue();

  SmallVector<SDValue, 8> Opnds;
  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);

    if (In.getOpcode() == ISD::UNDEF)
      Opnds.push_back(DAG.getUNDEF(SrcVT));
    else
      Opnds.push_back(In.getOperand(0));
  }
  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Opnds);
  AddToWorklist(BV.getNode());

  return DAG.getNode(Opcode, dl, VT, BV);
}

SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
  unsigned NumInScalars = N->getNumOperands();
  SDLoc dl(N);
  EVT VT = N->getValueType(0);

  // A vector built entirely of undefs is undef.
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
    return V;

  if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
    return V;

  // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
  // operations.  If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
  // at most two distinct vectors, turn this into a shuffle node.

  // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
  if (!isTypeLegal(VT))
    return SDValue();

  // May only combine to shuffle after legalize if shuffle is legal.
  if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
    return SDValue();

  SDValue VecIn1, VecIn2;
  bool UsesZeroVector = false;
  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue Op = N->getOperand(i);
    // Ignore undef inputs.
    if (Op.getOpcode() == ISD::UNDEF) continue;

    // See if we can combine this build_vector into a blend with a zero vector.
    if (!VecIn2.getNode() && (isNullConstant(Op) || isNullFPConstant(Op))) {
      UsesZeroVector = true;
      continue;
    }

    // If this input is something other than an EXTRACT_VECTOR_ELT with a
    // constant index, bail out.
    if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        !isa<ConstantSDNode>(Op.getOperand(1))) {
      VecIn1 = VecIn2 = SDValue(nullptr, 0);
      break;
    }

    // We allow up to two distinct input vectors.
    SDValue ExtractedFromVec = Op.getOperand(0);
    if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
      continue;

    if (!VecIn1.getNode()) {
      VecIn1 = ExtractedFromVec;
    } else if (!VecIn2.getNode() && !UsesZeroVector) {
      VecIn2 = ExtractedFromVec;
    } else {
      // Too many inputs.
      VecIn1 = VecIn2 = SDValue(nullptr, 0);
      break;
    }
  }

  // If everything is good, we can make a shuffle operation.
  if (VecIn1.getNode()) {
    unsigned InNumElements = VecIn1.getValueType().getVectorNumElements();
    SmallVector<int, 8> Mask;
    for (unsigned i = 0; i != NumInScalars; ++i) {
      unsigned Opcode = N->getOperand(i).getOpcode();
      if (Opcode == ISD::UNDEF) {
        Mask.push_back(-1);
        continue;
      }

      // Operands can also be zero.
      if (Opcode != ISD::EXTRACT_VECTOR_ELT) {
        assert(UsesZeroVector &&
               (Opcode == ISD::Constant || Opcode == ISD::ConstantFP) &&
               "Unexpected node found!");
        Mask.push_back(NumInScalars+i);
        continue;
      }

      // If extracting from the first vector, just use the index directly.
      SDValue Extract = N->getOperand(i);
      SDValue ExtVal = Extract.getOperand(1);
      unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
      if (Extract.getOperand(0) == VecIn1) {
        Mask.push_back(ExtIndex);
        continue;
      }

      // Otherwise, use InIdx + InputVecSize.
      Mask.push_back(InNumElements + ExtIndex);
    }

    // Avoid introducing illegal shuffles with zero.
    if (UsesZeroVector && !TLI.isVectorClearMaskLegal(Mask, VT))
      return SDValue();

    // We can't generate a shuffle node with mismatched input and output types.
    // Attempt to transform a single input vector to the correct type.
    if (VT != VecIn1.getValueType()) {
      // If the input vector type has a different base type to the output
      // vector type, bail out.
      EVT VTElemType = VT.getVectorElementType();
      if ((VecIn1.getValueType().getVectorElementType() != VTElemType) ||
          (VecIn2.getNode() &&
           (VecIn2.getValueType().getVectorElementType() != VTElemType)))
        return SDValue();

      // If the input vector is too small, widen it.
      // We only support widening of vectors which are half the size of the
      // output registers. For example XMM->YMM widening on X86 with AVX.
      EVT VecInT = VecIn1.getValueType();
      if (VecInT.getSizeInBits() * 2 == VT.getSizeInBits()) {
        // If we only have one small input, widen it by adding undef values.
        if (!VecIn2.getNode())
          VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1,
                               DAG.getUNDEF(VecIn1.getValueType()));
        else if (VecIn1.getValueType() == VecIn2.getValueType()) {
          // If we have two small inputs of the same type, try to concat them.
          VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, VecIn2);
          VecIn2 = SDValue(nullptr, 0);
        } else
          return SDValue();
      } else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) {
        // If the input vector is too large, try to split it.
        // We don't support having two input vectors that are too large.
        // If the zero vector was used, we cannot split the vector,
        // since we'd need 3 inputs.
        if (UsesZeroVector || VecIn2.getNode())
          return SDValue();

        if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements()))
          return SDValue();

        // Try to replace VecIn1 with two extract_subvectors.
        // No need to update the masks, they should still be correct.
        VecIn2 = DAG.getNode(
            ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
            DAG.getConstant(VT.getVectorNumElements(), dl,
                            TLI.getVectorIdxTy(DAG.getDataLayout())));
        VecIn1 = DAG.getNode(
            ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
            DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
      } else
        return SDValue();
    }

    if (UsesZeroVector)
      VecIn2 = VT.isInteger() ? DAG.getConstant(0, dl, VT) :
                                DAG.getConstantFP(0.0, dl, VT);
    else
      // If VecIn2 is unused then change it to undef.
      VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);

    // Check that we were able to transform all incoming values to the same
    // type.
    if (VecIn2.getValueType() != VecIn1.getValueType() ||
        VecIn1.getValueType() != VT)
      return SDValue();

    // Return the new VECTOR_SHUFFLE node.
    SDValue Ops[2];
    Ops[0] = VecIn1;
    Ops[1] = VecIn2;
    return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], &Mask[0]);
  }

  return SDValue();
}
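
// Fold a CONCAT_VECTORS of vectors that are themselves bitcasts of scalars
// (or undef) into one BUILD_VECTOR, bitcast to the result type.
// For example (illustrative):
//   (v4f32 concat_vectors (v2f32 bitcast i64 a), (v2f32 bitcast i64 b))
// -> (v4f32 bitcast (v2i64 build_vector a, b))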
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT OpVT = N->getOperand(0).getValueType();

  // If the operands are legal vectors, leave them alone.
  if (TLI.isTypeLegal(OpVT))
    return SDValue();

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SmallVector<SDValue, 8> Ops;

  EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
  SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);

  // Keep track of what we encounter.
  bool AnyInteger = false;
  bool AnyFP = false;
  for (const SDValue &Op : N->ops()) {
    if (ISD::BITCAST == Op.getOpcode() &&
        !Op.getOperand(0).getValueType().isVector())
      Ops.push_back(Op.getOperand(0));
    else if (ISD::UNDEF == Op.getOpcode())
      Ops.push_back(ScalarUndef);
    else
      return SDValue();

    // Note whether we encounter an integer or floating point scalar.
    // If it's neither, bail out, it could be something weird like x86mmx.
    EVT LastOpVT = Ops.back().getValueType();
    if (LastOpVT.isFloatingPoint())
      AnyFP = true;
    else if (LastOpVT.isInteger())
      AnyInteger = true;
    else
      return SDValue();
  }

  // If any of the operands is a floating point scalar bitcast to a vector,
  // use floating point types throughout, and bitcast everything.
  // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
  if (AnyFP) {
    SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
    ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
    if (AnyInteger) {
      for (SDValue &Op : Ops) {
        if (Op.getValueType() == SVT)
          continue;
        if (Op.getOpcode() == ISD::UNDEF)
          Op = ScalarUndef;
        else
          Op = DAG.getNode(ISD::BITCAST, DL, SVT, Op);
      }
    }
  }

  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
                               VT.getSizeInBits() / SVT.getSizeInBits());
  return DAG.getNode(ISD::BITCAST, DL, VT,
                     DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, Ops));
}

SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
  // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of
  // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector
  // inputs come from at most two distinct vectors, turn this into a shuffle
  // node.

  // If we only have one input vector, we don't need to do any concatenation.
  if (N->getNumOperands() == 1)
    return N->getOperand(0);

  // Check if all of the operands are undefs.
  EVT VT = N->getValueType(0);
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // Optimize concat_vectors where all but the first of the vectors are undef.
  if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
        return Op.getOpcode() == ISD::UNDEF;
      })) {
    SDValue In = N->getOperand(0);
    assert(In.getValueType().isVector() && "Must concat vectors");

    // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(scalar).
    if (In->getOpcode() == ISD::BITCAST &&
        !In->getOperand(0)->getValueType(0).isVector()) {
      SDValue Scalar = In->getOperand(0);

      // If the bitcast type isn't legal, it might be a trunc of a legal type;
      // look through the trunc so we can still do the transform:
      //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
      if (Scalar->getOpcode() == ISD::TRUNCATE &&
          !TLI.isTypeLegal(Scalar.getValueType()) &&
          TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
        Scalar = Scalar->getOperand(0);

      EVT SclTy = Scalar->getValueType(0);

      if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
        return SDValue();

      EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy,
                                 VT.getSizeInBits() / SclTy.getSizeInBits());
      if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
        return SDValue();

      SDLoc dl = SDLoc(N);
      SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NVT, Scalar);
      return DAG.getNode(ISD::BITCAST, dl, VT, Res);
    }
  }

  // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
  // We have already tested above for an UNDEF only concatenation.
  // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
  // -> (BUILD_VECTOR A, B, ..., C, D, ...)
  auto IsBuildVectorOrUndef = [](const SDValue &Op) {
    return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
  };
  bool AllBuildVectorsOrUndefs =
      std::all_of(N->op_begin(), N->op_end(), IsBuildVectorOrUndef);
  if (AllBuildVectorsOrUndefs) {
    SmallVector<SDValue, 8> Opnds;
    EVT SVT = VT.getScalarType();

    EVT MinVT = SVT;
    if (!SVT.isFloatingPoint()) {
      // If the BUILD_VECTORs are built from integers, they may have different
      // operand types. Get the smallest type and truncate all operands to it.
      bool FoundMinVT = false;
      for (const SDValue &Op : N->ops())
        if (ISD::BUILD_VECTOR == Op.getOpcode()) {
          EVT OpSVT = Op.getOperand(0)->getValueType(0);
          MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
          FoundMinVT = true;
        }
      assert(FoundMinVT && "Concat vector type mismatch");
    }

    for (const SDValue &Op : N->ops()) {
      EVT OpVT = Op.getValueType();
      unsigned NumElts = OpVT.getVectorNumElements();

      if (ISD::UNDEF == Op.getOpcode())
        Opnds.append(NumElts, DAG.getUNDEF(MinVT));

      if (ISD::BUILD_VECTOR == Op.getOpcode()) {
        if (SVT.isFloatingPoint()) {
          assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
          Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
        } else {
          for (unsigned i = 0; i != NumElts; ++i)
            Opnds.push_back(
                DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
        }
      }
    }

    assert(VT.getVectorNumElements() == Opnds.size() &&
           "Concat vector type mismatch");
    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
  }

  // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
  if (SDValue V = combineConcatVectorOfScalars(N, DAG))
    return V;

  // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
  // nodes often generate nop CONCAT_VECTOR nodes.
  // Scan the CONCAT_VECTOR operands and look for CONCAT operations that
  // place the incoming vectors at the exact same location.
  SDValue SingleSource = SDValue();
  unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();

  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Op = N->getOperand(i);

    if (Op.getOpcode() == ISD::UNDEF)
      continue;

    // Check if this is the identity extract:
    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // Find the single incoming vector for the extract_subvector.
    if (SingleSource.getNode()) {
      if (Op.getOperand(0) != SingleSource)
        return SDValue();
    } else {
      SingleSource = Op.getOperand(0);

      // Check the source type is the same as the type of the result.
      // If not, this concat may extend the vector, so we cannot
      // optimize it away.
      if (SingleSource.getValueType() != N->getValueType(0))
        return SDValue();
    }

    unsigned IdentityIndex = i * PartNumElem;
    ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    // The extract index must be constant.
    if (!CS)
      return SDValue();

    // Check that we are reading from the identity index.
    if (CS->getZExtValue() != IdentityIndex)
      return SDValue();
  }

  if (SingleSource.getNode())
    return SingleSource;

  return SDValue();
}

SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
  EVT NVT = N->getValueType(0);
  SDValue V = N->getOperand(0);

  if (V->getOpcode() == ISD::CONCAT_VECTORS) {
    // Combine:
    //    (extract_subvec (concat V1, V2, ...), i)
    // Into:
    //    Vi if possible
    // Only operand 0 is checked as 'concat' assumes all inputs of the same
    // type.
    if (V->getOperand(0).getValueType() != NVT)
      return SDValue();
    unsigned Idx = N->getConstantOperandVal(1);
    unsigned NumElems = NVT.getVectorNumElements();
    assert((Idx % NumElems) == 0 &&
           "IDX in concat is not a multiple of the result vector length.");
    return V->getOperand(Idx / NumElems);
  }

  // Skip bitcasting.
  if (V->getOpcode() == ISD::BITCAST)
    V = V.getOperand(0);

  if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
    SDLoc dl(N);
    // Handle only simple case where vector being inserted and vector
    // being extracted are of same type, and are half size of larger vectors.
    EVT BigVT = V->getOperand(0).getValueType();
    EVT SmallVT = V->getOperand(1).getValueType();
    if (!NVT.bitsEq(SmallVT) || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
      return SDValue();

    // Only handle cases where both indexes are constants with the same type.
    ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
    ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));

    if (InsIdx && ExtIdx &&
        InsIdx->getValueType(0).getSizeInBits() <= 64 &&
        ExtIdx->getValueType(0).getSizeInBits() <= 64) {
      // Combine:
      //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
      // Into:
      //    indices are equal or bit offsets are equal => V2
      //    otherwise => (extract_subvec V1, ExtIdx)
      if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() ==
          ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits())
        return DAG.getNode(ISD::BITCAST, dl, NVT, V->getOperand(1));
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT,
                         DAG.getNode(ISD::BITCAST, dl,
                                     N->getOperand(0).getValueType(),
                                     V->getOperand(0)), N->getOperand(1));
    }
  }
  return SDValue();
}
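
// Replace any pieces of a shuffle operand whose elements are never referenced
// by the shuffle mask (tracked in UsedElements) with undef, recursing through
// chains of CONCAT_VECTORS and INSERT_SUBVECTOR nodes.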
static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements,
                                                 SDValue V, SelectionDAG &DAG) {
  SDLoc DL(V);
  EVT VT = V.getValueType();

  switch (V.getOpcode()) {
  default:
    return V;

  case ISD::CONCAT_VECTORS: {
    EVT OpVT = V->getOperand(0).getValueType();
    int OpSize = OpVT.getVectorNumElements();
    SmallBitVector OpUsedElements(OpSize, false);
    bool FoundSimplification = false;
    SmallVector<SDValue, 4> NewOps;
    NewOps.reserve(V->getNumOperands());
    for (int i = 0, NumOps = V->getNumOperands(); i < NumOps; ++i) {
      SDValue Op = V->getOperand(i);
      bool OpUsed = false;
      for (int j = 0; j < OpSize; ++j)
        if (UsedElements[i * OpSize + j]) {
          OpUsedElements[j] = true;
          OpUsed = true;
        }
      NewOps.push_back(
          OpUsed ? simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG)
                 : DAG.getUNDEF(OpVT));
      // Record whether this operand was actually simplified.
      FoundSimplification |= Op != NewOps.back();
      OpUsedElements.reset();
    }
    if (FoundSimplification)
      V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps);
    return V;
  }

  case ISD::INSERT_SUBVECTOR: {
    SDValue BaseV = V->getOperand(0);
    SDValue SubV = V->getOperand(1);
    auto *IdxN = dyn_cast<ConstantSDNode>(V->getOperand(2));
    if (!IdxN)
      return V;

    int SubSize = SubV.getValueType().getVectorNumElements();
    int Idx = IdxN->getZExtValue();
    bool SubVectorUsed = false;
    SmallBitVector SubUsedElements(SubSize, false);
    for (int i = 0; i < SubSize; ++i)
      if (UsedElements[i + Idx]) {
        SubVectorUsed = true;
        SubUsedElements[i] = true;
        UsedElements[i + Idx] = false;
      }

    // Now recurse on both the base and sub vectors.
    SDValue SimplifiedSubV =
        SubVectorUsed
            ? simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG)
            : DAG.getUNDEF(SubV.getValueType());
    SDValue SimplifiedBaseV =
        simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG);
    if (SimplifiedSubV != SubV || SimplifiedBaseV != BaseV)
      V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
                      SimplifiedBaseV, SimplifiedSubV, V->getOperand(2));
    return V;
  }
  }
}

static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0,
                                       SDValue N1, SelectionDAG &DAG) {
  EVT VT = SVN->getValueType(0);
  int NumElts = VT.getVectorNumElements();
  SmallBitVector N0UsedElements(NumElts, false), N1UsedElements(NumElts, false);
  for (int M : SVN->getMask())
    if (M >= 0 && M < NumElts)
      N0UsedElements[M] = true;
    else if (M >= NumElts)
      N1UsedElements[M - NumElts] = true;

  SDValue S0 = simplifyShuffleOperandRecursively(N0UsedElements, N0, DAG);
  SDValue S1 = simplifyShuffleOperandRecursively(N1UsedElements, N1, DAG);
  if (S0 == N0 && S1 == N1)
    return SDValue();

  return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask());
}

// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
// or turn a shuffle of a single concat into a simpler shuffle followed by
// a concat.
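// For example (illustrative), with v4i32 built from two v2i32 halves:
//   shuffle (concat A, B), (concat C, D), <0, 1, 4, 5> -> concat A, C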
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

  SmallVector<SDValue, 4> Ops;
  EVT ConcatVT = N0.getOperand(0).getValueType();
  unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
  unsigned NumConcats = NumElts / NumElemsPerConcat;

  // Special case: shuffle(concat(A,B)) can be more efficiently represented
  // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
  // half vector elements.
  if (NumElemsPerConcat * 2 == NumElts && N1.getOpcode() == ISD::UNDEF &&
      std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
                  SVN->getMask().end(), [](int i) { return i == -1; })) {
    N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
                              N0.getOperand(1),
                              ArrayRef<int>(SVN->getMask().begin(),
                                            NumElemsPerConcat));
    N1 = DAG.getUNDEF(ConcatVT);
    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
  }

  // Look at every vector that's inserted. We're looking for exact
  // subvector-sized copies from a concatenated vector.
  for (unsigned I = 0; I != NumConcats; ++I) {
    // Make sure we're dealing with a copy.
    unsigned Begin = I * NumElemsPerConcat;
    bool AllUndef = true, NoUndef = true;
    for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
      if (SVN->getMaskElt(J) >= 0)
        AllUndef = false;
      else
        NoUndef = false;
    }

    if (NoUndef) {
      if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
        return SDValue();

      for (unsigned J = 1; J != NumElemsPerConcat; ++J)
        if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
          return SDValue();

      unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
      if (FirstElt < N0.getNumOperands())
        Ops.push_back(N0.getOperand(FirstElt));
      else
        Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
    } else if (AllUndef) {
      Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
    } else { // Mixed with general masks and undefs, can't do optimization.
      return SDValue();
    }
  }

  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}

SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");

  // Canonicalize shuffle undef, undef -> undef.
  if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
    return DAG.getUNDEF(VT);

  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

  // Canonicalize shuffle v, v -> v, undef.
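  // For example (illustrative, NumElts = 4):
  //   shuffle v, v, <0, 5, 2, 7> -> shuffle v, undef, <0, 1, 2, 3>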
  10975. if (N0 == N1) {
  10976. SmallVector<int, 8> NewMask;
  10977. for (unsigned i = 0; i != NumElts; ++i) {
  10978. int Idx = SVN->getMaskElt(i);
  10979. if (Idx >= (int)NumElts) Idx -= NumElts;
  10980. NewMask.push_back(Idx);
  10981. }
  10982. return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
  10983. &NewMask[0]);
  10984. }
  10985. // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
  10986. if (N0.getOpcode() == ISD::UNDEF) {
  10987. SmallVector<int, 8> NewMask;
  10988. for (unsigned i = 0; i != NumElts; ++i) {
  10989. int Idx = SVN->getMaskElt(i);
  10990. if (Idx >= 0) {
  10991. if (Idx >= (int)NumElts)
  10992. Idx -= NumElts;
  10993. else
  10994. Idx = -1; // remove reference to lhs
  10995. }
  10996. NewMask.push_back(Idx);
  10997. }
  10998. return DAG.getVectorShuffle(VT, SDLoc(N), N1, DAG.getUNDEF(VT),
  10999. &NewMask[0]);
  11000. }
  11001. // Remove references to rhs if it is undef
  11002. if (N1.getOpcode() == ISD::UNDEF) {
  11003. bool Changed = false;
  11004. SmallVector<int, 8> NewMask;
  11005. for (unsigned i = 0; i != NumElts; ++i) {
  11006. int Idx = SVN->getMaskElt(i);
  11007. if (Idx >= (int)NumElts) {
  11008. Idx = -1;
  11009. Changed = true;
  11010. }
  11011. NewMask.push_back(Idx);
  11012. }
  11013. if (Changed)
  11014. return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, &NewMask[0]);
  11015. }
  11016. // If it is a splat, check if the argument vector is another splat or a
  11017. // build_vector.
  11018. if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
  11019. SDNode *V = N0.getNode();
  11020. // If this is a bit convert that changes the element type of the vector but
  11021. // not the number of vector elements, look through it. Be careful not to
  11022. // look though conversions that change things like v4f32 to v2f64.
  11023. if (V->getOpcode() == ISD::BITCAST) {
  11024. SDValue ConvInput = V->getOperand(0);
  11025. if (ConvInput.getValueType().isVector() &&
  11026. ConvInput.getValueType().getVectorNumElements() == NumElts)
  11027. V = ConvInput.getNode();
  11028. }
  11029. if (V->getOpcode() == ISD::BUILD_VECTOR) {
  11030. assert(V->getNumOperands() == NumElts &&
  11031. "BUILD_VECTOR has wrong number of operands");
  11032. SDValue Base;
  11033. bool AllSame = true;
  11034. for (unsigned i = 0; i != NumElts; ++i) {
  11035. if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
  11036. Base = V->getOperand(i);
  11037. break;
  11038. }
  11039. }
  11040. // Splat of <u, u, u, u>, return <u, u, u, u>
  11041. if (!Base.getNode())
  11042. return N0;
  11043. for (unsigned i = 0; i != NumElts; ++i) {
  11044. if (V->getOperand(i) != Base) {
  11045. AllSame = false;
  11046. break;
  11047. }
  11048. }
  11049. // Splat of <x, x, x, x>, return <x, x, x, x>
  11050. if (AllSame)
  11051. return N0;
  11052. // Canonicalize any other splat as a build_vector.
  11053. const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
  11054. SmallVector<SDValue, 8> Ops(NumElts, Splatted);
  11055. SDValue NewBV = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
  11056. V->getValueType(0), Ops);
  11057. // We may have jumped through bitcasts, so the type of the
  11058. // BUILD_VECTOR may not match the type of the shuffle.
  11059. if (V->getValueType(0) != VT)
  11060. NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV);
  11061. return NewBV;
  11062. }
  11063. }
  11064. // There are various patterns used to build up a vector from smaller vectors,
  11065. // subvectors, or elements. Scan chains of these and replace unused insertions
  11066. // or components with undef.
  11067. if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG))
  11068. return S;
  11069. if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
  11070. Level < AfterLegalizeVectorOps &&
  11071. (N1.getOpcode() == ISD::UNDEF ||
  11072. (N1.getOpcode() == ISD::CONCAT_VECTORS &&
  11073. N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
  11074. SDValue V = partitionShuffleOfConcats(N, DAG);
  11075. if (V.getNode())
  11076. return V;
  11077. }
  // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
  // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
    SmallVector<SDValue, 8> Ops;
    for (int M : SVN->getMask()) {
      SDValue Op = DAG.getUNDEF(VT.getScalarType());
      if (M >= 0) {
        int Idx = M % NumElts;
        SDValue &S = (M < (int)NumElts ? N0 : N1);
        if (S.getOpcode() == ISD::BUILD_VECTOR && S.hasOneUse()) {
          Op = S.getOperand(Idx);
        } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR && S.hasOneUse()) {
          if (Idx == 0)
            Op = S.getOperand(0);
        } else {
          // Operand can't be combined - bail out.
          break;
        }
      }
      Ops.push_back(Op);
    }

    if (Ops.size() == VT.getVectorNumElements()) {
      // BUILD_VECTOR requires all inputs to be of the same type, find the
      // maximum type and extend them all.
      EVT SVT = VT.getScalarType();
      if (SVT.isInteger())
        for (SDValue &Op : Ops)
          SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
      if (SVT != VT.getScalarType())
        for (SDValue &Op : Ops)
          Op = TLI.isZExtFree(Op.getValueType(), SVT)
                   ? DAG.getZExtOrTrunc(Op, SDLoc(N), SVT)
                   : DAG.getSExtOrTrunc(Op, SDLoc(N), SVT);
      return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Ops);
    }
  }
  // If this shuffle only has a single input that is a bitcasted shuffle,
  // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
  // back to their original types.
  if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N1.getOpcode() == ISD::UNDEF && Level < AfterLegalizeVectorOps &&
      TLI.isTypeLegal(VT)) {

    // Peek through the bitcast only if there is one user.
    SDValue BC0 = N0;
    while (BC0.getOpcode() == ISD::BITCAST) {
      if (!BC0.hasOneUse())
        break;
      BC0 = BC0.getOperand(0);
    }

    auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
      if (Scale == 1)
        return SmallVector<int, 8>(Mask.begin(), Mask.end());

      SmallVector<int, 8> NewMask;
      for (int M : Mask)
        for (int s = 0; s != Scale; ++s)
          NewMask.push_back(M < 0 ? -1 : Scale * M + s);
      return NewMask;
    };
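    // For example, scaling the v2i64 mask <1, 0> to v4i32 granularity with
    // Scale == 2 yields <2, 3, 0, 1>: each wide element expands to Scale
    // consecutive narrow elements.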
    if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
      EVT SVT = VT.getScalarType();
      EVT InnerVT = BC0->getValueType(0);
      EVT InnerSVT = InnerVT.getScalarType();

      // Determine which shuffle works with the smaller scalar type.
      EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
      EVT ScaleSVT = ScaleVT.getScalarType();

      if (TLI.isTypeLegal(ScaleVT) &&
          0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
          0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
        int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
        int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();

        // Scale the shuffle masks to the smaller scalar type.
        ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
        SmallVector<int, 8> InnerMask =
            ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
        SmallVector<int, 8> OuterMask =
            ScaleShuffleMask(SVN->getMask(), OuterScale);

        // Merge the shuffle masks.
        SmallVector<int, 8> NewMask;
        for (int M : OuterMask)
          NewMask.push_back(M < 0 ? -1 : InnerMask[M]);

        // Test for shuffle mask legality over both commutations.
        SDValue SV0 = BC0->getOperand(0);
        SDValue SV1 = BC0->getOperand(1);
        bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
        if (!LegalMask) {
          std::swap(SV0, SV1);
          ShuffleVectorSDNode::commuteMask(NewMask);
          LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
        }

        if (LegalMask) {
          SV0 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV0);
          SV1 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV1);
          return DAG.getNode(
              ISD::BITCAST, SDLoc(N), VT,
              DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
        }
      }
    }
  }
  // Canonicalize shuffles according to rules:
  //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
  //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
  //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
  if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
      N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
      TLI.isTypeLegal(VT)) {
    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(N1->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    SDValue SV0 = N1->getOperand(0);
    SDValue SV1 = N1->getOperand(1);
    bool HasSameOp0 = N0 == SV0;
    bool IsSV1Undef = SV1.getOpcode() == ISD::UNDEF;
    if (HasSameOp0 || IsSV1Undef || N0 == SV1)
      // Commute the operands of this shuffle so that next rule
      // will trigger.
      return DAG.getCommutedVectorShuffle(*SVN);
  }

  // Try to fold according to rules:
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
  // Don't try to fold shuffles with illegal type.
  // Only fold if this shuffle is the only user of the other shuffle.
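  // For example, with 4-element vectors:
  //   shuffle(shuffle(A, B, <0, 4, 1, 5>), C, <0, 2, 4, 5>)
  // resolves outer indices 0 and 2 through the inner mask to A[0] and A[1],
  // while indices 4 and 5 select C[0] and C[1] directly, so the pair folds to
  //   shuffle(A, C, <0, 1, 4, 5>).
  // The fold fails if the resolved elements reference more than two distinct
  // source vectors.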
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
      Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
    ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);

    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(OtherSV->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    SDValue SV0, SV1;
    SmallVector<int, 4> Mask;
    // Compute the combined shuffle mask for a shuffle with SV0 as the first
    // operand, and SV1 as the second operand.
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx < 0) {
        // Propagate Undef.
        Mask.push_back(Idx);
        continue;
      }

      SDValue CurrentVec;
      if (Idx < (int)NumElts) {
        // This shuffle index refers to the inner shuffle N0. Lookup the inner
        // shuffle mask to identify which vector is actually referenced.
        Idx = OtherSV->getMaskElt(Idx);
        if (Idx < 0) {
          // Propagate Undef.
          Mask.push_back(Idx);
          continue;
        }

        CurrentVec = (Idx < (int)NumElts) ? OtherSV->getOperand(0)
                                          : OtherSV->getOperand(1);
      } else {
        // This shuffle index references an element within N1.
        CurrentVec = N1;
      }

      // Simple case where 'CurrentVec' is UNDEF.
      if (CurrentVec.getOpcode() == ISD::UNDEF) {
        Mask.push_back(-1);
        continue;
      }

      // Canonicalize the shuffle index. We don't know yet if CurrentVec
      // will be the first or second operand of the combined shuffle.
      Idx = Idx % NumElts;
      if (!SV0.getNode() || SV0 == CurrentVec) {
        // Ok. CurrentVec is the left hand side.
        // Update the mask accordingly.
        SV0 = CurrentVec;
        Mask.push_back(Idx);
        continue;
      }

      // Bail out if we cannot convert the shuffle pair into a single shuffle.
      if (SV1.getNode() && SV1 != CurrentVec)
        return SDValue();

      // Ok. CurrentVec is the right hand side.
      // Update the mask accordingly.
      SV1 = CurrentVec;
      Mask.push_back(Idx + NumElts);
    }

    // Check if all indices in Mask are Undef. If so, propagate Undef.
    bool isUndefMask = true;
    for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
      isUndefMask &= Mask[i] < 0;

    if (isUndefMask)
      return DAG.getUNDEF(VT);

    if (!SV0.getNode())
      SV0 = DAG.getUNDEF(VT);
    if (!SV1.getNode())
      SV1 = DAG.getUNDEF(VT);

    // Avoid introducing shuffles with illegal mask.
    if (!TLI.isShuffleMaskLegal(Mask, VT)) {
      ShuffleVectorSDNode::commuteMask(Mask);

      if (!TLI.isShuffleMaskLegal(Mask, VT))
        return SDValue();

      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
      std::swap(SV0, SV1);
    }

    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
    return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, &Mask[0]);
  }

  return SDValue();
}

SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
  SDValue InVal = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
  // with a VECTOR_SHUFFLE.
  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    SDValue InVec = InVal->getOperand(0);
    SDValue EltNo = InVal->getOperand(1);

    // FIXME: We could support implicit truncation if the shuffle can be
    // scaled to a smaller vector scalar type.
    ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo);
    if (C0 && VT == InVec.getValueType() &&
        VT.getScalarType() == InVal.getValueType()) {
      SmallVector<int, 8> NewMask(VT.getVectorNumElements(), -1);
      int Elt = C0->getZExtValue();
      NewMask[0] = Elt;

      if (TLI.isShuffleMaskLegal(NewMask, VT))
        return DAG.getVectorShuffle(VT, SDLoc(N), InVec, DAG.getUNDEF(VT),
                                    NewMask);
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N2 = N->getOperand(2);

  // If the input vector is a concatenation, and the insert replaces
  // one of the halves, we can optimize into a single concat_vectors.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
      N0->getNumOperands() == 2 && N2.getOpcode() == ISD::Constant) {
    APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue();
    EVT VT = N->getValueType(0);

    // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) ->
    // (concat_vectors Z, Y)
    if (InsIdx == 0)
      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
                         N->getOperand(1), N0.getOperand(1));

    // Upper half: fold (insert_subvector (concat_vectors X, Y), Z) ->
    // (concat_vectors X, Z)
    if (InsIdx == VT.getVectorNumElements()/2)
      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
                         N0.getOperand(0), N->getOperand(1));
  }

  return SDValue();
}

SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
  SDValue N0 = N->getOperand(0);

  // fold (fp_to_fp16 (fp16_to_fp op)) -> op
  if (N0->getOpcode() == ISD::FP16_TO_FP)
    return N0->getOperand(0);

  return SDValue();
}

/// Returns a vector_shuffle if it is able to transform an AND to a
/// vector_shuffle with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
///      vector_shuffle V, Zero, <0, 4, 2, 4>
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDLoc dl(N);

  // Make sure we're not running after operation legalization where it
  // may have custom lowered the vector shuffles.
  if (LegalOperations)
    return SDValue();

  if (N->getOpcode() != ISD::AND)
    return SDValue();

  if (RHS.getOpcode() == ISD::BITCAST)
    RHS = RHS.getOperand(0);

  if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
    SmallVector<int, 8> Indices;
    unsigned NumElts = RHS.getNumOperands();

    for (unsigned i = 0; i != NumElts; ++i) {
      SDValue Elt = RHS.getOperand(i);
      if (isAllOnesConstant(Elt))
        Indices.push_back(i);
      else if (isNullConstant(Elt))
        Indices.push_back(NumElts+i);
      else
        return SDValue();
    }

    // Let's see if the target supports this vector_shuffle.
    EVT RVT = RHS.getValueType();
    if (!TLI.isVectorClearMaskLegal(Indices, RVT))
      return SDValue();

    // Return the new VECTOR_SHUFFLE node.
    EVT EltVT = RVT.getVectorElementType();
    SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
                                   DAG.getConstant(0, dl, EltVT));
    SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, RVT, ZeroOps);
    LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
    SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
    return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);
  }

  return SDValue();
}

/// Visit a binary vector operation, like ADD.
SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
  assert(N->getValueType(0).isVector() &&
         "SimplifyVBinOp only works on vectors!");

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  if (SDValue Shuffle = XformToShuffleWithZero(N))
    return Shuffle;

  // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold
  // this operation.
  if (LHS.getOpcode() == ISD::BUILD_VECTOR &&
      RHS.getOpcode() == ISD::BUILD_VECTOR) {
    // Check if both vectors are constants. If not bail out.
    if (!(cast<BuildVectorSDNode>(LHS)->isConstant() &&
          cast<BuildVectorSDNode>(RHS)->isConstant()))
      return SDValue();

    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
      SDValue LHSOp = LHS.getOperand(i);
      SDValue RHSOp = RHS.getOperand(i);

      // Can't fold divide by zero.
      if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
          N->getOpcode() == ISD::FDIV) {
        if (isNullConstant(RHSOp) || (RHSOp.getOpcode() == ISD::ConstantFP &&
             cast<ConstantFPSDNode>(RHSOp.getNode())->isZero()))
          break;
      }

      EVT VT = LHSOp.getValueType();
      EVT RVT = RHSOp.getValueType();
      if (RVT != VT) {
        // Integer BUILD_VECTOR operands may have types larger than the element
        // size (e.g., when the element type is not legal).  Prior to type
        // legalization, the types may not match between the two BUILD_VECTORS.
        // Truncate one of the operands to make them match.
        if (RVT.getSizeInBits() > VT.getSizeInBits()) {
          RHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, RHSOp);
        } else {
          LHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), RVT, LHSOp);
          VT = RVT;
        }
      }
      SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(LHS), VT,
                                   LHSOp, RHSOp);
      if (FoldOp.getOpcode() != ISD::UNDEF &&
          FoldOp.getOpcode() != ISD::Constant &&
          FoldOp.getOpcode() != ISD::ConstantFP)
        break;
      Ops.push_back(FoldOp);
      AddToWorklist(FoldOp.getNode());
    }

    if (Ops.size() == LHS.getNumOperands())
      return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), LHS.getValueType(), Ops);
  }

  // Type legalization might introduce new shuffles in the DAG.
  // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
  //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
  if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
      isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
      LHS.getOperand(1).getOpcode() == ISD::UNDEF &&
      RHS.getOperand(1).getOpcode() == ISD::UNDEF) {
    ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);

    if (SVN0->getMask().equals(SVN1->getMask())) {
      EVT VT = N->getValueType(0);
      SDValue UndefVector = LHS.getOperand(1);
      SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                     LHS.getOperand(0), RHS.getOperand(0));
      AddUsersToWorklist(N);
      return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
                                  &SVN0->getMask()[0]);
    }
  }

  return SDValue();
}

SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0,
                                    SDValue N1, SDValue N2) {
  assert(N0.getOpcode() == ISD::SETCC &&
         "First argument must be a SetCC node!");

  SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
                                 cast<CondCodeSDNode>(N0.getOperand(2))->get());

  // If we got a simplified select_cc node back from SimplifySelectCC, then
  // break it down into a new SETCC node, and a new SELECT node, and then return
  // the SELECT node, since we were called with a SELECT node.
  if (SCC.getNode()) {
    // Check to see if we got a select_cc back (to turn into setcc/select).
    // Otherwise, just return whatever node we got back, like fabs.
    if (SCC.getOpcode() == ISD::SELECT_CC) {
      SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
                                  N0.getValueType(),
                                  SCC.getOperand(0), SCC.getOperand(1),
                                  SCC.getOperand(4));
      AddToWorklist(SETCC.getNode());
      return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
                           SCC.getOperand(2), SCC.getOperand(3));
    }

    return SCC;
  }
  return SDValue();
}

/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
/// being selected between, see if we can simplify the select.  Callers of this
/// should assume that TheSelect is deleted if this returns true.  As such,
/// they should return the appropriate thing (e.g. the node) back to the
/// top-level of the DAG combiner loop to avoid it being looked at.
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
                                    SDValue RHS) {
  // fold (select (setcc x, -0.0, *lt), NaN, (fsqrt x))
  // The select + setcc is redundant, because fsqrt returns NaN for X < -0.
  if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
    if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
      // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
      SDValue Sqrt = RHS;
      ISD::CondCode CC;
      SDValue CmpLHS;
      const ConstantFPSDNode *NegZero = nullptr;

      if (TheSelect->getOpcode() == ISD::SELECT_CC) {
        CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
        CmpLHS = TheSelect->getOperand(0);
        NegZero = isConstOrConstSplatFP(TheSelect->getOperand(1));
      } else {
        // SELECT or VSELECT
        SDValue Cmp = TheSelect->getOperand(0);
        if (Cmp.getOpcode() == ISD::SETCC) {
          CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
          CmpLHS = Cmp.getOperand(0);
          NegZero = isConstOrConstSplatFP(Cmp.getOperand(1));
        }
      }
      if (NegZero && NegZero->isNegative() && NegZero->isZero() &&
          Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
          CC == ISD::SETULT || CC == ISD::SETLT)) {
        // We have: (select (setcc x, -0.0, *lt), NaN, (fsqrt x))
        CombineTo(TheSelect, Sqrt);
        return true;
      }
    }
  }

  // Cannot simplify select with vector condition
  if (TheSelect->getOperand(0).getValueType().isVector()) return false;

  // If this is a select from two identical things, try to pull the operation
  // through the select.
  if (LHS.getOpcode() != RHS.getOpcode() ||
      !LHS.hasOneUse() || !RHS.hasOneUse())
    return false;

  // If this is a load and the token chain is identical, replace the select
  // of two loads with a load through a select of the address to load from.
  // This triggers in things like "select bool X, 10.0, 123.0" after the FP
  // constants have been dropped into the constant pool.
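  // For example, "select C, (load p), (load q)" with a shared chain becomes
  // "load (select C, p, q)", replacing two loads (and possibly two constant
  // pool entries) with one load from a selected address.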
  if (LHS.getOpcode() == ISD::LOAD) {
    LoadSDNode *LLD = cast<LoadSDNode>(LHS);
    LoadSDNode *RLD = cast<LoadSDNode>(RHS);

    // Token chains must be identical.
    if (LHS.getOperand(0) != RHS.getOperand(0) ||
        // Do not let this transformation reduce the number of volatile loads.
        LLD->isVolatile() || RLD->isVolatile() ||
        // FIXME: If either is a pre/post inc/dec load,
        // we'd need to split out the address adjustment.
        LLD->isIndexed() || RLD->isIndexed() ||
        // If this is an EXTLOAD, the VT's must match.
        LLD->getMemoryVT() != RLD->getMemoryVT() ||
        // If this is an EXTLOAD, the kind of extension must match.
        (LLD->getExtensionType() != RLD->getExtensionType() &&
         // The only exception is if one of the extensions is anyext.
         LLD->getExtensionType() != ISD::EXTLOAD &&
         RLD->getExtensionType() != ISD::EXTLOAD) ||
        // FIXME: this discards src value information.  This is
        // over-conservative. It would be beneficial to be able to remember
        // both potential memory locations.  Since we are discarding
        // src value info, don't do the transformation if the memory
        // locations are not in the default address space.
        LLD->getPointerInfo().getAddrSpace() != 0 ||
        RLD->getPointerInfo().getAddrSpace() != 0 ||
        !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
                                      LLD->getBasePtr().getValueType()))
      return false;

    // Check that the select condition doesn't reach either load.  If so,
    // folding this will induce a cycle into the DAG.  If not, this is safe to
    // xform, so create a select of the addresses.
    SDValue Addr;
    if (TheSelect->getOpcode() == ISD::SELECT) {
      SDNode *CondNode = TheSelect->getOperand(0).getNode();
      if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
          (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
        return false;
      // The loads must not depend on one another.
      if (LLD->isPredecessorOf(RLD) ||
          RLD->isPredecessorOf(LLD))
        return false;
      Addr = DAG.getSelect(SDLoc(TheSelect),
                           LLD->getBasePtr().getValueType(),
                           TheSelect->getOperand(0), LLD->getBasePtr(),
                           RLD->getBasePtr());
    } else {  // Otherwise SELECT_CC
      SDNode *CondLHS = TheSelect->getOperand(0).getNode();
      SDNode *CondRHS = TheSelect->getOperand(1).getNode();

      if ((LLD->hasAnyUseOfValue(1) &&
           (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
          (RLD->hasAnyUseOfValue(1) &&
           (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
        return false;

      Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
                         LLD->getBasePtr().getValueType(),
                         TheSelect->getOperand(0),
                         TheSelect->getOperand(1),
                         LLD->getBasePtr(), RLD->getBasePtr(),
                         TheSelect->getOperand(4));
    }

    SDValue Load;
    // It is safe to replace the two loads if they have different alignments,
    // but the new load must be the minimum (most restrictive) alignment of the
    // inputs.
    bool isInvariant = LLD->isInvariant() && RLD->isInvariant();
    unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
    if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
      Load = DAG.getLoad(TheSelect->getValueType(0),
                         SDLoc(TheSelect),
                         // FIXME: Discards pointer and AA info.
                         LLD->getChain(), Addr, MachinePointerInfo(),
                         LLD->isVolatile(), LLD->isNonTemporal(),
                         isInvariant, Alignment);
    } else {
      Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ?
                            RLD->getExtensionType() : LLD->getExtensionType(),
                            SDLoc(TheSelect),
                            TheSelect->getValueType(0),
                            // FIXME: Discards pointer and AA info.
                            LLD->getChain(), Addr, MachinePointerInfo(),
                            LLD->getMemoryVT(), LLD->isVolatile(),
                            LLD->isNonTemporal(), isInvariant, Alignment);
    }

    // Users of the select now use the result of the load.
    CombineTo(TheSelect, Load);

    // Users of the old loads now use the new load's chain.  We know the
    // old-load value is dead now.
    CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
    CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
    return true;
  }

  return false;
}

/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
/// where 'cond' is the comparison specified by CC.
SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
                                      SDValue N2, SDValue N3,
                                      ISD::CondCode CC, bool NotExtCompare) {
  // (x ? y : y) -> y.
  if (N2 == N3) return N2;

  EVT VT = N2.getValueType();
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());

  // Determine if the condition we're dealing with is constant
  SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
                              N0, N1, CC, DL, false);
  if (SCC.getNode()) AddToWorklist(SCC.getNode());

  if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
    // fold select_cc true, x, y -> x
    // fold select_cc false, x, y -> y
    return !SCCC->isNullValue() ? N2 : N3;
  }

  // Check to see if we can simplify the select into an fabs node
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
    // Allow either -0.0 or 0.0
    if (CFP->isZero()) {
      // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
      if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
          N0 == N2 && N3.getOpcode() == ISD::FNEG &&
          N2 == N3.getOperand(0))
        return DAG.getNode(ISD::FABS, DL, VT, N0);

      // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
      if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
          N0 == N3 && N2.getOpcode() == ISD::FNEG &&
          N2.getOperand(0) == N3)
        return DAG.getNode(ISD::FABS, DL, VT, N3);
    }
  }

  // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
  // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
  // in it.  This is a win when the constant is not otherwise available because
  // it replaces two constant pool loads with one.  We only do this if the FP
  // type is known to be legal, because if it isn't, then we are before legalize
  // types and we want the other legalization to happen first (e.g. to avoid
  // messing with soft float) and if the ConstantFP is not legal, because if
  // it is legal, we may not need to store the FP constant in a constant pool.
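  // The constant pool entry is laid out as {FV, TV}; the compare result
  // selects an offset of either 0 (FV) or sizeof(element) (TV) into that
  // array, so both possible values come back through a single indexed load.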
  if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
    if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
      if (TLI.isTypeLegal(N2.getValueType()) &&
          (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
               TargetLowering::Legal &&
           !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) &&
           !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) &&
          // If both constants have multiple uses, then we won't need to do an
          // extra load, they are likely around in registers for other users.
          (TV->hasOneUse() || FV->hasOneUse())) {
        Constant *Elts[] = {
          const_cast<ConstantFP*>(FV->getConstantFPValue()),
          const_cast<ConstantFP*>(TV->getConstantFPValue())
        };
        Type *FPTy = Elts[0]->getType();
        const DataLayout &TD = DAG.getDataLayout();

        // Create a ConstantArray of the two constants.
        Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
        SDValue CPIdx =
            DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
                                TD.getPrefTypeAlignment(FPTy));
        unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();

        // Get the offsets to the 0 and 1 element of the array so that we can
        // select between them.
        SDValue Zero = DAG.getIntPtrConstant(0, DL);
        unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
        SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));

        SDValue Cond = DAG.getSetCC(DL,
                                    getSetCCResultType(N0.getValueType()),
                                    N0, N1, CC);
        AddToWorklist(Cond.getNode());
        SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
                                          Cond, One, Zero);
        AddToWorklist(CstOffset.getNode());
        CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
                            CstOffset);
        AddToWorklist(CPIdx.getNode());
        return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
                           MachinePointerInfo::getConstantPool(), false,
                           false, false, Alignment);
      }
    }

  // Check to see if we can perform the "gzip trick", transforming
  // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1)), A)
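  // For example, with i32 X: "(X < 0) ? A : 0" becomes "(X >>s 31) & A",
  // since the arithmetic shift produces all-ones when X is negative and
  // zero otherwise.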
  if (isNullConstant(N3) && CC == ISD::SETLT &&
      (isNullConstant(N1) ||                 // (a < 0) ? b : 0
       (isOneConstant(N1) && N0 == N2))) {   // (a < 1) ? a : 0
    EVT XType = N0.getValueType();
    EVT AType = N2.getValueType();
    if (XType.bitsGE(AType)) {
      // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a
      // single-bit constant.
      if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
        unsigned ShCtV = N2C->getAPIntValue().logBase2();
        ShCtV = XType.getSizeInBits() - ShCtV - 1;
        SDValue ShCt = DAG.getConstant(ShCtV, SDLoc(N0),
                                       getShiftAmountTy(N0.getValueType()));
        SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0),
                                    XType, N0, ShCt);
        AddToWorklist(Shift.getNode());

        if (XType.bitsGT(AType)) {
          Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
          AddToWorklist(Shift.getNode());
        }

        return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
      }

      SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0),
                                  XType, N0,
                                  DAG.getConstant(XType.getSizeInBits() - 1,
                                                  SDLoc(N0),
                                                  getShiftAmountTy(N0.getValueType())));
      AddToWorklist(Shift.getNode());

      if (XType.bitsGT(AType)) {
        Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
        AddToWorklist(Shift.getNode());
      }

      return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
    }
  }

  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
  // where y has a single bit set.
  // A plaintext description would be, we can turn the SELECT_CC into an AND
  // when the condition can be materialized as an all-ones register.  Any
  // single bit-test can be materialized as an all-ones register with
  // shift-left and shift-right-arith.
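  // For example, testing bit 3 of an i32 x: shifting left by 28 moves that
  // bit into the sign position, and an arithmetic shift right by 31 then
  // smears it, yielding all-ones when the bit was set and zero otherwise.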
  if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
      N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
    SDValue AndLHS = N0->getOperand(0);
    ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
      // Shift the tested bit over the sign bit.
      APInt AndMask = ConstAndRHS->getAPIntValue();
      SDValue ShlAmt =
          DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
                          getShiftAmountTy(AndLHS.getValueType()));
      SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);

      // Now arithmetic right shift it all the way over, so the result is
      // either all-ones, or zero.
      SDValue ShrAmt =
          DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
                          getShiftAmountTy(Shl.getValueType()));
      SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);

      return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
    }
  }

  // fold select C, 16, 0 -> shl C, 4
  if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
      TLI.getBooleanContents(N0.getValueType()) ==
          TargetLowering::ZeroOrOneBooleanContent) {

    // If the caller doesn't want us to simplify this into a zext of a compare,
    // don't do it.
    if (NotExtCompare && N2C->isOne())
      return SDValue();

    // Get a SetCC of the condition
    // NOTE: Don't create a SETCC if it's not legal on this target.
    if (!LegalOperations ||
        TLI.isOperationLegal(ISD::SETCC,
          LegalTypes ? getSetCCResultType(N0.getValueType()) : MVT::i1)) {
      SDValue Temp, SCC;
      // cast from setcc result type to select result type
      if (LegalTypes) {
        SCC = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
                           N0, N1, CC);
        if (N2.getValueType().bitsLT(SCC.getValueType()))
          Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
                                        N2.getValueType());
        else
          Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
                             N2.getValueType(), SCC);
      } else {
        SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
        Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
                           N2.getValueType(), SCC);
      }

      AddToWorklist(SCC.getNode());
      AddToWorklist(Temp.getNode());

      if (N2C->isOne())
        return Temp;

      // shl setcc result by log2 n2c
      return DAG.getNode(
          ISD::SHL, DL, N2.getValueType(), Temp,
          DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
                          getShiftAmountTy(Temp.getValueType())));
    }
  }

  // Check to see if this is the equivalent of setcc
  // FIXME: Turn all of these into setcc if setcc is legal; otherwise, go
  // ahead with the folds.
  if (0 && isNullConstant(N3) && isOneConstant(N2)) {
    EVT XType = N0.getValueType();
    if (!LegalOperations ||
        TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(XType))) {
      SDValue Res = DAG.getSetCC(DL, getSetCCResultType(XType), N0, N1, CC);
      if (Res.getValueType() != VT)
        Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
      return Res;
    }

    // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X))))
    if (isNullConstant(N1) && CC == ISD::SETEQ &&
        (!LegalOperations ||
         TLI.isOperationLegal(ISD::CTLZ, XType))) {
      SDValue Ctlz = DAG.getNode(ISD::CTLZ, SDLoc(N0), XType, N0);
      return DAG.getNode(ISD::SRL, DL, XType, Ctlz,
                         DAG.getConstant(Log2_32(XType.getSizeInBits()),
                                         SDLoc(Ctlz),
                                         getShiftAmountTy(Ctlz.getValueType())));
    }
    // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1))
    if (isNullConstant(N1) && CC == ISD::SETGT) {
      SDLoc DL(N0);
      SDValue NegN0 = DAG.getNode(ISD::SUB, DL,
                                  XType, DAG.getConstant(0, DL, XType), N0);
      SDValue NotN0 = DAG.getNOT(DL, N0, XType);
      return DAG.getNode(ISD::SRL, DL, XType,
                         DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0),
                         DAG.getConstant(XType.getSizeInBits() - 1, DL,
                                         getShiftAmountTy(XType)));
    }
    // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1))
    if (isAllOnesConstant(N1) && CC == ISD::SETGT) {
      SDLoc DL(N0);
      SDValue Sign = DAG.getNode(ISD::SRL, DL, XType, N0,
                                 DAG.getConstant(XType.getSizeInBits() - 1, DL,
                                                 getShiftAmountTy(N0.getValueType())));
      return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, DL,
                                                                    XType));
    }
  }

  // Check to see if this is an integer abs.
  // select_cc setg[te] X,  0, X, -X ->
  // select_cc setgt    X, -1, X, -X ->
  // select_cc setl[te] X,  0, -X, X ->
  // select_cc setlt    X,  1, -X, X ->
  //   Y = sra (X, size(X)-1); xor (add (X, Y), Y)
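  // For example, with i32 X = -5: Y = X >>s 31 = -1, add(X, Y) = -6, and
  // xor(-6, -1) = 5, which is abs(X).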
  if (N1C) {
    ConstantSDNode *SubC = nullptr;
    if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
        N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
      SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
    else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
              (N1C->isOne() && CC == ISD::SETLT)) &&
             N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
      SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));

    EVT XType = N0.getValueType();
    if (SubC && SubC->isNullValue() && XType.isInteger()) {
      SDLoc DL(N0);
      SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
                                  N0,
                                  DAG.getConstant(XType.getSizeInBits() - 1, DL,
                                                  getShiftAmountTy(N0.getValueType())));
      SDValue Add = DAG.getNode(ISD::ADD, DL,
                                XType, N0, Shift);
      AddToWorklist(Shift.getNode());
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
    }
  }

  return SDValue();
}

/// This is a stub for TargetLowering::SimplifySetCC.
SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
                                   SDValue N1, ISD::CondCode Cond,
                                   SDLoc DL, bool foldBooleans) {
  TargetLowering::DAGCombinerInfo
    DagCombineInfo(DAG, Level, false, this);
  return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
}

/// Given an ISD::SDIV node expressing a divide by constant, return
/// a DAG expression to select that will generate the same value by multiplying
/// by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
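/// For example, a signed i32 divide by 3 becomes a multiply-high by the
/// magic constant 0x55555556 (~2^32/3) followed by a small correction for
/// negative dividends, avoiding a hardware divide entirely.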
SDValue DAGCombiner::BuildSDIV(SDNode *N) {
  ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
  if (!C)
    return SDValue();

  // Avoid division by zero.
  if (C->isNullValue())
    return SDValue();

  std::vector<SDNode*> Built;
  SDValue S =
      TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);

  for (SDNode *N : Built)
    AddToWorklist(N);
  return S;
}

/// Given an ISD::SDIV node expressing a divide by constant power of 2, return
/// a DAG expression that will generate the same value by right shifting.
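/// For example, i32 X/4 can be emitted as "(X + ((X >>s 31) >>u 30)) >>s 2":
/// the bias term adds 3 only when X is negative, so the shift rounds toward
/// zero as signed division requires.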
SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
  ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
  if (!C)
    return SDValue();

  // Avoid division by zero.
  if (C->isNullValue())
    return SDValue();

  std::vector<SDNode *> Built;
  SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built);

  for (SDNode *N : Built)
    AddToWorklist(N);
  return S;
}

/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
/// expression that will generate the same value by multiplying by a magic
/// number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
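/// For example, an unsigned i32 divide by 3 becomes a multiply-high by
/// 0xAAAAAAAB (~2^33/3) followed by a logical right shift by 1.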
SDValue DAGCombiner::BuildUDIV(SDNode *N) {
  ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
  if (!C)
    return SDValue();

  // Avoid division by zero.
  if (C->isNullValue())
    return SDValue();

  std::vector<SDNode*> Built;
  SDValue S =
      TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);

  for (SDNode *N : Built)
    AddToWorklist(N);
  return S;
}

SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) {
  if (Level >= AfterLegalizeDAG)
    return SDValue();

  // Expose the DAG combiner to the target combiner implementations.
  TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);

  unsigned Iterations = 0;
  if (SDValue Est = TLI.getRecipEstimate(Op, DCI, Iterations)) {
    if (Iterations) {
      // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
      // For the reciprocal, we need to find the zero of the function:
      //   F(X) = A X - 1 [which has a zero at X = 1/A]
      //     =>
      //   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
      //     does not require additional intermediate precision]
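      // Convergence is quadratic: for A = 3 and the estimate X_0 = 0.3,
      //   X_1 = 0.3  * (2 - 0.9)  = 0.33
      //   X_2 = 0.33 * (2 - 0.99) = 0.3333
      // roughly doubling the number of correct digits each iteration.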
      EVT VT = Op.getValueType();
      SDLoc DL(Op);
      SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
      AddToWorklist(Est.getNode());

      // Newton iterations: Est = Est + Est (1 - Arg * Est)
      for (unsigned i = 0; i < Iterations; ++i) {
        SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est);
        AddToWorklist(NewEst.getNode());

        NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst);
        AddToWorklist(NewEst.getNode());

        NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst);
        AddToWorklist(NewEst.getNode());

        Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst);
        AddToWorklist(Est.getNode());
      }
    }
    return Est;
  }

  return SDValue();
}

/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal sqrt, we need to find the zero of the function:
///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
///     =>
///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
/// As a result, we precompute A/2 prior to the iteration loop.
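/// The step follows directly from Newton's rule with F'(X) = -2/X^3:
///   X_{i+1} = X_i - (1/X_i^2 - A) / (-2/X_i^3)
///           = X_i + (X_i/2) (1 - A X_i^2)
///           = X_i (1.5 - (A/2) X_i^2)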
SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est,
                                          unsigned Iterations) {
  EVT VT = Arg.getValueType();
  SDLoc DL(Arg);
  SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);

  // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
  // this entire sequence requires only one FP constant.
  SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg);
  AddToWorklist(HalfArg.getNode());

  HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg);
  AddToWorklist(HalfArg.getNode());

  // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
  for (unsigned i = 0; i < Iterations; ++i) {
    SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est);
    AddToWorklist(NewEst.getNode());

    NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst);
    AddToWorklist(NewEst.getNode());

    NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst);
    AddToWorklist(NewEst.getNode());

    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst);
    AddToWorklist(Est.getNode());
  }
  return Est;
}

/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal sqrt, we need to find the zero of the function:
///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
///     =>
///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
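/// This is the same update as above with the 0.5 factored out differently:
///   X_i (1.5 - 0.5 A X_i^2) = (-0.5 X_i) (A X_i^2 - 3.0)
/// which trades the A/2 precomputation for a second FP constant.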
SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est,
                                          unsigned Iterations) {
  EVT VT = Arg.getValueType();
  SDLoc DL(Arg);
  SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
  SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);

  // Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est)
  for (unsigned i = 0; i < Iterations; ++i) {
    SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf);
    AddToWorklist(HalfEst.getNode());

    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est);
    AddToWorklist(Est.getNode());

    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg);
    AddToWorklist(Est.getNode());

    Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree);
    AddToWorklist(Est.getNode());

    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst);
    AddToWorklist(Est.getNode());
  }
  return Est;
}

SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) {
  if (Level >= AfterLegalizeDAG)
    return SDValue();

  // Expose the DAG combiner to the target combiner implementations.
  TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);

  unsigned Iterations = 0;
  bool UseOneConstNR = false;
  if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations, UseOneConstNR)) {
    AddToWorklist(Est.getNode());
    if (Iterations) {
      Est = UseOneConstNR ?
          BuildRsqrtNROneConst(Op, Est, Iterations) :
          BuildRsqrtNRTwoConst(Op, Est, Iterations);
    }
    return Est;
  }

  return SDValue();
}

/// Return true if base is a frame index, which is known not to alias with
/// anything but itself.  Provides base object and offset as results.
static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
                           const GlobalValue *&GV, const void *&CV) {
  // Assume it is a primitive operation.
  Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr;

  // If it's adding a simple constant then integrate the offset.
  if (Base.getOpcode() == ISD::ADD) {
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
      Base = Base.getOperand(0);
      Offset += C->getZExtValue();
    }
  }

  // Return the underlying GlobalValue, and update the Offset.  Return false
  // for GlobalAddressSDNode since the same GlobalAddress may be represented
  // by multiple nodes with different offsets.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
    GV = G->getGlobal();
    Offset += G->getOffset();
    return false;
  }

  // Return the underlying Constant value, and update the Offset.  Return false
  // for ConstantSDNodes since the same constant pool entry may be represented
  // by multiple nodes with different offsets.
  if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
    CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal()
                                         : (const void *)C->getConstVal();
    Offset += C->getOffset();
    return false;
  }

  // If it's any of the following then it can't alias with anything but itself.
  return isa<FrameIndexSDNode>(Base);
}

/// Return true if there is any possibility that the two addresses overlap.
bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
  // If they are the same then they must be aliases.
  if (Op0->getBasePtr() == Op1->getBasePtr()) return true;

  // If they are both volatile then they cannot be reordered.
  if (Op0->isVolatile() && Op1->isVolatile()) return true;

  // If one operation reads from invariant memory, and the other may store,
  // they cannot alias. These should really be checking the equivalent of
  // mayWrite, but it only matters for memory nodes other than load/store.
  if (Op0->isInvariant() && Op1->writeMem())
    return false;

  if (Op1->isInvariant() && Op0->writeMem())
    return false;

  // Gather base node and offset information.
  SDValue Base1, Base2;
  int64_t Offset1, Offset2;
  const GlobalValue *GV1, *GV2;
  const void *CV1, *CV2;
  bool isFrameIndex1 = FindBaseOffset(Op0->getBasePtr(),
                                      Base1, Offset1, GV1, CV1);
  bool isFrameIndex2 = FindBaseOffset(Op1->getBasePtr(),
                                      Base2, Offset2, GV2, CV2);

  // If they have the same base address then check to see if they overlap.
  if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
    return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
             (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
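  // This is a standard interval-overlap test on byte ranges: e.g. a 4-byte
  // access at offset 0 and a 4-byte access at offset 4 off the same base do
  // not alias, because [0, 4) and [4, 8) are disjoint.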
  // It is possible for different frame indices to alias each other, mostly
  // when tail call optimization reuses return address slots for arguments.
  // To catch this case, look up the actual index of frame indices to compute
  // the real alias relationship.
  if (isFrameIndex1 && isFrameIndex2) {
    MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
    Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
    Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
    return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
             (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
  }

  // Otherwise, if we know what the bases are, and they aren't identical, then
  // we know they cannot alias.
  if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
    return false;

  // If we know required SrcValue1 and SrcValue2 have relatively large
  // alignment compared to the size and offset of the access, we may be able
  // to prove they do not alias.  This check is conservative for now to catch
  // cases created by splitting vector types.
  if ((Op0->getOriginalAlignment() == Op1->getOriginalAlignment()) &&
      (Op0->getSrcValueOffset() != Op1->getSrcValueOffset()) &&
      (Op0->getMemoryVT().getSizeInBits() >> 3 ==
       Op1->getMemoryVT().getSizeInBits() >> 3) &&
      (Op0->getOriginalAlignment() >
       (Op0->getMemoryVT().getSizeInBits() >> 3))) {
    int64_t OffAlign1 = Op0->getSrcValueOffset() % Op0->getOriginalAlignment();
    int64_t OffAlign2 = Op1->getSrcValueOffset() % Op1->getOriginalAlignment();

    // There is no overlap between these relatively aligned accesses of similar
    // size, return no alias.
    if ((OffAlign1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign2 ||
        (OffAlign2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign1)
      return false;
  }

  bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
                   ? CombinerGlobalAA
                   : DAG.getSubtarget().useAA();
#ifndef NDEBUG
  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;
#endif
  if (UseAA &&
      Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
    // Use alias analysis information.
    int64_t MinOffset = std::min(Op0->getSrcValueOffset(),
                                 Op1->getSrcValueOffset());
    int64_t Overlap1 = (Op0->getMemoryVT().getSizeInBits() >> 3) +
        Op0->getSrcValueOffset() - MinOffset;
    int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) +
        Op1->getSrcValueOffset() - MinOffset;
    AliasResult AAResult =
        AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap1,
                                UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
                 MemoryLocation(Op1->getMemOperand()->getValue(), Overlap2,
                                UseTBAA ? Op1->getAAInfo() : AAMDNodes()));
    if (AAResult == NoAlias)
      return false;
  }

  // Otherwise we have to assume they alias.
  return true;
}

/// Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
                                   SmallVectorImpl<SDValue> &Aliases) {
  SmallVector<SDValue, 8> Chains;     // List of chains to visit.
  SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.

  // Get alias information for node.
  bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();

  // Starting off.
  Chains.push_back(OriginalChain);
  unsigned Depth = 0;

  // Look at each chain and determine if it is an alias.  If so, add it to the
  // aliases list.  If not, then continue up the chain looking for the next
  // candidate.
  while (!Chains.empty()) {
    SDValue Chain = Chains.pop_back_val();

    // For TokenFactor nodes, look at each operand and only continue up the
    // chain until we find two aliases.  If we've seen two aliases, assume
    // we'll find more and revert to original chain since the xform is
    // unlikely to be profitable.
    //
    // FIXME: The depth check could be made to return the last non-aliasing
    // chain we found before we hit a tokenfactor rather than the original
    // chain.
    if (Depth > 6 || Aliases.size() == 2) {
      Aliases.clear();
      Aliases.push_back(OriginalChain);
      return;
    }

    // Don't bother if we've been here before.
    if (!Visited.insert(Chain.getNode()).second)
      continue;

    switch (Chain.getOpcode()) {
    case ISD::EntryToken:
      // Entry token is ideal chain operand, but handled in FindBetterChain.
      break;

    case ISD::LOAD:
    case ISD::STORE: {
      // Get alias information for Chain.
      bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
          !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();

      // If chain is alias then stop here.
      if (!(IsLoad && IsOpLoad) &&
          isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
        Aliases.push_back(Chain);
      } else {
        // Look further up the chain.
        Chains.push_back(Chain.getOperand(0));
        ++Depth;
      }
      break;
    }

    case ISD::TokenFactor:
      // We have to check each of the operands of the token factor for "small"
      // token factors, so we queue them up.  Adding the operands to the queue
      // (stack) in reverse order maintains the original order and increases
      // the likelihood that getNode will find a matching token factor (CSE.)
      if (Chain.getNumOperands() > 16) {
        Aliases.push_back(Chain);
        break;
      }

      for (unsigned n = Chain.getNumOperands(); n;)
        Chains.push_back(Chain.getOperand(--n));
      ++Depth;
      break;

    default:
      // For all other instructions we will just have to take what we can get.
      Aliases.push_back(Chain);
      break;
    }
  }

  // We need to be careful here to also search for aliases through the
  // value operand of a store, etc.  Consider the following situation:
  //   Token1 = ...
  //   L1 = load Token1, %52
  //   S1 = store Token1, L1, %51
  //   L2 = load Token1, %52+8
  //   S2 = store Token1, L2, %51+8
  //   Token2 = Token(S1, S2)
  //   L3 = load Token2, %53
  //   S3 = store Token2, L3, %52
  //   L4 = load Token2, %53+8
  //   S4 = store Token2, L4, %52+8
  // If we search for aliases of S3 (which accesses address %52), and we look
  // only through the chain, then we'll miss the trivial dependence on L1
  // (which also loads from %52).  We then might change all loads and
  // stores to use Token1 as their chain operand, which could result in
  // copying %53 into %52 before copying %52 into %51 (which should
  // happen first).
  //
  // The problem is, however, that searching for such data dependencies
  // can become expensive, and the cost is not directly related to the
  // chain depth.  Instead, we'll rule out such configurations here by
  // insisting that we've visited all chain users (except for users
  // of the original chain, which is not necessary).  When doing this,
  // we need to look through nodes we don't care about (otherwise, things
  // like register copies will interfere with trivial cases).
  SmallVector<const SDNode *, 16> Worklist;
  for (const SDNode *N : Visited)
    if (N != OriginalChain.getNode())
      Worklist.push_back(N);

  while (!Worklist.empty()) {
    const SDNode *M = Worklist.pop_back_val();

    // We have already visited M, and want to make sure we've visited any uses
    // of M that we care about.  For uses that we've not visited, and don't
    // care about, queue them to the worklist.
    for (SDNode::use_iterator UI = M->use_begin(),
         UIE = M->use_end(); UI != UIE; ++UI)
      if (UI.getUse().getValueType() == MVT::Other &&
          Visited.insert(*UI).second) {
        if (isa<MemSDNode>(*UI)) {
          // We've not visited this use, and we care about it (it could have an
          // ordering dependency with the original node).
          Aliases.clear();
          Aliases.push_back(OriginalChain);
          return;
        }

        // We've not visited this use, but we don't care about it.  Mark it as
        // visited and enqueue it to the worklist.
        Worklist.push_back(*UI);
      }
  }
}

/// Walk up chain skipping non-aliasing memory nodes, looking for a better
/// chain (aliasing node.)
SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
  SmallVector<SDValue, 8> Aliases;  // Ops for replacing token factor.

  // Accumulate all the aliases to this node.
  GatherAllAliases(N, OldChain, Aliases);

  // If no operands then chain to entry token.
  if (Aliases.size() == 0)
    return DAG.getEntryNode();

  // If a single operand then chain to it.  We don't need to revisit it.
  if (Aliases.size() == 1)
    return Aliases[0];

  // Construct a custom tailored token factor.
  return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
}

/// This is the entry point for the file.
void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
                           CodeGenOpt::Level OptLevel) {
  /// This is the main entry point to this class.
  DAGCombiner(*this, AA, OptLevel).Run(Level);
}