//===- ScalarReplAggregatesHLSL.cpp - Scalar Replacement of Aggregates ----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Based on ScalarReplAggregates.cpp. The difference is that the HLSL version
// keeps arrays intact so that it can break up every structure.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "dxc/HLSL/HLOperations.h"
#include "dxc/DXIL/DxilConstants.h"
#include "dxc/HLSL/HLModule.h"
#include "dxc/DXIL/DxilUtil.h"
#include "dxc/DXIL/DxilModule.h"
#include "dxc/HlslIntrinsicOp.h"
#include "dxc/DXIL/DxilTypeSystem.h"
#include "dxc/HLSL/HLMatrixLowerHelper.h"
#include "dxc/HLSL/HLMatrixType.h"
#include "dxc/DXIL/DxilOperations.h"
#include "dxc/HLSL/HLLowerUDT.h"
#include "dxc/HLSL/HLUtil.h"
#include <deque>
#include <unordered_map>
#include <unordered_set>
#include <queue>
using namespace llvm;
using namespace hlsl;

#define DEBUG_TYPE "scalarreplhlsl"

STATISTIC(NumReplaced, "Number of allocas broken up");
STATISTIC(NumPromoted, "Number of allocas promoted");
STATISTIC(NumAdjusted, "Number of scalar allocas adjusted to allow promotion");

namespace {
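// SROA_Helper rewrites every use of an aggregate value (an alloca or a global
// variable) in terms of the flattened element values created for it, and
// queues the original, now-redundant instructions in DeadInsts for deletion.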
class SROA_Helper {
public:
  // Split V into AllocaInsts with Builder and save the new AllocaInsts into
  // Elts. Then do SROA on V.
  static bool DoScalarReplacement(Value *V, std::vector<Value *> &Elts,
                                  Type *&BrokenUpTy, uint64_t &NumInstances,
                                  IRBuilder<> &Builder, bool bFlatVector,
                                  bool hasPrecise, DxilTypeSystem &typeSys,
                                  const DataLayout &DL,
                                  SmallVector<Value *, 32> &DeadInsts,
                                  DominatorTree *DT);

  static bool DoScalarReplacement(GlobalVariable *GV,
                                  std::vector<Value *> &Elts,
                                  IRBuilder<> &Builder, bool bFlatVector,
                                  bool hasPrecise, DxilTypeSystem &typeSys,
                                  const DataLayout &DL,
                                  SmallVector<Value *, 32> &DeadInsts,
                                  DominatorTree *DT);

  static unsigned GetEltAlign(unsigned ValueAlign, const DataLayout &DL,
                              Type *EltTy, unsigned Offset);

  // Lower memcpy related to V.
  static bool LowerMemcpy(Value *V, DxilFieldAnnotation *annotation,
                          DxilTypeSystem &typeSys, const DataLayout &DL,
                          DominatorTree *DT, bool bAllowReplace);
  static void MarkEmptyStructUsers(Value *V,
                                   SmallVector<Value *, 32> &DeadInsts);
  static bool IsEmptyStructType(Type *Ty, DxilTypeSystem &typeSys);

private:
  SROA_Helper(Value *V, ArrayRef<Value *> Elts,
              SmallVector<Value *, 32> &DeadInsts, DxilTypeSystem &ts,
              const DataLayout &dl, DominatorTree *dt)
      : OldVal(V), NewElts(Elts), DeadInsts(DeadInsts), typeSys(ts), DL(dl),
        DT(dt) {}
  void RewriteForScalarRepl(Value *V, IRBuilder<> &Builder);

private:
  // Must be a pointer type val.
  Value *OldVal;
  // Flattened elements for OldVal.
  ArrayRef<Value *> NewElts;
  SmallVector<Value *, 32> &DeadInsts;
  DxilTypeSystem &typeSys;
  const DataLayout &DL;
  DominatorTree *DT;
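  // Per-user rewrite helpers: each one rewrites a single kind of user of
  // OldVal (constant expression, GEP, address space cast, load, store, memory
  // intrinsic, call, or bitcast) to refer to the flattened NewElts instead.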
  void RewriteForConstExpr(ConstantExpr *user, IRBuilder<> &Builder);
  void RewriteForGEP(GEPOperator *GEP, IRBuilder<> &Builder);
  void RewriteForAddrSpaceCast(Value *user, IRBuilder<> &Builder);
  void RewriteForLoad(LoadInst *loadInst);
  void RewriteForStore(StoreInst *storeInst);
  void RewriteMemIntrin(MemIntrinsic *MI, Value *OldV);
  void RewriteCall(CallInst *CI);
  void RewriteBitCast(BitCastInst *BCI);
  void RewriteCallArg(CallInst *CI, unsigned ArgIdx, bool bIn, bool bOut);
};
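// SROA_HLSL is the function-pass driver: it decides which allocas are safe
// and worthwhile to break up, performs the scalar replacement, and (when
// RunPromotion is set) promotes the resulting allocas to SSA registers.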
struct SROA_HLSL : public FunctionPass {
  SROA_HLSL(bool Promote, int T, bool hasDT, char &ID, int ST, int AT, int SLT)
      : FunctionPass(ID), HasDomTree(hasDT), RunPromotion(Promote) {
    if (AT == -1)
      ArrayElementThreshold = 8;
    else
      ArrayElementThreshold = AT;
    if (SLT == -1)
      // Do not limit the scalar integer load size if no threshold is given.
      ScalarLoadThreshold = -1;
    else
      ScalarLoadThreshold = SLT;
  }

  bool runOnFunction(Function &F) override;

  bool performScalarRepl(Function &F, DxilTypeSystem &typeSys);
  bool markPrecise(Function &F);

private:
  bool HasDomTree;
  bool RunPromotion;

  /// DeadInsts - Keep track of instructions we have made dead, so that
  /// we can remove them after we are done working.
  SmallVector<Value *, 32> DeadInsts;

  /// AllocaInfo - When analyzing uses of an alloca instruction, this captures
  /// information about the uses. All these fields are initialized to false
  /// and set to true when something is learned.
  struct AllocaInfo {
    /// The alloca to promote.
    AllocaInst *AI;

    /// CheckedPHIs - This is a set of verified PHI nodes, to prevent infinite
    /// looping and avoid redundant work.
    SmallPtrSet<PHINode *, 8> CheckedPHIs;

    /// isUnsafe - This is set to true if the alloca cannot be SROA'd.
    bool isUnsafe : 1;

    /// isMemCpySrc - This is true if this aggregate is memcpy'd from.
    bool isMemCpySrc : 1;

    /// isMemCpyDst - This is true if this aggregate is memcpy'd into.
    bool isMemCpyDst : 1;

    /// hasSubelementAccess - This is true if a subelement of the alloca is
    /// ever accessed, or false if the alloca is only accessed with mem
    /// intrinsics or load/store that only access the entire alloca at once.
    bool hasSubelementAccess : 1;

    /// hasALoadOrStore - This is true if there are any loads or stores to it.
    /// The alloca may just be accessed with memcpy, for example, which would
    /// not set this.
    bool hasALoadOrStore : 1;

    /// hasArrayIndexing - This is true if there is any dynamic array
    /// indexing into it.
    bool hasArrayIndexing : 1;

    /// hasVectorIndexing - This is true if there is any dynamic vector
    /// indexing into it.
    bool hasVectorIndexing : 1;

    explicit AllocaInfo(AllocaInst *ai)
        : AI(ai), isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false),
          hasSubelementAccess(false), hasALoadOrStore(false),
          hasArrayIndexing(false), hasVectorIndexing(false) {}
  };

  /// ArrayElementThreshold - The maximum number of elements an array can
  /// have to be considered for SROA.
  unsigned ArrayElementThreshold;

  /// ScalarLoadThreshold - The maximum size in bits of scalars to load when
  /// converting to scalars.
  unsigned ScalarLoadThreshold;

  void MarkUnsafe(AllocaInfo &I, Instruction *User) {
    I.isUnsafe = true;
    DEBUG(dbgs() << " Transformation preventing inst: " << *User << '\n');
  }

  bool isSafeAllocaToScalarRepl(AllocaInst *AI);

  void isSafeForScalarRepl(Instruction *I, uint64_t Offset, AllocaInfo &Info);
  void isSafePHISelectUseForScalarRepl(Instruction *User, uint64_t Offset,
                                       AllocaInfo &Info);
  void isSafeGEP(GetElementPtrInst *GEPI, uint64_t &Offset, AllocaInfo &Info);
  void isSafeMemAccess(uint64_t Offset, uint64_t MemSize, Type *MemOpType,
                       bool isStore, AllocaInfo &Info, Instruction *TheAccess,
                       bool AllowWholeAccess);
  bool TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size,
                        const DataLayout &DL);

  void DeleteDeadInstructions();

  bool ShouldAttemptScalarRepl(AllocaInst *AI);
};
// SROA_DT_HLSL - SROA that uses DominatorTree.
struct SROA_DT_HLSL : public SROA_HLSL {
  static char ID;

public:
  SROA_DT_HLSL(bool Promote = false, int T = -1, int ST = -1, int AT = -1,
               int SLT = -1)
      : SROA_HLSL(Promote, T, true, ID, ST, AT, SLT) {
    initializeSROA_DTPass(*PassRegistry::getPassRegistry());
  }

  // getAnalysisUsage - This pass does not require any passes, but we know it
  // will not alter the CFG, so say so.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.setPreservesCFG();
  }
};

// SROA_SSAUp - SROA that uses SSAUpdater.
struct SROA_SSAUp_HLSL : public SROA_HLSL {
  static char ID;

public:
  SROA_SSAUp_HLSL(bool Promote = false, int T = -1, int ST = -1, int AT = -1,
                  int SLT = -1)
      : SROA_HLSL(Promote, T, false, ID, ST, AT, SLT) {
    initializeSROA_SSAUpPass(*PassRegistry::getPassRegistry());
  }

  // getAnalysisUsage - This pass does not require any passes, but we know it
  // will not alter the CFG, so say so.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AssumptionCacheTracker>();
    AU.setPreservesCFG();
  }
};

// Simple struct to split memcpy into ld/st.
struct MemcpySplitter {
  llvm::LLVMContext &m_context;
  DxilTypeSystem &m_typeSys;

public:
  MemcpySplitter(llvm::LLVMContext &context, DxilTypeSystem &typeSys)
      : m_context(context), m_typeSys(typeSys) {}
  void Split(llvm::Function &F);

  static void PatchMemCpyWithZeroIdxGEP(Module &M);
  static void PatchMemCpyWithZeroIdxGEP(MemCpyInst *MI, const DataLayout &DL);
  static void SplitMemCpy(MemCpyInst *MI, const DataLayout &DL,
                          DxilFieldAnnotation *fieldAnnotation,
                          DxilTypeSystem &typeSys,
                          const bool bEltMemCpy = true);
};

} // namespace
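// Register both SROA variants with the legacy pass manager so they can be
// created by name ("scalarreplhlsl" and "scalarreplhlsl-ssa").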
char SROA_DT_HLSL::ID = 0;
char SROA_SSAUp_HLSL::ID = 0;

INITIALIZE_PASS_BEGIN(SROA_DT_HLSL, "scalarreplhlsl",
                      "Scalar Replacement of Aggregates HLSL (DT)", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(SROA_DT_HLSL, "scalarreplhlsl",
                    "Scalar Replacement of Aggregates HLSL (DT)", false, false)

INITIALIZE_PASS_BEGIN(SROA_SSAUp_HLSL, "scalarreplhlsl-ssa",
                      "Scalar Replacement of Aggregates HLSL (SSAUp)", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_END(SROA_SSAUp_HLSL, "scalarreplhlsl-ssa",
                    "Scalar Replacement of Aggregates HLSL (SSAUp)", false,
                    false)

// Public interface to the ScalarReplAggregates pass.
FunctionPass *llvm::createScalarReplAggregatesHLSLPass(bool UseDomTree,
                                                       bool Promote) {
  if (UseDomTree)
    return new SROA_DT_HLSL(Promote);
  return new SROA_SSAUp_HLSL(Promote);
}
//===----------------------------------------------------------------------===//
// SRoA Driver
//===----------------------------------------------------------------------===//

bool SROA_HLSL::runOnFunction(Function &F) {
  Module *M = F.getParent();
  HLModule &HLM = M->GetOrCreateHLModule();
  DxilTypeSystem &typeSys = HLM.GetTypeSystem();

  // Establish the debug metadata layout name in the context in advance so the
  // name is serialized in both debug and non-debug compilations.
  (void)M->getContext().getMDKindID(
      DxilMDHelper::kDxilVariableDebugLayoutMDName);

  bool Changed = performScalarRepl(F, typeSys);

  // Change the remaining memcpys into ld/st.
  MemcpySplitter splitter(F.getContext(), typeSys);
  splitter.Split(F);

  Changed |= markPrecise(F);

  return Changed;
}

namespace {
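// AllocaPromoter drives SSAUpdater-based promotion of a single alloca: it
// rewrites the alloca's loads and stores into SSA values and keeps the
// associated llvm.dbg.declare / llvm.dbg.value intrinsics up to date.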
class AllocaPromoter : public LoadAndStorePromoter {
  AllocaInst *AI;
  DIBuilder *DIB;
  SmallVector<DbgDeclareInst *, 4> DDIs;
  SmallVector<DbgValueInst *, 4> DVIs;

public:
  AllocaPromoter(ArrayRef<Instruction *> Insts, SSAUpdater &S, DIBuilder *DB)
      : LoadAndStorePromoter(Insts, S), AI(nullptr), DIB(DB) {}

  void run(AllocaInst *AI, const SmallVectorImpl<Instruction *> &Insts) {
    // Remember which alloca we're promoting (for isInstInList).
    this->AI = AI;
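    // Collect any debug intrinsics that refer to this alloca so they can be
    // converted or erased once the alloca itself has been promoted away.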
    if (auto *L = LocalAsMetadata::getIfExists(AI)) {
      if (auto *DINode = MetadataAsValue::getIfExists(AI->getContext(), L)) {
        for (User *U : DINode->users())
          if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
            DDIs.push_back(DDI);
          else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(U))
            DVIs.push_back(DVI);
      }
    }

    LoadAndStorePromoter::run(Insts);
    AI->eraseFromParent();
    for (SmallVectorImpl<DbgDeclareInst *>::iterator I = DDIs.begin(),
                                                     E = DDIs.end();
         I != E; ++I) {
      DbgDeclareInst *DDI = *I;
      DDI->eraseFromParent();
    }
    for (SmallVectorImpl<DbgValueInst *>::iterator I = DVIs.begin(),
                                                   E = DVIs.end();
         I != E; ++I) {
      DbgValueInst *DVI = *I;
      DVI->eraseFromParent();
    }
  }

  bool
  isInstInList(Instruction *I,
               const SmallVectorImpl<Instruction *> &Insts) const override {
    if (LoadInst *LI = dyn_cast<LoadInst>(I))
      return LI->getOperand(0) == AI;
    return cast<StoreInst>(I)->getPointerOperand() == AI;
  }

  void updateDebugInfo(Instruction *Inst) const override {
    for (SmallVectorImpl<DbgDeclareInst *>::const_iterator I = DDIs.begin(),
                                                           E = DDIs.end();
         I != E; ++I) {
      DbgDeclareInst *DDI = *I;
      if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
        ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
      else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
        ConvertDebugDeclareToDebugValue(DDI, LI, *DIB);
    }
    for (SmallVectorImpl<DbgValueInst *>::const_iterator I = DVIs.begin(),
                                                         E = DVIs.end();
         I != E; ++I) {
      DbgValueInst *DVI = *I;
      Value *Arg = nullptr;
      if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        // If an argument is zero extended then use argument directly. The ZExt
        // may be zapped by an optimization pass in future.
        if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
          Arg = dyn_cast<Argument>(ZExt->getOperand(0));
        if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
          Arg = dyn_cast<Argument>(SExt->getOperand(0));
        if (!Arg)
          Arg = SI->getOperand(0);
      } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        Arg = LI->getOperand(0);
      } else {
        continue;
      }
      DIB->insertDbgValueIntrinsic(Arg, 0, DVI->getVariable(),
                                   DVI->getExpression(), DVI->getDebugLoc(),
                                   Inst);
    }
  }
};

} // end anon namespace
/// isSafeSelectToSpeculate - Select instructions that use an alloca and are
/// subsequently loaded can be rewritten to load both input pointers and then
/// select between the result, allowing the load of the alloca to be promoted.
/// From this:
///   %P2 = select i1 %cond, i32* %Alloca, i32* %Other
///   %V = load i32* %P2
/// to:
///   %V1 = load i32* %Alloca      -> will be mem2reg'd
///   %V2 = load i32* %Other
///   %V = select i1 %cond, i32 %V1, i32 %V2
///
/// We can do this to a select if its only uses are loads and if the operand to
/// the select can be loaded unconditionally.
static bool isSafeSelectToSpeculate(SelectInst *SI) {
  const DataLayout &DL = SI->getModule()->getDataLayout();
  bool TDerefable = isDereferenceablePointer(SI->getTrueValue(), DL);
  bool FDerefable = isDereferenceablePointer(SI->getFalseValue(), DL);

  for (User *U : SI->users()) {
    LoadInst *LI = dyn_cast<LoadInst>(U);
    if (!LI || !LI->isSimple())
      return false;

    // Both operands to the select need to be dereferenceable, either
    // absolutely (e.g. allocas) or at this point because we can see other
    // accesses to it.
    if (!TDerefable &&
        !isSafeToLoadUnconditionally(SI->getTrueValue(), LI,
                                     LI->getAlignment()))
      return false;
    if (!FDerefable &&
        !isSafeToLoadUnconditionally(SI->getFalseValue(), LI,
                                     LI->getAlignment()))
      return false;
  }

  return true;
}
/// isSafePHIToSpeculate - PHI instructions that use an alloca and are
/// subsequently loaded can be rewritten to load both input pointers in the
/// pred blocks and then PHI the results, allowing the load of the alloca to
/// be promoted.
/// From this:
///   %P2 = phi [i32* %Alloca, i32* %Other]
///   %V = load i32* %P2
/// to:
///   %V1 = load i32* %Alloca      -> will be mem2reg'd
///   ...
///   %V2 = load i32* %Other
///   ...
///   %V = phi [i32 %V1, i32 %V2]
///
/// We can do this to a PHI if its only uses are loads and if the operands to
/// the PHI can be loaded unconditionally.
static bool isSafePHIToSpeculate(PHINode *PN) {
  // For now, we can only do this promotion if the load is in the same block as
  // the PHI, and if there are no stores between the phi and load.
  // TODO: Allow recursive phi users.
  // TODO: Allow stores.
  BasicBlock *BB = PN->getParent();
  unsigned MaxAlign = 0;
  for (User *U : PN->users()) {
    LoadInst *LI = dyn_cast<LoadInst>(U);
    if (!LI || !LI->isSimple())
      return false;

    // For now we only allow loads in the same block as the PHI. This is a
    // common case that happens when instcombine merges two loads through a
    // PHI.
    if (LI->getParent() != BB)
      return false;

    // Ensure that there are no instructions between the PHI and the load that
    // could store.
    for (BasicBlock::iterator BBI = PN; &*BBI != LI; ++BBI)
      if (BBI->mayWriteToMemory())
        return false;

    MaxAlign = std::max(MaxAlign, LI->getAlignment());
  }

  const DataLayout &DL = PN->getModule()->getDataLayout();

  // Okay, we know that we have one or more loads in the same block as the PHI.
  // We can transform this if it is safe to push the loads into the predecessor
  // blocks. The only thing to watch out for is that we can't put a possibly
  // trapping load in the predecessor if it is a critical edge.
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = PN->getIncomingBlock(i);
    Value *InVal = PN->getIncomingValue(i);

    // If the terminator of the predecessor has side-effects (an invoke),
    // there is no safe place to put a load in the predecessor.
    if (Pred->getTerminator()->mayHaveSideEffects())
      return false;

    // If the value is produced by the terminator of the predecessor
    // (an invoke), there is no valid place to put a load in the predecessor.
    if (Pred->getTerminator() == InVal)
      return false;

    // If the predecessor has a single successor, then the edge isn't critical.
    if (Pred->getTerminator()->getNumSuccessors() == 1)
      continue;

    // If this pointer is always safe to load, or if we can prove that there
    // is already a load in the block, then we can move the load to the pred
    // block.
    if (isDereferenceablePointer(InVal, DL) ||
        isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign))
      continue;

    return false;
  }

  return true;
}
/// tryToMakeAllocaBePromotable - This returns true if the alloca only has
/// direct (non-volatile) loads and stores to it. If the alloca is close but
/// not quite there, this will transform the code to allow promotion. As such,
/// it is a non-pure predicate.
static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout &DL) {
  SetVector<Instruction *, SmallVector<Instruction *, 4>,
            SmallPtrSet<Instruction *, 4>>
      InstsToRewrite;
  for (User *U : AI->users()) {
    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
      if (!LI->isSimple())
        return false;
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      if (SI->getOperand(0) == AI || !SI->isSimple())
        return false; // Don't allow a store OF the AI, only INTO the AI.
      continue;
    }

    if (SelectInst *SI = dyn_cast<SelectInst>(U)) {
      // If the condition being selected on is a constant, fold the select, yes
      // this does (rarely) happen early on.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition())) {
        Value *Result = SI->getOperand(1 + CI->isZero());
        SI->replaceAllUsesWith(Result);
        SI->eraseFromParent();

        // This is very rare and we just scrambled the use list of AI, start
        // over completely.
        return tryToMakeAllocaBePromotable(AI, DL);
      }

      // If it is safe to turn "load (select c, AI, ptr)" into a select of two
      // loads, then we can transform this by rewriting the select.
      if (!isSafeSelectToSpeculate(SI))
        return false;

      InstsToRewrite.insert(SI);
      continue;
    }

    if (PHINode *PN = dyn_cast<PHINode>(U)) {
      if (PN->use_empty()) { // Dead PHIs can be stripped.
        InstsToRewrite.insert(PN);
        continue;
      }

      // If it is safe to turn "load (phi [AI, ptr, ...])" into a PHI of loads
      // in the pred blocks, then we can transform this by rewriting the PHI.
      if (!isSafePHIToSpeculate(PN))
        return false;

      InstsToRewrite.insert(PN);
      continue;
    }

    if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
      if (onlyUsedByLifetimeMarkers(BCI)) {
        InstsToRewrite.insert(BCI);
        continue;
      }
    }

    return false;
  }

  // If there are no instructions to rewrite, then all uses are load/stores and
  // we're done!
  if (InstsToRewrite.empty())
    return true;

  // If we have instructions that need to be rewritten for this to be
  // promotable, take care of it now.
  for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) {
    if (BitCastInst *BCI = dyn_cast<BitCastInst>(InstsToRewrite[i])) {
      // This could only be a bitcast used by nothing but lifetime intrinsics.
      for (BitCastInst::user_iterator I = BCI->user_begin(),
                                      E = BCI->user_end();
           I != E;)
        cast<Instruction>(*I++)->eraseFromParent();
      BCI->eraseFromParent();
      continue;
    }

    if (SelectInst *SI = dyn_cast<SelectInst>(InstsToRewrite[i])) {
      // Selects in InstsToRewrite only have load uses. Rewrite each as two
      // loads with a new select.
      while (!SI->use_empty()) {
        LoadInst *LI = cast<LoadInst>(SI->user_back());

        IRBuilder<> Builder(LI);
        LoadInst *TrueLoad =
            Builder.CreateLoad(SI->getTrueValue(), LI->getName() + ".t");
        LoadInst *FalseLoad =
            Builder.CreateLoad(SI->getFalseValue(), LI->getName() + ".f");

        // Transfer alignment and AA info if present.
        TrueLoad->setAlignment(LI->getAlignment());
        FalseLoad->setAlignment(LI->getAlignment());

        AAMDNodes Tags;
        LI->getAAMetadata(Tags);
        if (Tags) {
          TrueLoad->setAAMetadata(Tags);
          FalseLoad->setAAMetadata(Tags);
        }

        Value *V =
            Builder.CreateSelect(SI->getCondition(), TrueLoad, FalseLoad);
        V->takeName(LI);
        LI->replaceAllUsesWith(V);
        LI->eraseFromParent();
      }

      // Now that all the loads are gone, the select is gone too.
      SI->eraseFromParent();
      continue;
    }

    // Otherwise, we have a PHI node which allows us to push the loads into the
    // predecessors.
    PHINode *PN = cast<PHINode>(InstsToRewrite[i]);
    if (PN->use_empty()) {
      PN->eraseFromParent();
      continue;
    }

    Type *LoadTy = cast<PointerType>(PN->getType())->getElementType();
    PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(),
                                     PN->getName() + ".ld", PN);

    // Get the AA tags and alignment to use from one of the loads. It doesn't
    // matter which one we get and if any differ, it doesn't matter.
    LoadInst *SomeLoad = cast<LoadInst>(PN->user_back());
    AAMDNodes AATags;
    SomeLoad->getAAMetadata(AATags);
    unsigned Align = SomeLoad->getAlignment();

    // Rewrite all loads of the PN to use the new PHI.
    while (!PN->use_empty()) {
      LoadInst *LI = cast<LoadInst>(PN->user_back());
      LI->replaceAllUsesWith(NewPN);
      LI->eraseFromParent();
    }

    // Inject loads into all of the pred blocks. Keep track of which blocks we
    // insert them into in case we have multiple edges from the same block.
    DenseMap<BasicBlock *, LoadInst *> InsertedLoads;
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
      BasicBlock *Pred = PN->getIncomingBlock(i);
      LoadInst *&Load = InsertedLoads[Pred];
      if (!Load) {
        Load = new LoadInst(PN->getIncomingValue(i),
                            PN->getName() + "." + Pred->getName(),
                            Pred->getTerminator());
        Load->setAlignment(Align);
        if (AATags)
          Load->setAAMetadata(AATags);
      }

      NewPN->addIncoming(Load, Pred);
    }

    PN->eraseFromParent();
  }

  ++NumAdjusted;
  return true;
}
/// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for
/// SROA. It must be a struct or array type with a small number of elements.
bool SROA_HLSL::ShouldAttemptScalarRepl(AllocaInst *AI) {
  Type *T = AI->getAllocatedType();
  // Promote every struct.
  if (dyn_cast<StructType>(T))
    return true;
  // Promote every array.
  if (dyn_cast<ArrayType>(T))
    return true;
  return false;
}
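// Returns how many levels of single-element struct nesting wrap the given
// type; it stops counting at the first struct that has more than one element.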
static unsigned getNestedLevelInStruct(const Type *ty) {
  unsigned lvl = 0;
  while (ty->isStructTy()) {
    if (ty->getStructNumElements() != 1)
      break;
    ty = ty->getStructElementType(0);
    lvl++;
  }
  return lvl;
}
// After SROA'ing a given value into a series of elements,
// creates the debug info for the storage of the individual elements.
static void addDebugInfoForElements(Value *ParentVal, Type *BrokenUpTy,
                                    uint64_t NumInstances,
                                    ArrayRef<Value *> Elems,
                                    const DataLayout &DatLayout,
                                    DIBuilder *DbgBuilder) {
  // Extract the data we need from the parent value,
  // depending on whether it is an alloca, argument or global variable.
  Type *ParentTy;
  unsigned ParentBitPieceOffset = 0;
  std::vector<DxilDIArrayDim> DIArrayDims;
  DIVariable *ParentDbgVariable;
  DIExpression *ParentDbgExpr;
  DILocation *ParentDbgLocation;
  Instruction *DbgDeclareInsertPt = nullptr;
  if (isa<GlobalVariable>(ParentVal)) {
    llvm_unreachable(
        "Not implemented: sroa debug info propagation for global vars.");
  } else {
    if (AllocaInst *ParentAlloca = dyn_cast<AllocaInst>(ParentVal))
      ParentTy = ParentAlloca->getAllocatedType();
    else
      ParentTy = cast<Argument>(ParentVal)->getType();

    DbgDeclareInst *ParentDbgDeclare = llvm::FindAllocaDbgDeclare(ParentVal);
    if (ParentDbgDeclare == nullptr)
      return;

    // Get the bit piece offset.
    if ((ParentDbgExpr = ParentDbgDeclare->getExpression())) {
      if (ParentDbgExpr->isBitPiece()) {
        ParentBitPieceOffset = ParentDbgExpr->getBitPieceOffset();
      }
    }

    ParentDbgVariable = ParentDbgDeclare->getVariable();
    ParentDbgLocation = ParentDbgDeclare->getDebugLoc();
    DbgDeclareInsertPt = ParentDbgDeclare;

    // Read the extra layout metadata, if any.
    unsigned ParentBitPieceOffsetFromMD = 0;
    if (DxilMDHelper::GetVariableDebugLayout(
            ParentDbgDeclare, ParentBitPieceOffsetFromMD, DIArrayDims)) {
      // The offset is redundant for local variables and only necessary for
      // global variables.
      DXASSERT(ParentBitPieceOffsetFromMD == ParentBitPieceOffset,
               "Bit piece offset mismatch between llvm.dbg.declare and DXIL "
               "metadata.");
    }
  }

  // If the type that was broken up is nested in arrays,
  // then each element will also be an array,
  // but the continuity between successive elements of the original aggregate
  // will have been broken, such that we must store the stride to rebuild it.
  // For example:
  //   [2 x {i32, float}] => [2 x i32], [2 x float], each with stride 64 bits
  if (NumInstances > 1 && Elems.size() > 1) {
    // Existing dimensions already account for part of the stride.
    uint64_t NewDimNumElements = NumInstances;
    for (const DxilDIArrayDim &ArrayDim : DIArrayDims) {
      DXASSERT(NewDimNumElements % ArrayDim.NumElements == 0,
               "Debug array stride is inconsistent with the number of "
               "elements.");
      NewDimNumElements /= ArrayDim.NumElements;
    }

    // Add a stride dimension.
    DxilDIArrayDim NewDIArrayDim = {};
    NewDIArrayDim.StrideInBits =
        (unsigned)DatLayout.getTypeAllocSizeInBits(BrokenUpTy);
    NewDIArrayDim.NumElements = (unsigned)NewDimNumElements;
    DIArrayDims.emplace_back(NewDIArrayDim);
  } else {
    DIArrayDims.clear();
  }

  // Create the debug info for each element.
  for (unsigned ElemIdx = 0; ElemIdx < Elems.size(); ++ElemIdx) {
    // Figure out the offset of the element in the broken up type.
    unsigned ElemBitPieceOffset = ParentBitPieceOffset;
    if (StructType *ParentStructTy = dyn_cast<StructType>(BrokenUpTy)) {
      DXASSERT_NOMSG(Elems.size() == ParentStructTy->getNumElements());
      ElemBitPieceOffset +=
          (unsigned)DatLayout.getStructLayout(ParentStructTy)
              ->getElementOffsetInBits(ElemIdx);
    } else if (VectorType *ParentVecTy = dyn_cast<VectorType>(BrokenUpTy)) {
      DXASSERT_NOMSG(Elems.size() == ParentVecTy->getNumElements());
      ElemBitPieceOffset +=
          (unsigned)DatLayout.getTypeStoreSizeInBits(
              ParentVecTy->getElementType()) *
          ElemIdx;
    } else if (ArrayType *ParentArrayTy = dyn_cast<ArrayType>(BrokenUpTy)) {
      DXASSERT_NOMSG(Elems.size() == ParentArrayTy->getNumElements());
      ElemBitPieceOffset +=
          (unsigned)DatLayout.getTypeStoreSizeInBits(
              ParentArrayTy->getElementType()) *
          ElemIdx;
    }

    // The bit_piece can only represent the leading contiguous bytes.
    // If strides are involved, we'll need additional metadata.
    Type *ElemTy = Elems[ElemIdx]->getType()->getPointerElementType();
    unsigned ElemBitPieceSize =
        (unsigned)DatLayout.getTypeAllocSizeInBits(ElemTy);
    for (const DxilDIArrayDim &ArrayDim : DIArrayDims)
      ElemBitPieceSize /= ArrayDim.NumElements;

    if (AllocaInst *ElemAlloca = dyn_cast<AllocaInst>(Elems[ElemIdx])) {
      // Local variables get an @llvm.dbg.declare plus optional metadata for
      // layout stride information.
      DIExpression *ElemDbgExpr = nullptr;
      if (ElemBitPieceOffset == 0 &&
          DatLayout.getTypeAllocSizeInBits(ParentTy) == ElemBitPieceSize) {
        ElemDbgExpr = DbgBuilder->createExpression();
      } else {
        ElemDbgExpr = DbgBuilder->createBitPieceExpression(ElemBitPieceOffset,
                                                           ElemBitPieceSize);
      }

      DXASSERT_NOMSG(DbgBuilder != nullptr);
      DbgDeclareInst *EltDDI = cast<DbgDeclareInst>(DbgBuilder->insertDeclare(
          ElemAlloca, cast<DILocalVariable>(ParentDbgVariable), ElemDbgExpr,
          ParentDbgLocation, DbgDeclareInsertPt));

      if (!DIArrayDims.empty())
        DxilMDHelper::SetVariableDebugLayout(EltDDI, ElemBitPieceOffset,
                                             DIArrayDims);
    } else {
      llvm_unreachable("Non-AllocaInst SROA'd elements.");
    }
  }
}
  739. /// Returns first GEP index that indexes a struct member, or 0 otherwise.
  740. /// Ignores initial ptr index.
  741. static unsigned FindFirstStructMemberIdxInGEP(GEPOperator *GEP) {
  742. StructType *ST = dyn_cast<StructType>(
  743. GEP->getPointerOperandType()->getPointerElementType());
  744. int index = 1;
  745. for (auto it = gep_type_begin(GEP), E = gep_type_end(GEP); it != E;
  746. ++it, ++index) {
  747. if (ST) {
  748. DXASSERT(!HLMatrixType::isa(ST) && !dxilutil::IsHLSLObjectType(ST),
  749. "otherwise, indexing into hlsl object");
  750. return index;
  751. }
  752. ST = dyn_cast<StructType>(it->getPointerElementType());
  753. }
  754. return 0;
  755. }
  756. /// Return true when ptr should not be SROA'd or copied, but used directly
  757. /// by a function in its lowered form. Also collect uses for translation.
  758. /// What is meant by directly here:
  759. /// Possibly accessed through GEP array index or address space cast, but
  760. /// not under another struct member (always allow SROA of outer struct).
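// The map records each call instruction that uses the pointer together with
// the operand index at which the pointer is passed, so the call can later be
// rewritten for the lowered type.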
  761. typedef SmallMapVector<CallInst*, unsigned, 4> FunctionUseMap;
static bool IsPtrUsedByLoweredFn(
Value *V, FunctionUseMap &CollectedUses) {
  764. bool bFound = false;
  765. for (Use &U : V->uses()) {
  766. User *user = U.getUser();
  767. if (CallInst *CI = dyn_cast<CallInst>(user)) {
  768. unsigned foundIdx = (unsigned)-1;
  769. Function *F = CI->getCalledFunction();
  770. Type *Ty = V->getType();
  771. if (F->isDeclaration() && !F->isIntrinsic() &&
  772. Ty->isPointerTy()) {
  773. HLOpcodeGroup group = hlsl::GetHLOpcodeGroupByName(F);
  774. if (group == HLOpcodeGroup::HLIntrinsic) {
  775. unsigned opIdx = U.getOperandNo();
  776. switch ((IntrinsicOp)hlsl::GetHLOpcode(CI)) {
  777. // TODO: Lower these as well, along with function parameter types
  778. //case IntrinsicOp::IOP_TraceRay:
  779. // if (opIdx != HLOperandIndex::kTraceRayPayLoadOpIdx)
  780. // continue;
  781. // break;
  782. //case IntrinsicOp::IOP_ReportHit:
  783. // if (opIdx != HLOperandIndex::kReportIntersectionAttributeOpIdx)
  784. // continue;
  785. // break;
  786. //case IntrinsicOp::IOP_CallShader:
  787. // if (opIdx != HLOperandIndex::kCallShaderPayloadOpIdx)
  788. // continue;
  789. // break;
  790. case IntrinsicOp::IOP_DispatchMesh:
  791. if (opIdx != HLOperandIndex::kDispatchMeshOpPayload)
  792. continue;
  793. break;
  794. default:
  795. continue;
  796. }
  797. foundIdx = opIdx;
  798. // TODO: Lower these as well, along with function parameter types
  799. //} else if (group == HLOpcodeGroup::NotHL) {
  800. // foundIdx = U.getOperandNo();
  801. }
  802. }
  803. if (foundIdx != (unsigned)-1) {
  804. bFound = true;
  805. auto insRes = CollectedUses.insert(std::make_pair(CI, foundIdx));
  806. DXASSERT_LOCALVAR(insRes, insRes.second,
  807. "otherwise, multiple uses in single call");
  808. }
  809. } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(user)) {
  810. // Not what we are looking for if GEP result is not [array of] struct.
  811. // If use is under struct member, we can still SROA the outer struct.
  812. if (!dxilutil::StripArrayTypes(GEP->getType()->getPointerElementType())
  813. ->isStructTy() ||
  814. FindFirstStructMemberIdxInGEP(cast<GEPOperator>(GEP)))
  815. continue;
  816. if (IsPtrUsedByLoweredFn(user, CollectedUses))
  817. bFound = true;
  818. } else if (AddrSpaceCastInst *AC = dyn_cast<AddrSpaceCastInst>(user)) {
  819. if (IsPtrUsedByLoweredFn(user, CollectedUses))
  820. bFound = true;
  821. } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(user)) {
  822. unsigned opcode = CE->getOpcode();
  823. if (opcode == Instruction::AddrSpaceCast || opcode == Instruction::GetElementPtr)
  824. if (IsPtrUsedByLoweredFn(user, CollectedUses))
  825. bFound = true;
  826. }
  827. }
  828. return bFound;
  829. }
  830. /// Rewrite call to natively use an argument with addrspace cast/bitcast
  831. static CallInst *RewriteIntrinsicCallForCastedArg(CallInst *CI, unsigned argIdx) {
  832. Function *F = CI->getCalledFunction();
  833. HLOpcodeGroup group = GetHLOpcodeGroupByName(F);
  834. DXASSERT_NOMSG(group == HLOpcodeGroup::HLIntrinsic);
  835. unsigned opcode = GetHLOpcode(CI);
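// Rebuild the call with the casted argument replaced by the original,
// uncast pointer (stripPointerCasts), creating or reusing an HL function
// overload whose parameter type matches that pointer.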
  836. SmallVector<Type *, 8> newArgTypes(CI->getFunctionType()->param_begin(),
  837. CI->getFunctionType()->param_end());
  838. SmallVector<Value *, 8> newArgs(CI->arg_operands());
  839. Value *newArg = CI->getOperand(argIdx)->stripPointerCasts();
  840. newArgTypes[argIdx] = newArg->getType();
  841. newArgs[argIdx] = newArg;
  842. FunctionType *newFuncTy = FunctionType::get(CI->getType(), newArgTypes, false);
  843. Function *newF = GetOrCreateHLFunction(*F->getParent(), newFuncTy, group, opcode,
  844. F->getAttributes().getFnAttributes());
  845. IRBuilder<> Builder(CI);
  846. return Builder.CreateCall(newF, newArgs);
  847. }
/// Translate a pointer for cases where intrinsics use UDT pointers directly.
/// Returns the existing or a new pointer if it needs to be preserved,
/// otherwise nullptr to proceed with the existing checks and SROA.
  851. static Value *TranslatePtrIfUsedByLoweredFn(
  852. Value *Ptr, DxilTypeSystem &TypeSys) {
  853. if (!Ptr->getType()->isPointerTy())
  854. return nullptr;
  855. Type *Ty = Ptr->getType()->getPointerElementType();
  856. SmallVector<unsigned, 4> outerToInnerLengths;
  857. Ty = dxilutil::StripArrayTypes(Ty, &outerToInnerLengths);
  858. if (!Ty->isStructTy())
  859. return nullptr;
  860. if (HLMatrixType::isa(Ty) || dxilutil::IsHLSLObjectType(Ty))
  861. return nullptr;
  862. unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
  863. FunctionUseMap FunctionUses;
  864. if (!IsPtrUsedByLoweredFn(Ptr, FunctionUses))
  865. return nullptr;
  866. // Translate vectors to arrays in type, but don't SROA
  867. Type *NewTy = GetLoweredUDT(cast<StructType>(Ty), &TypeSys);
  868. // No work to do here, but prevent SROA.
  869. if (Ty == NewTy && AddrSpace != DXIL::kTGSMAddrSpace)
  870. return Ptr;
  871. // If type changed, replace value, otherwise casting may still
  872. // require a rewrite of the calls.
  873. Value *NewPtr = Ptr;
  874. if (Ty != NewTy) {
  875. NewTy = dxilutil::WrapInArrayTypes(NewTy, outerToInnerLengths);
  876. if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
  877. Module &M = *GV->getParent();
  878. // Rewrite init expression for arrays instead of vectors
  879. Constant *Init = GV->hasInitializer() ?
  880. GV->getInitializer() : UndefValue::get(Ptr->getType());
  881. Constant *NewInit = TranslateInitForLoweredUDT(
  882. Init, NewTy, &TypeSys);
  883. // Replace with new GV, and rewrite vector load/store users
  884. GlobalVariable *NewGV = new GlobalVariable(
  885. M, NewTy, GV->isConstant(), GV->getLinkage(),
  886. NewInit, GV->getName(), /*InsertBefore*/ GV,
  887. GV->getThreadLocalMode(), AddrSpace);
  888. NewPtr = NewGV;
  889. } else if (AllocaInst *AI = dyn_cast<AllocaInst>(Ptr)) {
  890. IRBuilder<> Builder(AI);
  891. AllocaInst * NewAI = Builder.CreateAlloca(NewTy, nullptr, AI->getName());
  892. NewPtr = NewAI;
  893. } else {
  894. DXASSERT(false, "Ptr must be global or alloca");
  895. }
  896. // This will rewrite vector load/store users
  897. // and insert bitcasts for CallInst users
  898. ReplaceUsesForLoweredUDT(Ptr, NewPtr);
  899. }
  900. // Rewrite the HLIntrinsic calls
  901. for (auto it : FunctionUses) {
  902. CallInst *CI = it.first;
  903. HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
  904. if (group == HLOpcodeGroup::NotHL)
  905. continue;
  906. CallInst *newCI = RewriteIntrinsicCallForCastedArg(CI, it.second);
  907. CI->replaceAllUsesWith(newCI);
  908. CI->eraseFromParent();
  909. }
  910. return NewPtr;
  911. }
  912. // performScalarRepl - This algorithm is a simple worklist driven algorithm,
  913. // which runs on all of the alloca instructions in the entry block, removing
  914. // them if they are only used by getelementptr instructions.
  915. //
  916. bool SROA_HLSL::performScalarRepl(Function &F, DxilTypeSystem &typeSys) {
  917. std::vector<AllocaInst *> AllocaList;
  918. const DataLayout &DL = F.getParent()->getDataLayout();
// Make sure big allocas are split first.
// This simplifies the memcpy check between a piece of a big alloca and a
// small alloca: the big alloca is split into smaller pieces first, so by the
// time a piece is processed it is an alloca flattened from the big alloca
// rather than a GEP into the big alloca.
  924. auto size_cmp = [&DL](const AllocaInst *a0, const AllocaInst *a1) -> bool {
  925. Type* a0ty = a0->getAllocatedType();
  926. Type* a1ty = a1->getAllocatedType();
  927. bool isUnitSzStruct0 = a0ty->isStructTy() && a0ty->getStructNumElements() == 1;
  928. bool isUnitSzStruct1 = a1ty->isStructTy() && a1ty->getStructNumElements() == 1;
  929. auto sz0 = DL.getTypeAllocSize(a0ty);
  930. auto sz1 = DL.getTypeAllocSize(a1ty);
  931. if (sz0 == sz1 && (isUnitSzStruct0 || isUnitSzStruct1))
  932. return getNestedLevelInStruct(a0ty) < getNestedLevelInStruct(a1ty);
  933. return sz0 < sz1;
  934. };
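// With this comparator, std::priority_queue pops the largest alloca first,
// so big allocas are flattened before the smaller pieces derived from them.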
  935. std::priority_queue<AllocaInst *, std::vector<AllocaInst *>,
  936. std::function<bool(AllocaInst *, AllocaInst *)>>
  937. WorkList(size_cmp);
  938. // Scan the entry basic block, adding allocas to the worklist.
  939. BasicBlock &BB = F.getEntryBlock();
  940. for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
  941. if (AllocaInst *A = dyn_cast<AllocaInst>(I)) {
  942. if (!A->user_empty()) {
  943. WorkList.push(A);
// Merge GEP uses for the alloca.
  945. HLModule::MergeGepUse(A);
  946. }
  947. }
  948. DIBuilder DIB(*F.getParent(), /*AllowUnresolved*/ false);
  949. // Process the worklist
  950. bool Changed = false;
  951. while (!WorkList.empty()) {
  952. AllocaInst *AI = WorkList.top();
  953. WorkList.pop();
  954. // Handle dead allocas trivially. These can be formed by SROA'ing arrays
  955. // with unused elements.
  956. if (AI->use_empty()) {
  957. AI->eraseFromParent();
  958. Changed = true;
  959. continue;
  960. }
  961. const bool bAllowReplace = true;
  962. DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  963. if (SROA_Helper::LowerMemcpy(AI, /*annotation*/ nullptr, typeSys, DL,
  964. DT, bAllowReplace)) {
  965. Changed = true;
  966. continue;
  967. }
  968. // If this alloca is impossible for us to promote, reject it early.
  969. if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized())
  970. continue;
  971. // Check to see if we can perform the core SROA transformation. We cannot
  972. // transform the allocation instruction if it is an array allocation
  973. // (allocations OF arrays are ok though), and an allocation of a scalar
  974. // value cannot be decomposed at all.
  975. uint64_t AllocaSize = DL.getTypeAllocSize(AI->getAllocatedType());
  976. // Do not promote [0 x %struct].
  977. if (AllocaSize == 0)
  978. continue;
  979. Type *Ty = AI->getAllocatedType();
  980. // Skip empty struct type.
  981. if (SROA_Helper::IsEmptyStructType(Ty, typeSys)) {
  982. SROA_Helper::MarkEmptyStructUsers(AI, DeadInsts);
  983. DeleteDeadInstructions();
  984. continue;
  985. }
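// If the pointer must be kept intact (in lowered UDT form) for an HL
// intrinsic call, translate it here instead of scalar-replacing it.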
  986. if (Value *NewV = TranslatePtrIfUsedByLoweredFn(AI, typeSys)) {
  987. if (NewV != AI) {
  988. DXASSERT(AI->getNumUses() == 0, "must have zero users.");
  989. AI->eraseFromParent();
  990. Changed = true;
  991. }
  992. continue;
  993. }
// If the alloca looks like a good candidate for scalar replacement, and if
// all its users can be transformed, then split up the aggregate into its
// separate elements.
  998. if (ShouldAttemptScalarRepl(AI) && isSafeAllocaToScalarRepl(AI)) {
  999. std::vector<Value *> Elts;
  1000. IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(AI));
  1001. bool hasPrecise = HLModule::HasPreciseAttributeWithMetadata(AI);
  1002. Type *BrokenUpTy = nullptr;
  1003. uint64_t NumInstances = 1;
  1004. DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  1005. bool SROAed = SROA_Helper::DoScalarReplacement(
  1006. AI, Elts, BrokenUpTy, NumInstances, Builder,
  1007. /*bFlatVector*/ true, hasPrecise, typeSys, DL, DeadInsts, DT);
  1008. if (SROAed) {
  1009. Type *Ty = AI->getAllocatedType();
  1010. // Skip empty struct parameters.
  1011. if (StructType *ST = dyn_cast<StructType>(Ty)) {
  1012. if (!HLMatrixType::isa(Ty)) {
  1013. DxilStructAnnotation *SA = typeSys.GetStructAnnotation(ST);
  1014. if (SA && SA->IsEmptyStruct()) {
  1015. for (User *U : AI->users()) {
  1016. if (StoreInst *SI = dyn_cast<StoreInst>(U))
  1017. DeadInsts.emplace_back(SI);
  1018. }
  1019. DeleteDeadInstructions();
  1020. AI->replaceAllUsesWith(UndefValue::get(AI->getType()));
  1021. AI->eraseFromParent();
  1022. continue;
  1023. }
  1024. }
  1025. }
  1026. addDebugInfoForElements(AI, BrokenUpTy, NumInstances, Elts, DL, &DIB);
  1027. // Push Elts into workList.
  1028. for (unsigned EltIdx = 0; EltIdx < Elts.size(); ++EltIdx) {
  1029. AllocaInst *EltAlloca = cast<AllocaInst>(Elts[EltIdx]);
  1030. WorkList.push(EltAlloca);
  1031. }
  1032. // Now erase any instructions that were made dead while rewriting the
  1033. // alloca.
  1034. DeleteDeadInstructions();
  1035. ++NumReplaced;
  1036. DXASSERT(AI->getNumUses() == 0, "must have zero users.");
  1037. AI->eraseFromParent();
  1038. Changed = true;
  1039. continue;
  1040. }
  1041. }
  1042. }
  1043. return Changed;
  1044. }
// markPrecise - The precise attribute on an alloca inst might be lost when the
// alloca is promoted, so preserve it by marking the precise attribute with a
// function call on the alloca.
  1047. bool SROA_HLSL::markPrecise(Function &F) {
  1048. bool Changed = false;
  1049. BasicBlock &BB = F.getEntryBlock();
  1050. for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
  1051. if (AllocaInst *A = dyn_cast<AllocaInst>(I)) {
  1052. // TODO: Only do this on basic types.
  1053. if (HLModule::HasPreciseAttributeWithMetadata(A)) {
  1054. HLModule::MarkPreciseAttributeOnPtrWithFunctionCall(A,
  1055. *(F.getParent()));
  1056. Changed = true;
  1057. }
  1058. }
  1059. return Changed;
  1060. }
  1061. /// DeleteDeadInstructions - Erase instructions on the DeadInstrs list,
  1062. /// recursively including all their operands that become trivially dead.
  1063. void SROA_HLSL::DeleteDeadInstructions() {
  1064. while (!DeadInsts.empty()) {
  1065. Instruction *I = cast<Instruction>(DeadInsts.pop_back_val());
  1066. for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
  1067. if (Instruction *U = dyn_cast<Instruction>(*OI)) {
  1068. // Zero out the operand and see if it becomes trivially dead.
  1069. // (But, don't add allocas to the dead instruction list -- they are
  1070. // already on the worklist and will be deleted separately.)
  1071. *OI = nullptr;
  1072. if (isInstructionTriviallyDead(U) && !isa<AllocaInst>(U))
  1073. DeadInsts.push_back(U);
  1074. }
  1075. I->eraseFromParent();
  1076. }
  1077. }
  1078. /// isSafeForScalarRepl - Check if instruction I is a safe use with regard to
  1079. /// performing scalar replacement of alloca AI. The results are flagged in
  1080. /// the Info parameter. Offset indicates the position within AI that is
  1081. /// referenced by this instruction.
  1082. void SROA_HLSL::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
  1083. AllocaInfo &Info) {
  1084. if (I->getType()->isPointerTy()) {
  1085. // Don't check object pointers.
  1086. if (dxilutil::IsHLSLObjectType(I->getType()->getPointerElementType()))
  1087. return;
  1088. }
  1089. const DataLayout &DL = I->getModule()->getDataLayout();
  1090. for (Use &U : I->uses()) {
  1091. Instruction *User = cast<Instruction>(U.getUser());
  1092. if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
  1093. isSafeForScalarRepl(BC, Offset, Info);
  1094. } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
  1095. uint64_t GEPOffset = Offset;
  1096. isSafeGEP(GEPI, GEPOffset, Info);
  1097. if (!Info.isUnsafe)
  1098. isSafeForScalarRepl(GEPI, GEPOffset, Info);
  1099. } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
  1100. ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
  1101. if (!Length || Length->isNegative())
  1102. return MarkUnsafe(Info, User);
  1103. isSafeMemAccess(Offset, Length->getZExtValue(), nullptr,
  1104. U.getOperandNo() == 0, Info, MI,
  1105. true /*AllowWholeAccess*/);
  1106. } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
  1107. if (!LI->isSimple())
  1108. return MarkUnsafe(Info, User);
  1109. Type *LIType = LI->getType();
  1110. isSafeMemAccess(Offset, DL.getTypeAllocSize(LIType), LIType, false, Info,
  1111. LI, true /*AllowWholeAccess*/);
  1112. Info.hasALoadOrStore = true;
  1113. } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
  1114. // Store is ok if storing INTO the pointer, not storing the pointer
  1115. if (!SI->isSimple() || SI->getOperand(0) == I)
  1116. return MarkUnsafe(Info, User);
  1117. Type *SIType = SI->getOperand(0)->getType();
  1118. isSafeMemAccess(Offset, DL.getTypeAllocSize(SIType), SIType, true, Info,
  1119. SI, true /*AllowWholeAccess*/);
  1120. Info.hasALoadOrStore = true;
  1121. } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
  1122. if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
  1123. II->getIntrinsicID() != Intrinsic::lifetime_end)
  1124. return MarkUnsafe(Info, User);
  1125. } else if (isa<PHINode>(User) || isa<SelectInst>(User)) {
  1126. isSafePHISelectUseForScalarRepl(User, Offset, Info);
  1127. } else if (CallInst *CI = dyn_cast<CallInst>(User)) {
  1128. HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
  1129. // Most HL functions are safe for scalar repl.
  1130. if (HLOpcodeGroup::NotHL == group)
  1131. return MarkUnsafe(Info, User);
  1132. else if (HLOpcodeGroup::HLIntrinsic == group) {
  1133. // TODO: should we check HL parameter type for UDT overload instead of basing on IOP?
  1134. IntrinsicOp opcode = static_cast<IntrinsicOp>(GetHLOpcode(CI));
  1135. if (IntrinsicOp::IOP_TraceRay == opcode ||
  1136. IntrinsicOp::IOP_ReportHit == opcode ||
  1137. IntrinsicOp::IOP_CallShader == opcode) {
  1138. return MarkUnsafe(Info, User);
  1139. }
  1140. }
  1141. } else {
  1142. return MarkUnsafe(Info, User);
  1143. }
  1144. if (Info.isUnsafe)
  1145. return;
  1146. }
  1147. }
  1148. /// isSafePHIUseForScalarRepl - If we see a PHI node or select using a pointer
  1149. /// derived from the alloca, we can often still split the alloca into elements.
  1150. /// This is useful if we have a large alloca where one element is phi'd
  1151. /// together somewhere: we can SRoA and promote all the other elements even if
  1152. /// we end up not being able to promote this one.
  1153. ///
  1154. /// All we require is that the uses of the PHI do not index into other parts of
  1155. /// the alloca. The most important use case for this is single load and stores
  1156. /// that are PHI'd together, which can happen due to code sinking.
  1157. void SROA_HLSL::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
  1158. AllocaInfo &Info) {
  1159. // If we've already checked this PHI, don't do it again.
  1160. if (PHINode *PN = dyn_cast<PHINode>(I))
  1161. if (!Info.CheckedPHIs.insert(PN).second)
  1162. return;
  1163. const DataLayout &DL = I->getModule()->getDataLayout();
  1164. for (User *U : I->users()) {
  1165. Instruction *UI = cast<Instruction>(U);
  1166. if (BitCastInst *BC = dyn_cast<BitCastInst>(UI)) {
  1167. isSafePHISelectUseForScalarRepl(BC, Offset, Info);
  1168. } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(UI)) {
  1169. // Only allow "bitcast" GEPs for simplicity. We could generalize this,
  1170. // but would have to prove that we're staying inside of an element being
  1171. // promoted.
  1172. if (!GEPI->hasAllZeroIndices())
  1173. return MarkUnsafe(Info, UI);
  1174. isSafePHISelectUseForScalarRepl(GEPI, Offset, Info);
  1175. } else if (LoadInst *LI = dyn_cast<LoadInst>(UI)) {
  1176. if (!LI->isSimple())
  1177. return MarkUnsafe(Info, UI);
  1178. Type *LIType = LI->getType();
  1179. isSafeMemAccess(Offset, DL.getTypeAllocSize(LIType), LIType, false, Info,
  1180. LI, false /*AllowWholeAccess*/);
  1181. Info.hasALoadOrStore = true;
  1182. } else if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
  1183. // Store is ok if storing INTO the pointer, not storing the pointer
  1184. if (!SI->isSimple() || SI->getOperand(0) == I)
  1185. return MarkUnsafe(Info, UI);
  1186. Type *SIType = SI->getOperand(0)->getType();
  1187. isSafeMemAccess(Offset, DL.getTypeAllocSize(SIType), SIType, true, Info,
  1188. SI, false /*AllowWholeAccess*/);
  1189. Info.hasALoadOrStore = true;
  1190. } else if (isa<PHINode>(UI) || isa<SelectInst>(UI)) {
  1191. isSafePHISelectUseForScalarRepl(UI, Offset, Info);
  1192. } else {
  1193. return MarkUnsafe(Info, UI);
  1194. }
  1195. if (Info.isUnsafe)
  1196. return;
  1197. }
  1198. }
  1199. /// isSafeGEP - Check if a GEP instruction can be handled for scalar
  1200. /// replacement. It is safe when all the indices are constant, in-bounds
  1201. /// references, and when the resulting offset corresponds to an element within
  1202. /// the alloca type. The results are flagged in the Info parameter. Upon
  1203. /// return, Offset is adjusted as specified by the GEP indices.
  1204. void SROA_HLSL::isSafeGEP(GetElementPtrInst *GEPI, uint64_t &Offset,
  1205. AllocaInfo &Info) {
  1206. gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI);
  1207. if (GEPIt == E)
  1208. return;
  1209. bool NonConstant = false;
  1210. unsigned NonConstantIdxSize = 0;
  1211. // Compute the offset due to this GEP and check if the alloca has a
  1212. // component element at that offset.
  1213. SmallVector<Value *, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
  1214. auto indicesIt = Indices.begin();
  1215. // Walk through the GEP type indices, checking the types that this indexes
  1216. // into.
  1217. uint32_t arraySize = 0;
  1218. bool isArrayIndexing = false;
  1219. for (;GEPIt != E; ++GEPIt) {
  1220. Type *Ty = *GEPIt;
  1221. if (Ty->isStructTy() && !HLMatrixType::isa(Ty)) {
  1222. // Don't go inside struct when mark hasArrayIndexing and hasVectorIndexing.
  1223. // The following level won't affect scalar repl on the struct.
  1224. break;
  1225. }
  1226. if (GEPIt->isArrayTy()) {
  1227. arraySize = GEPIt->getArrayNumElements();
  1228. isArrayIndexing = true;
  1229. }
  1230. if (GEPIt->isVectorTy()) {
  1231. arraySize = GEPIt->getVectorNumElements();
  1232. isArrayIndexing = false;
  1233. }
  1234. // Allow dynamic indexing
  1235. ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
  1236. if (!IdxVal) {
  1237. // for dynamic index, use array size - 1 to check the offset
  1238. *indicesIt = Constant::getIntegerValue(
  1239. Type::getInt32Ty(GEPI->getContext()), APInt(32, arraySize - 1));
  1240. if (isArrayIndexing)
  1241. Info.hasArrayIndexing = true;
  1242. else
  1243. Info.hasVectorIndexing = true;
  1244. NonConstant = true;
  1245. }
  1246. indicesIt++;
  1247. }
// Continue iterating only to patch any remaining non-constant indices.
  1249. for (;GEPIt != E; ++GEPIt) {
  1250. Type *Ty = *GEPIt;
  1251. if (Ty->isArrayTy()) {
  1252. arraySize = GEPIt->getArrayNumElements();
  1253. }
  1254. if (Ty->isVectorTy()) {
  1255. arraySize = GEPIt->getVectorNumElements();
  1256. }
  1257. // Allow dynamic indexing
  1258. ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
  1259. if (!IdxVal) {
  1260. // for dynamic index, use array size - 1 to check the offset
  1261. *indicesIt = Constant::getIntegerValue(
  1262. Type::getInt32Ty(GEPI->getContext()), APInt(32, arraySize - 1));
  1263. NonConstant = true;
  1264. }
  1265. indicesIt++;
  1266. }
  1267. // If this GEP is non-constant then the last operand must have been a
  1268. // dynamic index into a vector. Pop this now as it has no impact on the
  1269. // constant part of the offset.
  1270. if (NonConstant)
  1271. Indices.pop_back();
  1272. const DataLayout &DL = GEPI->getModule()->getDataLayout();
  1273. Offset += DL.getIndexedOffset(GEPI->getPointerOperandType(), Indices);
  1274. if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, NonConstantIdxSize,
  1275. DL))
  1276. MarkUnsafe(Info, GEPI);
  1277. }
  1278. /// isHomogeneousAggregate - Check if type T is a struct or array containing
  1279. /// elements of the same type (which is always true for arrays). If so,
  1280. /// return true with NumElts and EltTy set to the number of elements and the
  1281. /// element type, respectively.
  1282. static bool isHomogeneousAggregate(Type *T, unsigned &NumElts, Type *&EltTy) {
  1283. if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
  1284. NumElts = AT->getNumElements();
  1285. EltTy = (NumElts == 0 ? nullptr : AT->getElementType());
  1286. return true;
  1287. }
  1288. if (StructType *ST = dyn_cast<StructType>(T)) {
  1289. NumElts = ST->getNumContainedTypes();
  1290. EltTy = (NumElts == 0 ? nullptr : ST->getContainedType(0));
  1291. for (unsigned n = 1; n < NumElts; ++n) {
  1292. if (ST->getContainedType(n) != EltTy)
  1293. return false;
  1294. }
  1295. return true;
  1296. }
  1297. return false;
  1298. }
  1299. /// isCompatibleAggregate - Check if T1 and T2 are either the same type or are
  1300. /// "homogeneous" aggregates with the same element type and number of elements.
  1301. static bool isCompatibleAggregate(Type *T1, Type *T2) {
  1302. if (T1 == T2)
  1303. return true;
  1304. unsigned NumElts1, NumElts2;
  1305. Type *EltTy1, *EltTy2;
  1306. if (isHomogeneousAggregate(T1, NumElts1, EltTy1) &&
  1307. isHomogeneousAggregate(T2, NumElts2, EltTy2) && NumElts1 == NumElts2 &&
  1308. EltTy1 == EltTy2)
  1309. return true;
  1310. return false;
  1311. }
  1312. /// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI
  1313. /// alloca or has an offset and size that corresponds to a component element
  1314. /// within it. The offset checked here may have been formed from a GEP with a
  1315. /// pointer bitcasted to a different type.
  1316. ///
  1317. /// If AllowWholeAccess is true, then this allows uses of the entire alloca as a
  1318. /// unit. If false, it only allows accesses known to be in a single element.
  1319. void SROA_HLSL::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
  1320. Type *MemOpType, bool isStore, AllocaInfo &Info,
  1321. Instruction *TheAccess, bool AllowWholeAccess) {
// All HLSL cares about is Info.hasVectorIndexing, which is set elsewhere.
// Do nothing here.
  1324. }
  1325. /// TypeHasComponent - Return true if T has a component type with the
  1326. /// specified offset and size. If Size is zero, do not check the size.
  1327. bool SROA_HLSL::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size,
  1328. const DataLayout &DL) {
  1329. Type *EltTy;
  1330. uint64_t EltSize;
  1331. if (StructType *ST = dyn_cast<StructType>(T)) {
  1332. const StructLayout *Layout = DL.getStructLayout(ST);
  1333. unsigned EltIdx = Layout->getElementContainingOffset(Offset);
  1334. EltTy = ST->getContainedType(EltIdx);
  1335. EltSize = DL.getTypeAllocSize(EltTy);
  1336. Offset -= Layout->getElementOffset(EltIdx);
  1337. } else if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
  1338. EltTy = AT->getElementType();
  1339. EltSize = DL.getTypeAllocSize(EltTy);
  1340. if (Offset >= AT->getNumElements() * EltSize)
  1341. return false;
  1342. Offset %= EltSize;
  1343. } else if (VectorType *VT = dyn_cast<VectorType>(T)) {
  1344. EltTy = VT->getElementType();
  1345. EltSize = DL.getTypeAllocSize(EltTy);
  1346. if (Offset >= VT->getNumElements() * EltSize)
  1347. return false;
  1348. Offset %= EltSize;
  1349. } else {
  1350. return false;
  1351. }
  1352. if (Offset == 0 && (Size == 0 || EltSize == Size))
  1353. return true;
  1354. // Check if the component spans multiple elements.
  1355. if (Offset + Size > EltSize)
  1356. return false;
  1357. return TypeHasComponent(EltTy, Offset, Size, DL);
  1358. }
/// LoadVectorOrStructArray - Load a vector array like [2 x <4 x float>] from
/// four arrays like [2 x float], or a struct array like
/// [2 x { <4 x float>, <4 x uint> }]
/// from arrays like [2 x <4 x float>], [2 x <4 x uint>].
  1363. static Value *LoadVectorOrStructArray(ArrayType *AT, ArrayRef<Value *> NewElts,
  1364. SmallVector<Value *, 8> &idxList,
  1365. IRBuilder<> &Builder) {
  1366. Type *EltTy = AT->getElementType();
  1367. Value *retVal = llvm::UndefValue::get(AT);
  1368. Type *i32Ty = Type::getInt32Ty(EltTy->getContext());
  1369. uint32_t arraySize = AT->getNumElements();
  1370. for (uint32_t i = 0; i < arraySize; i++) {
  1371. Constant *idx = ConstantInt::get(i32Ty, i);
  1372. idxList.emplace_back(idx);
  1373. if (ArrayType *EltAT = dyn_cast<ArrayType>(EltTy)) {
  1374. Value *EltVal = LoadVectorOrStructArray(EltAT, NewElts, idxList, Builder);
  1375. retVal = Builder.CreateInsertValue(retVal, EltVal, i);
  1376. } else {
  1377. assert((EltTy->isVectorTy() ||
  1378. EltTy->isStructTy()) && "must be a vector or struct type");
  1379. bool isVectorTy = EltTy->isVectorTy();
  1380. Value *retVec = llvm::UndefValue::get(EltTy);
  1381. if (isVectorTy) {
  1382. for (uint32_t c = 0; c < EltTy->getVectorNumElements(); c++) {
  1383. Value *GEP = Builder.CreateInBoundsGEP(NewElts[c], idxList);
  1384. Value *elt = Builder.CreateLoad(GEP);
  1385. retVec = Builder.CreateInsertElement(retVec, elt, c);
  1386. }
  1387. } else {
  1388. for (uint32_t c = 0; c < EltTy->getStructNumElements(); c++) {
  1389. Value *GEP = Builder.CreateInBoundsGEP(NewElts[c], idxList);
  1390. Value *elt = Builder.CreateLoad(GEP);
  1391. retVec = Builder.CreateInsertValue(retVec, elt, c);
  1392. }
  1393. }
  1394. retVal = Builder.CreateInsertValue(retVal, retVec, i);
  1395. }
  1396. idxList.pop_back();
  1397. }
  1398. return retVal;
  1399. }
/// StoreVectorOrStructArray - Store a vector array like [2 x <4 x float>] to
/// four arrays like [2 x float], or a struct array like
/// [2 x { <4 x float>, <4 x uint> }]
/// to arrays like [2 x <4 x float>], [2 x <4 x uint>].
  1404. static void StoreVectorOrStructArray(ArrayType *AT, Value *val,
  1405. ArrayRef<Value *> NewElts,
  1406. SmallVector<Value *, 8> &idxList,
  1407. IRBuilder<> &Builder) {
  1408. Type *EltTy = AT->getElementType();
  1409. Type *i32Ty = Type::getInt32Ty(EltTy->getContext());
  1410. uint32_t arraySize = AT->getNumElements();
  1411. for (uint32_t i = 0; i < arraySize; i++) {
  1412. Value *elt = Builder.CreateExtractValue(val, i);
  1413. Constant *idx = ConstantInt::get(i32Ty, i);
  1414. idxList.emplace_back(idx);
  1415. if (ArrayType *EltAT = dyn_cast<ArrayType>(EltTy)) {
  1416. StoreVectorOrStructArray(EltAT, elt, NewElts, idxList, Builder);
  1417. } else {
  1418. assert((EltTy->isVectorTy() ||
  1419. EltTy->isStructTy()) && "must be a vector or struct type");
  1420. bool isVectorTy = EltTy->isVectorTy();
  1421. if (isVectorTy) {
  1422. for (uint32_t c = 0; c < EltTy->getVectorNumElements(); c++) {
  1423. Value *component = Builder.CreateExtractElement(elt, c);
  1424. Value *GEP = Builder.CreateInBoundsGEP(NewElts[c], idxList);
  1425. Builder.CreateStore(component, GEP);
  1426. }
  1427. } else {
  1428. for (uint32_t c = 0; c < EltTy->getStructNumElements(); c++) {
  1429. Value *field = Builder.CreateExtractValue(elt, c);
  1430. Value *GEP = Builder.CreateInBoundsGEP(NewElts[c], idxList);
  1431. Builder.CreateStore(field, GEP);
  1432. }
  1433. }
  1434. }
  1435. idxList.pop_back();
  1436. }
  1437. }
  1438. /// HasPadding - Return true if the specified type has any structure or
  1439. /// alignment padding in between the elements that would be split apart
  1440. /// by SROA; return false otherwise.
  1441. static bool HasPadding(Type *Ty, const DataLayout &DL) {
  1442. if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
  1443. Ty = ATy->getElementType();
  1444. return DL.getTypeSizeInBits(Ty) != DL.getTypeAllocSizeInBits(Ty);
  1445. }
  1446. // SROA currently handles only Arrays and Structs.
  1447. StructType *STy = cast<StructType>(Ty);
  1448. const StructLayout *SL = DL.getStructLayout(STy);
  1449. unsigned PrevFieldBitOffset = 0;
  1450. for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
  1451. unsigned FieldBitOffset = SL->getElementOffsetInBits(i);
  1452. // Check to see if there is any padding between this element and the
  1453. // previous one.
  1454. if (i) {
  1455. unsigned PrevFieldEnd =
  1456. PrevFieldBitOffset + DL.getTypeSizeInBits(STy->getElementType(i - 1));
  1457. if (PrevFieldEnd < FieldBitOffset)
  1458. return true;
  1459. }
  1460. PrevFieldBitOffset = FieldBitOffset;
  1461. }
  1462. // Check for tail padding.
  1463. if (unsigned EltCount = STy->getNumElements()) {
  1464. unsigned PrevFieldEnd =
  1465. PrevFieldBitOffset +
  1466. DL.getTypeSizeInBits(STy->getElementType(EltCount - 1));
  1467. if (PrevFieldEnd < SL->getSizeInBits())
  1468. return true;
  1469. }
  1470. return false;
  1471. }
/// isSafeAllocaToScalarRepl - Check to see if the specified allocation of
/// an aggregate can be broken down into elements. Returns true if it is safe
/// to perform scalar replacement, false otherwise.
  1475. bool SROA_HLSL::isSafeAllocaToScalarRepl(AllocaInst *AI) {
  1476. // Loop over the use list of the alloca. We can only transform it if all of
  1477. // the users are safe to transform.
  1478. AllocaInfo Info(AI);
  1479. isSafeForScalarRepl(AI, 0, Info);
  1480. if (Info.isUnsafe) {
  1481. DEBUG(dbgs() << "Cannot transform: " << *AI << '\n');
  1482. return false;
  1483. }
// Vector indexing requires translating the vector into an array first.
  1485. if (Info.hasVectorIndexing)
  1486. return false;
  1487. const DataLayout &DL = AI->getModule()->getDataLayout();
  1488. // Okay, we know all the users are promotable. If the aggregate is a memcpy
  1489. // source and destination, we have to be careful. In particular, the memcpy
  1490. // could be moving around elements that live in structure padding of the LLVM
  1491. // types, but may actually be used. In these cases, we refuse to promote the
  1492. // struct.
  1493. if (Info.isMemCpySrc && Info.isMemCpyDst &&
  1494. HasPadding(AI->getAllocatedType(), DL))
  1495. return false;
  1496. return true;
  1497. }
  1498. // Copy data from srcPtr to destPtr.
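// If idxList holds more than the leading pointer index, GEP both pointers
// down to the addressed element first, then perform a scalar load/store.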
  1499. static void SimplePtrCopy(Value *DestPtr, Value *SrcPtr,
  1500. llvm::SmallVector<llvm::Value *, 16> &idxList,
  1501. IRBuilder<> &Builder) {
  1502. if (idxList.size() > 1) {
  1503. DestPtr = Builder.CreateInBoundsGEP(DestPtr, idxList);
  1504. SrcPtr = Builder.CreateInBoundsGEP(SrcPtr, idxList);
  1505. }
  1506. llvm::LoadInst *ld = Builder.CreateLoad(SrcPtr);
  1507. Builder.CreateStore(ld, DestPtr);
  1508. }
  1509. // Copy srcVal to destPtr.
  1510. static void SimpleValCopy(Value *DestPtr, Value *SrcVal,
  1511. llvm::SmallVector<llvm::Value *, 16> &idxList,
  1512. IRBuilder<> &Builder) {
  1513. Value *DestGEP = Builder.CreateInBoundsGEP(DestPtr, idxList);
  1514. Value *Val = SrcVal;
  1515. // Skip beginning pointer type.
  1516. for (unsigned i = 1; i < idxList.size(); i++) {
  1517. ConstantInt *idx = cast<ConstantInt>(idxList[i]);
  1518. Type *Ty = Val->getType();
  1519. if (Ty->isAggregateType()) {
  1520. Val = Builder.CreateExtractValue(Val, idx->getLimitedValue());
  1521. }
  1522. }
  1523. Builder.CreateStore(Val, DestGEP);
  1524. }
  1525. static void SimpleCopy(Value *Dest, Value *Src,
  1526. llvm::SmallVector<llvm::Value *, 16> &idxList,
  1527. IRBuilder<> &Builder) {
  1528. if (Src->getType()->isPointerTy())
  1529. SimplePtrCopy(Dest, Src, idxList, Builder);
  1530. else
  1531. SimpleValCopy(Dest, Src, idxList, Builder);
  1532. }
  1533. static Value *CreateMergedGEP(Value *Ptr, SmallVector<Value *, 16> &idxList,
  1534. IRBuilder<> &Builder) {
  1535. if (GEPOperator *GEPPtr = dyn_cast<GEPOperator>(Ptr)) {
  1536. SmallVector<Value *, 2> IdxList(GEPPtr->idx_begin(), GEPPtr->idx_end());
// Skip idxList.begin() because it is already included in GEPPtr's indices.
  1538. IdxList.append(idxList.begin() + 1, idxList.end());
  1539. return Builder.CreateInBoundsGEP(GEPPtr->getPointerOperand(), IdxList);
  1540. } else {
  1541. return Builder.CreateInBoundsGEP(Ptr, idxList);
  1542. }
  1543. }
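// Emit a memcpy for a single element: build merged GEPs on both sides and
// copy the element's allocation size.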
  1544. static void EltMemCpy(Type *Ty, Value *Dest, Value *Src,
  1545. SmallVector<Value *, 16> &idxList, IRBuilder<> &Builder,
  1546. const DataLayout &DL) {
  1547. Value *DestGEP = CreateMergedGEP(Dest, idxList, Builder);
  1548. Value *SrcGEP = CreateMergedGEP(Src, idxList, Builder);
  1549. unsigned size = DL.getTypeAllocSize(Ty);
  1550. Builder.CreateMemCpy(DestGEP, SrcGEP, size, /* Align */ 1);
  1551. }
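// IsMemCpyTy - Return true when a copy of Ty should remain a memcpy rather
// than be split into per-element load/store pairs.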
  1552. static bool IsMemCpyTy(Type *Ty, DxilTypeSystem &typeSys) {
  1553. if (!Ty->isAggregateType())
  1554. return false;
  1555. if (HLMatrixType::isa(Ty))
  1556. return false;
  1557. if (dxilutil::IsHLSLObjectType(Ty))
  1558. return false;
  1559. if (StructType *ST = dyn_cast<StructType>(Ty)) {
  1560. DxilStructAnnotation *STA = typeSys.GetStructAnnotation(ST);
  1561. DXASSERT(STA, "require annotation here");
  1562. if (STA->IsEmptyStruct())
  1563. return false;
// Skip single-element structs whose element is a basic type, because
// creating the memcpy would create a GEP into the struct and memcpy only
// the basic type.
  1567. if (ST->getNumElements() == 1)
  1568. return IsMemCpyTy(ST->getElementType(0), typeSys);
  1569. }
  1570. return true;
  1571. }
  1572. // Split copy into ld/st.
  1573. static void SplitCpy(Type *Ty, Value *Dest, Value *Src,
  1574. SmallVector<Value *, 16> &idxList, IRBuilder<> &Builder,
  1575. const DataLayout &DL, DxilTypeSystem &typeSys,
  1576. const DxilFieldAnnotation *fieldAnnotation, const bool bEltMemCpy = true) {
  1577. if (PointerType *PT = dyn_cast<PointerType>(Ty)) {
  1578. Constant *idx = Constant::getIntegerValue(
  1579. IntegerType::get(Ty->getContext(), 32), APInt(32, 0));
  1580. idxList.emplace_back(idx);
  1581. SplitCpy(PT->getElementType(), Dest, Src, idxList, Builder, DL, typeSys,
  1582. fieldAnnotation, bEltMemCpy);
  1583. idxList.pop_back();
  1584. } else if (HLMatrixType::isa(Ty)) {
// If there is no fieldAnnotation, default to row major.
// Since this is only a load immediately followed by a store, either
// orientation is fine.
  1587. bool bRowMajor = true;
  1588. if (fieldAnnotation) {
DXASSERT(fieldAnnotation->HasMatrixAnnotation(),
"must have matrix annotation");
  1591. bRowMajor = fieldAnnotation->GetMatrixAnnotation().Orientation ==
  1592. MatrixOrientation::RowMajor;
  1593. }
  1594. Module *M = Builder.GetInsertPoint()->getModule();
  1595. Value *DestMatPtr;
  1596. Value *SrcMatPtr;
  1597. if (idxList.size() == 1 && idxList[0] == ConstantInt::get(
  1598. IntegerType::get(Ty->getContext(), 32), APInt(32, 0))) {
  1599. // Avoid creating GEP(0)
  1600. DestMatPtr = Dest;
  1601. SrcMatPtr = Src;
  1602. }
  1603. else {
  1604. DestMatPtr = Builder.CreateInBoundsGEP(Dest, idxList);
  1605. SrcMatPtr = Builder.CreateInBoundsGEP(Src, idxList);
  1606. }
  1607. HLMatLoadStoreOpcode loadOp = bRowMajor
  1608. ? HLMatLoadStoreOpcode::RowMatLoad : HLMatLoadStoreOpcode::ColMatLoad;
  1609. HLMatLoadStoreOpcode storeOp = bRowMajor
  1610. ? HLMatLoadStoreOpcode::RowMatStore : HLMatLoadStoreOpcode::ColMatStore;
  1611. Value *Load = HLModule::EmitHLOperationCall(
  1612. Builder, HLOpcodeGroup::HLMatLoadStore, static_cast<unsigned>(loadOp),
  1613. Ty, { SrcMatPtr }, *M);
  1614. HLModule::EmitHLOperationCall(
  1615. Builder, HLOpcodeGroup::HLMatLoadStore, static_cast<unsigned>(storeOp),
  1616. Ty, { DestMatPtr, Load }, *M);
  1617. } else if (StructType *ST = dyn_cast<StructType>(Ty)) {
  1618. if (dxilutil::IsHLSLObjectType(ST)) {
  1619. // Avoid split HLSL object.
  1620. SimpleCopy(Dest, Src, idxList, Builder);
  1621. return;
  1622. }
  1623. // Built-in structs have no type annotation
  1624. DxilStructAnnotation *STA = typeSys.GetStructAnnotation(ST);
  1625. if (STA && STA->IsEmptyStruct())
  1626. return;
  1627. for (uint32_t i = 0; i < ST->getNumElements(); i++) {
  1628. llvm::Type *ET = ST->getElementType(i);
  1629. Constant *idx = llvm::Constant::getIntegerValue(
  1630. IntegerType::get(Ty->getContext(), 32), APInt(32, i));
  1631. idxList.emplace_back(idx);
  1632. if (bEltMemCpy && IsMemCpyTy(ET, typeSys)) {
  1633. EltMemCpy(ET, Dest, Src, idxList, Builder, DL);
  1634. } else {
  1635. DxilFieldAnnotation *EltAnnotation = STA ? &STA->GetFieldAnnotation(i) : nullptr;
  1636. SplitCpy(ET, Dest, Src, idxList, Builder, DL, typeSys, EltAnnotation,
  1637. bEltMemCpy);
  1638. }
  1639. idxList.pop_back();
  1640. }
  1641. } else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
  1642. Type *ET = AT->getElementType();
  1643. for (uint32_t i = 0; i < AT->getNumElements(); i++) {
  1644. Constant *idx = Constant::getIntegerValue(
  1645. IntegerType::get(Ty->getContext(), 32), APInt(32, i));
  1646. idxList.emplace_back(idx);
  1647. if (bEltMemCpy && IsMemCpyTy(ET, typeSys)) {
  1648. EltMemCpy(ET, Dest, Src, idxList, Builder, DL);
  1649. } else {
  1650. SplitCpy(ET, Dest, Src, idxList, Builder, DL, typeSys, fieldAnnotation,
  1651. bEltMemCpy);
  1652. }
  1653. idxList.pop_back();
  1654. }
  1655. } else {
  1656. SimpleCopy(Dest, Src, idxList, Builder);
  1657. }
  1658. }
  1659. // Given a pointer to a value, produces a list of pointers to
  1660. // all scalar elements of that value and their field annotations, at any nesting level.
  1661. static void SplitPtr(Value *Ptr, // The root value pointer
  1662. SmallVectorImpl<Value *> &IdxList, // GEP indices stack during recursion
  1663. Type *Ty, // Type at the current GEP indirection level
  1664. const DxilFieldAnnotation &Annotation, // Annotation at the current GEP indirection level
  1665. SmallVectorImpl<Value *> &EltPtrList, // Accumulates pointers to each element found
  1666. SmallVectorImpl<const DxilFieldAnnotation*> &EltAnnotationList, // Accumulates field annotations for each element found
  1667. DxilTypeSystem &TypeSys,
  1668. IRBuilder<> &Builder) {
  1669. if (PointerType *PT = dyn_cast<PointerType>(Ty)) {
  1670. Constant *idx = Constant::getIntegerValue(
  1671. IntegerType::get(Ty->getContext(), 32), APInt(32, 0));
  1672. IdxList.emplace_back(idx);
  1673. SplitPtr(Ptr, IdxList, PT->getElementType(), Annotation,
  1674. EltPtrList, EltAnnotationList, TypeSys, Builder);
  1675. IdxList.pop_back();
  1676. return;
  1677. }
  1678. if (StructType *ST = dyn_cast<StructType>(Ty)) {
  1679. if (!HLMatrixType::isa(Ty) && !dxilutil::IsHLSLObjectType(ST)) {
  1680. const DxilStructAnnotation* SA = TypeSys.GetStructAnnotation(ST);
  1681. for (uint32_t i = 0; i < ST->getNumElements(); i++) {
  1682. llvm::Type *EltTy = ST->getElementType(i);
  1683. Constant *idx = llvm::Constant::getIntegerValue(
  1684. IntegerType::get(Ty->getContext(), 32), APInt(32, i));
  1685. IdxList.emplace_back(idx);
  1686. SplitPtr(Ptr, IdxList, EltTy, SA->GetFieldAnnotation(i),
  1687. EltPtrList, EltAnnotationList, TypeSys, Builder);
  1688. IdxList.pop_back();
  1689. }
  1690. return;
  1691. }
  1692. }
  1693. if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
  1694. if (AT->getArrayNumElements() == 0) {
  1695. // Skip cases like [0 x %struct], nothing to copy
  1696. return;
  1697. }
  1698. Type *ElTy = AT->getElementType();
  1699. SmallVector<ArrayType *, 4> nestArrayTys;
  1700. nestArrayTys.emplace_back(AT);
// Support multiple levels of arrays.
  1702. while (ElTy->isArrayTy()) {
  1703. ArrayType *ElAT = cast<ArrayType>(ElTy);
  1704. nestArrayTys.emplace_back(ElAT);
  1705. ElTy = ElAT->getElementType();
  1706. }
  1707. if (ElTy->isStructTy() && !HLMatrixType::isa(ElTy)) {
DXASSERT(0, "Arrays of structs are not supported when splitting pointers.");
  1709. return;
  1710. }
  1711. }
  1712. // Return a pointer to the current element and its annotation
  1713. Value *GEP = Builder.CreateInBoundsGEP(Ptr, IdxList);
  1714. EltPtrList.emplace_back(GEP);
  1715. EltAnnotationList.emplace_back(&Annotation);
  1716. }
  1717. // Support case when bitcast (gep ptr, 0,0) is transformed into bitcast ptr.
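// For example (assuming 4-byte floats), for Ty = { [4 x float] } and size == 16
// this returns level 1: the sizes match at the outer struct, and the
// single-element struct is stepped into once so the memcpy stays on the inner
// [4 x float] when flattening.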
  1718. static unsigned MatchSizeByCheckElementType(Type *Ty, const DataLayout &DL, unsigned size, unsigned level) {
  1719. unsigned ptrSize = DL.getTypeAllocSize(Ty);
  1720. // Size match, return current level.
  1721. if (ptrSize == size) {
  1722. // Do not go deeper for matrix or object.
  1723. if (HLMatrixType::isa(Ty) || dxilutil::IsHLSLObjectType(Ty))
  1724. return level;
// For a struct, go deeper if the size does not change.
// This leaves the memcpy at a deeper level when flattening.
  1727. if (StructType *ST = dyn_cast<StructType>(Ty)) {
  1728. if (ST->getNumElements() == 1) {
  1729. return MatchSizeByCheckElementType(ST->getElementType(0), DL, size, level+1);
  1730. }
  1731. }
  1732. // Don't do this for array.
  1733. // Array will be flattened as struct of array.
  1734. return level;
  1735. }
// Adding zero indices cannot make ptrSize bigger.
  1737. if (ptrSize < size)
  1738. return 0;
  1739. // ptrSize > size.
  1740. // Try to use element type to make size match.
  1741. if (StructType *ST = dyn_cast<StructType>(Ty)) {
  1742. return MatchSizeByCheckElementType(ST->getElementType(0), DL, size, level+1);
  1743. } else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
  1744. return MatchSizeByCheckElementType(AT->getElementType(), DL, size, level+1);
  1745. } else {
  1746. return 0;
  1747. }
  1748. }
  1749. static void PatchZeroIdxGEP(Value *Ptr, Value *RawPtr, MemCpyInst *MI,
  1750. unsigned level, IRBuilder<> &Builder) {
  1751. Value *zeroIdx = Builder.getInt32(0);
  1752. Value *GEP = nullptr;
  1753. if (GEPOperator *GEPPtr = dyn_cast<GEPOperator>(Ptr)) {
  1754. SmallVector<Value *, 2> IdxList(GEPPtr->idx_begin(), GEPPtr->idx_end());
// Append only level zeros (not level + 1) because the leading pointer index
// is already included in GEPPtr's indices.
  1756. IdxList.append(level, zeroIdx);
  1757. GEP = Builder.CreateInBoundsGEP(GEPPtr->getPointerOperand(), IdxList);
  1758. } else {
  1759. SmallVector<Value *, 2> IdxList(level + 1, zeroIdx);
  1760. GEP = Builder.CreateInBoundsGEP(Ptr, IdxList);
  1761. }
  1762. // Use BitCastInst::Create to prevent idxList from being optimized.
  1763. CastInst *Cast =
  1764. BitCastInst::Create(Instruction::BitCast, GEP, RawPtr->getType());
  1765. Builder.Insert(Cast);
  1766. MI->replaceUsesOfWith(RawPtr, Cast);
  1767. // Remove RawPtr if possible.
  1768. if (RawPtr->user_empty()) {
  1769. if (Instruction *I = dyn_cast<Instruction>(RawPtr)) {
  1770. I->eraseFromParent();
  1771. }
  1772. }
  1773. }
  1774. void MemcpySplitter::PatchMemCpyWithZeroIdxGEP(MemCpyInst *MI,
  1775. const DataLayout &DL) {
  1776. Value *Dest = MI->getRawDest();
  1777. Value *Src = MI->getRawSource();
  1778. // Only remove one level bitcast generated from inline.
  1779. if (BitCastOperator *BC = dyn_cast<BitCastOperator>(Dest))
  1780. Dest = BC->getOperand(0);
  1781. if (BitCastOperator *BC = dyn_cast<BitCastOperator>(Src))
  1782. Src = BC->getOperand(0);
  1783. IRBuilder<> Builder(MI);
  1784. ConstantInt *zero = Builder.getInt32(0);
  1785. Type *DestTy = Dest->getType()->getPointerElementType();
  1786. Type *SrcTy = Src->getType()->getPointerElementType();
  1787. // Support case when bitcast (gep ptr, 0,0) is transformed into
  1788. // bitcast ptr.
  1789. // Also replace (gep ptr, 0) with ptr.
  1790. ConstantInt *Length = cast<ConstantInt>(MI->getLength());
  1791. unsigned size = Length->getLimitedValue();
  1792. if (unsigned level = MatchSizeByCheckElementType(DestTy, DL, size, 0)) {
  1793. PatchZeroIdxGEP(Dest, MI->getRawDest(), MI, level, Builder);
  1794. } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(Dest)) {
  1795. if (GEP->getNumIndices() == 1) {
  1796. Value *idx = *GEP->idx_begin();
  1797. if (idx == zero) {
  1798. GEP->replaceAllUsesWith(GEP->getPointerOperand());
  1799. }
  1800. }
  1801. }
  1802. if (unsigned level = MatchSizeByCheckElementType(SrcTy, DL, size, 0)) {
  1803. PatchZeroIdxGEP(Src, MI->getRawSource(), MI, level, Builder);
  1804. } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(Src)) {
  1805. if (GEP->getNumIndices() == 1) {
  1806. Value *idx = *GEP->idx_begin();
  1807. if (idx == zero) {
  1808. GEP->replaceAllUsesWith(GEP->getPointerOperand());
  1809. }
  1810. }
  1811. }
  1812. }
  1813. void MemcpySplitter::PatchMemCpyWithZeroIdxGEP(Module &M) {
  1814. const DataLayout &DL = M.getDataLayout();
  1815. for (Function &F : M.functions()) {
  1816. for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
  1817. for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
  1818. // Avoid invalidating the iterator.
  1819. Instruction *I = BI++;
  1820. if (MemCpyInst *MI = dyn_cast<MemCpyInst>(I)) {
  1821. PatchMemCpyWithZeroIdxGEP(MI, DL);
  1822. }
  1823. }
  1824. }
  1825. }
  1826. }
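// DeleteMemcpy - Erase the memcpy, and erase its (possibly bitcast) source
// and dest operands too if they become unused.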
  1827. static void DeleteMemcpy(MemCpyInst *MI) {
  1828. Value *Op0 = MI->getOperand(0);
  1829. Value *Op1 = MI->getOperand(1);
  1830. // delete memcpy
  1831. MI->eraseFromParent();
  1832. if (Instruction *op0 = dyn_cast<Instruction>(Op0)) {
  1833. if (op0->user_empty())
  1834. op0->eraseFromParent();
  1835. }
  1836. if (Instruction *op1 = dyn_cast<Instruction>(Op1)) {
  1837. if (op1->user_empty())
  1838. op1->eraseFromParent();
  1839. }
  1840. }
// If a user is a function call, return the parameter annotation so the matrix
// majority can be determined.
  1842. static DxilFieldAnnotation *FindAnnotationFromMatUser(Value *Mat,
  1843. DxilTypeSystem &typeSys) {
  1844. for (User *U : Mat->users()) {
  1845. if (CallInst *CI = dyn_cast<CallInst>(U)) {
  1846. Function *F = CI->getCalledFunction();
  1847. if (DxilFunctionAnnotation *Anno = typeSys.GetFunctionAnnotation(F)) {
  1848. for (unsigned i = 0; i < CI->getNumArgOperands(); i++) {
  1849. if (CI->getArgOperand(i) == Mat) {
  1850. return &Anno->GetParameterAnnotation(i);
  1851. }
  1852. }
  1853. }
  1854. }
  1855. }
  1856. return nullptr;
  1857. }
  1858. void MemcpySplitter::SplitMemCpy(MemCpyInst *MI, const DataLayout &DL,
  1859. DxilFieldAnnotation *fieldAnnotation,
  1860. DxilTypeSystem &typeSys, const bool bEltMemCpy) {
  1861. Value *Dest = MI->getRawDest();
  1862. Value *Src = MI->getRawSource();
  1863. // Only remove one level bitcast generated from inline.
  1864. if (BitCastOperator *BC = dyn_cast<BitCastOperator>(Dest))
  1865. Dest = BC->getOperand(0);
  1866. if (BitCastOperator *BC = dyn_cast<BitCastOperator>(Src))
  1867. Src = BC->getOperand(0);
  1868. if (Dest == Src) {
  1869. // delete self copy.
  1870. DeleteMemcpy(MI);
  1871. return;
  1872. }
  1873. IRBuilder<> Builder(MI);
  1874. Type *DestTy = Dest->getType()->getPointerElementType();
  1875. Type *SrcTy = Src->getType()->getPointerElementType();
// Allow copies between different address spaces; only bail out if the element
// types differ.
  1877. if (DestTy != SrcTy) {
  1878. return;
  1879. }
  1880. // Try to find fieldAnnotation from user of Dest/Src.
  1881. if (!fieldAnnotation) {
  1882. Type *EltTy = dxilutil::GetArrayEltTy(DestTy);
  1883. if (HLMatrixType::isa(EltTy)) {
  1884. fieldAnnotation = FindAnnotationFromMatUser(Dest, typeSys);
  1885. }
  1886. }
  1887. llvm::SmallVector<llvm::Value *, 16> idxList;
// Split the copy.
// Matrices are treated as scalar types and will not use memcpy,
// so a null fieldAnnotation is safe here.
  1891. SplitCpy(Dest->getType(), Dest, Src, idxList, Builder, DL, typeSys,
  1892. fieldAnnotation, bEltMemCpy);
  1893. // delete memcpy
  1894. DeleteMemcpy(MI);
  1895. }
  1896. void MemcpySplitter::Split(llvm::Function &F) {
  1897. const DataLayout &DL = F.getParent()->getDataLayout();
  1898. SmallVector<Function *, 2> memcpys;
  1899. for (Function &Fn : F.getParent()->functions()) {
  1900. if (Fn.getIntrinsicID() == Intrinsic::memcpy) {
  1901. memcpys.emplace_back(&Fn);
  1902. }
  1903. }
  1904. for (Function *memcpy : memcpys) {
  1905. for (auto U = memcpy->user_begin(); U != memcpy->user_end();) {
  1906. MemCpyInst *MI = cast<MemCpyInst>(*(U++));
  1907. if (MI->getParent()->getParent() != &F)
  1908. continue;
// Matrices are treated as scalar types and will not use memcpy,
// so passing a null fieldAnnotation is safe here.
  1911. SplitMemCpy(MI, DL, /*fieldAnnotation*/ nullptr, m_typeSys,
  1912. /*bEltMemCpy*/ false);
  1913. }
  1914. }
  1915. }
  1916. //===----------------------------------------------------------------------===//
  1917. // SRoA Helper
  1918. //===----------------------------------------------------------------------===//
/// RewriteForGEP - Rewrite the GEP to be relative to a new element when one
/// can be found that is a struct field. If none can be found, create new
/// element GEPs and try to rewrite the GEP with the new GEPs.
  1922. void SROA_Helper::RewriteForGEP(GEPOperator *GEP, IRBuilder<> &Builder) {
  1923. assert(OldVal == GEP->getPointerOperand() && "");
  1924. Value *NewPointer = nullptr;
  1925. SmallVector<Value *, 8> NewArgs;
  1926. gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP);
  1927. for (; GEPIt != E; ++GEPIt) {
  1928. if (GEPIt->isStructTy()) {
  1929. // must be const
  1930. ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
  1931. assert(IdxVal->getLimitedValue() < NewElts.size() && "");
  1932. NewPointer = NewElts[IdxVal->getLimitedValue()];
// The idx selects NewPointer; it is not part of the new GEP's indices.
  1934. GEPIt++;
  1935. break;
  1936. } else if (GEPIt->isArrayTy()) {
  1937. // Add array idx.
  1938. NewArgs.push_back(GEPIt.getOperand());
  1939. } else if (GEPIt->isPointerTy()) {
  1940. // Add pointer idx.
  1941. NewArgs.push_back(GEPIt.getOperand());
  1942. } else if (GEPIt->isVectorTy()) {
  1943. // Add vector idx.
  1944. NewArgs.push_back(GEPIt.getOperand());
  1945. } else {
  1946. llvm_unreachable("should break from structTy");
  1947. }
  1948. }
  1949. if (NewPointer) {
  1950. // Struct split.
  1951. // Add rest of idx.
  1952. for (; GEPIt != E; ++GEPIt) {
  1953. NewArgs.push_back(GEPIt.getOperand());
  1954. }
  1955. // If only 1 level struct, just use the new pointer.
  1956. Value *NewGEP = NewPointer;
  1957. if (NewArgs.size() > 1) {
  1958. NewGEP = Builder.CreateInBoundsGEP(NewPointer, NewArgs);
  1959. NewGEP->takeName(GEP);
  1960. }
  1961. assert(NewGEP->getType() == GEP->getType() && "type mismatch");
  1962. GEP->replaceAllUsesWith(NewGEP);
  1963. } else {
  1964. // End at array of basic type.
  1965. Type *Ty = GEP->getType()->getPointerElementType();
  1966. if (Ty->isVectorTy() ||
  1967. (Ty->isStructTy() && !dxilutil::IsHLSLObjectType(Ty)) ||
  1968. Ty->isArrayTy()) {
  1969. SmallVector<Value *, 8> NewArgs;
  1970. NewArgs.append(GEP->idx_begin(), GEP->idx_end());
  1971. SmallVector<Value *, 8> NewGEPs;
  1972. // create new geps
  1973. for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
  1974. Value *NewGEP = Builder.CreateGEP(nullptr, NewElts[i], NewArgs);
  1975. NewGEPs.emplace_back(NewGEP);
  1976. }
  1977. const bool bAllowReplace = isa<AllocaInst>(OldVal);
  1978. if (!SROA_Helper::LowerMemcpy(GEP, /*annotation*/ nullptr, typeSys, DL, DT, bAllowReplace)) {
  1979. SROA_Helper helper(GEP, NewGEPs, DeadInsts, typeSys, DL, DT);
  1980. helper.RewriteForScalarRepl(GEP, Builder);
  1981. for (Value *NewGEP : NewGEPs) {
  1982. if (NewGEP->user_empty() && isa<Instruction>(NewGEP)) {
  1983. // Delete unused newGEP.
  1984. cast<Instruction>(NewGEP)->eraseFromParent();
  1985. }
  1986. }
  1987. }
  1988. } else {
  1989. Value *vecIdx = NewArgs.back();
  1990. if (ConstantInt *immVecIdx = dyn_cast<ConstantInt>(vecIdx)) {
  1991. // Replace vecArray[arrayIdx][immVecIdx]
  1992. // with scalarArray_immVecIdx[arrayIdx]
  1993. // Pop the vecIdx.
  1994. NewArgs.pop_back();
  1995. Value *NewGEP = NewElts[immVecIdx->getLimitedValue()];
  1996. if (NewArgs.size() > 1) {
  1997. NewGEP = Builder.CreateInBoundsGEP(NewGEP, NewArgs);
  1998. NewGEP->takeName(GEP);
  1999. }
  2000. assert(NewGEP->getType() == GEP->getType() && "type mismatch");
  2001. GEP->replaceAllUsesWith(NewGEP);
  2002. } else {
  2003. // dynamic vector indexing.
  2004. assert(0 && "should not reach here");
  2005. }
  2006. }
  2007. }
  2008. // Remove the use so that the caller can keep iterating over its other users
  2009. DXASSERT(GEP->user_empty(), "All uses of the GEP should have been eliminated");
  2010. if (isa<Instruction>(GEP)) {
  2011. GEP->setOperand(GEP->getPointerOperandIndex(), UndefValue::get(GEP->getPointerOperand()->getType()));
  2012. DeadInsts.push_back(GEP);
  2013. }
  2014. else {
  2015. cast<Constant>(GEP)->destroyConstant();
  2016. }
  2017. }
/// isVectorOrStructArray - Check if T is an array of vectors or structs.
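/// e.g. (illustrative) [4 x <2 x float>] and [3 x { i32, float }] qualify,
/// while [8 x i32] does not.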
  2019. static bool isVectorOrStructArray(Type *T) {
  2020. if (!T->isArrayTy())
  2021. return false;
  2022. T = dxilutil::GetArrayEltTy(T);
  2023. return T->isStructTy() || T->isVectorTy();
  2024. }
  2025. static void SimplifyStructValUsage(Value *StructVal, std::vector<Value *> Elts,
  2026. SmallVectorImpl<Value *> &DeadInsts) {
  2027. for (User *user : StructVal->users()) {
  2028. if (ExtractValueInst *Extract = dyn_cast<ExtractValueInst>(user)) {
  2029. DXASSERT(Extract->getNumIndices() == 1, "only support 1 index case");
  2030. unsigned index = Extract->getIndices()[0];
  2031. Value *Elt = Elts[index];
  2032. Extract->replaceAllUsesWith(Elt);
  2033. DeadInsts.emplace_back(Extract);
  2034. } else if (InsertValueInst *Insert = dyn_cast<InsertValueInst>(user)) {
  2035. DXASSERT(Insert->getNumIndices() == 1, "only support 1 index case");
  2036. unsigned index = Insert->getIndices()[0];
  2037. if (Insert->getAggregateOperand() == StructVal) {
  2038. // Update field.
  2039. std::vector<Value *> NewElts = Elts;
  2040. NewElts[index] = Insert->getInsertedValueOperand();
  2041. SimplifyStructValUsage(Insert, NewElts, DeadInsts);
  2042. } else {
  2043. // Insert to another bigger struct.
  2044. IRBuilder<> Builder(Insert);
  2045. Value *TmpStructVal = UndefValue::get(StructVal->getType());
  2046. for (unsigned i = 0; i < Elts.size(); i++) {
  2047. TmpStructVal =
  2048. Builder.CreateInsertValue(TmpStructVal, Elts[i], {i});
  2049. }
  2050. Insert->replaceUsesOfWith(StructVal, TmpStructVal);
  2051. }
  2052. }
  2053. }
  2054. }
  2055. /// RewriteForLoad - Replace OldVal with flattened NewElts in LoadInst.
  2056. void SROA_Helper::RewriteForLoad(LoadInst *LI) {
  2057. Type *LIType = LI->getType();
  2058. Type *ValTy = OldVal->getType()->getPointerElementType();
  2059. IRBuilder<> Builder(LI);
  2060. if (LIType->isVectorTy()) {
  2061. // Replace:
// %res = load <2 x i32>* %alloc
// with:
// %load.0 = load i32* %alloc.0
// %insert.0 = insertelement <2 x i32> undef, i32 %load.0, i32 0
// %load.1 = load i32* %alloc.1
// %insert = insertelement <2 x i32> %insert.0, i32 %load.1, i32 1
  2068. Value *Insert = UndefValue::get(LIType);
  2069. for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
  2070. Value *Load = Builder.CreateLoad(NewElts[i], "load");
  2071. Insert = Builder.CreateInsertElement(Insert, Load, i, "insert");
  2072. }
  2073. LI->replaceAllUsesWith(Insert);
  2074. } else if (isCompatibleAggregate(LIType, ValTy)) {
  2075. if (isVectorOrStructArray(LIType)) {
// Replace:
// %res = load [2 x <2 x float>]* %alloc
// with element-wise loads from the split scalar arrays %alloc.0 and %alloc.1
// (each [2 x float]), rebuilt into a [2 x <2 x float>] value.
  2084. Type *i32Ty = Type::getInt32Ty(LIType->getContext());
  2085. Value *zero = ConstantInt::get(i32Ty, 0);
  2086. SmallVector<Value *, 8> idxList;
  2087. idxList.emplace_back(zero);
  2088. Value *newLd =
  2089. LoadVectorOrStructArray(cast<ArrayType>(LIType), NewElts, idxList, Builder);
  2090. LI->replaceAllUsesWith(newLd);
  2091. } else {
  2092. // Replace:
  2093. // %res = load { i32, i32 }* %alloc
  2094. // with:
  2095. // %load.0 = load i32* %alloc.0
// %insert.0 = insertvalue { i32, i32 } undef, i32 %load.0, 0
  2098. // %load.1 = load i32* %alloc.1
  2099. // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1
  2100. // (Also works for arrays instead of structs)
  2101. Module *M = LI->getModule();
  2102. Value *Insert = UndefValue::get(LIType);
  2103. std::vector<Value *> LdElts(NewElts.size());
  2104. for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
  2105. Value *Ptr = NewElts[i];
  2106. Type *Ty = Ptr->getType()->getPointerElementType();
  2107. Value *Load = nullptr;
  2108. if (!HLMatrixType::isa(Ty))
  2109. Load = Builder.CreateLoad(Ptr, "load");
  2110. else {
  2111. // Generate Matrix Load.
  2112. Load = HLModule::EmitHLOperationCall(
  2113. Builder, HLOpcodeGroup::HLMatLoadStore,
  2114. static_cast<unsigned>(HLMatLoadStoreOpcode::RowMatLoad), Ty,
  2115. {Ptr}, *M);
  2116. }
  2117. LdElts[i] = Load;
  2118. Insert = Builder.CreateInsertValue(Insert, Load, i, "insert");
  2119. }
  2120. LI->replaceAllUsesWith(Insert);
  2121. if (LIType->isStructTy()) {
  2122. SimplifyStructValUsage(Insert, LdElts, DeadInsts);
  2123. }
  2124. }
  2125. } else {
llvm_unreachable("other types don't need rewrite");
  2127. }
  2128. // Remove the use so that the caller can keep iterating over its other users
  2129. LI->setOperand(LI->getPointerOperandIndex(), UndefValue::get(LI->getPointerOperand()->getType()));
  2130. DeadInsts.push_back(LI);
  2131. }
  2132. /// RewriteForStore - Replace OldVal with flattened NewElts in StoreInst.
  2133. void SROA_Helper::RewriteForStore(StoreInst *SI) {
  2134. Value *Val = SI->getOperand(0);
  2135. Type *SIType = Val->getType();
  2136. IRBuilder<> Builder(SI);
  2137. Type *ValTy = OldVal->getType()->getPointerElementType();
  2138. if (SIType->isVectorTy()) {
  2139. // Replace:
  2140. // store <2 x float> %val, <2 x float>* %alloc
  2141. // with:
// %val.0 = extractelement <2 x float> %val, i32 0
// store float %val.0, float* %alloc.0
// %val.1 = extractelement <2 x float> %val, i32 1
// store float %val.1, float* %alloc.1
  2146. for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
  2147. Value *Extract = Builder.CreateExtractElement(Val, i, Val->getName());
  2148. Builder.CreateStore(Extract, NewElts[i]);
  2149. }
  2150. } else if (isCompatibleAggregate(SIType, ValTy)) {
  2151. if (isVectorOrStructArray(SIType)) {
// Replace:
// store [2 x <2 x i32>] %val, [2 x <2 x i32>]* %alloc, align 16
// with per-scalar stores into the split arrays %alloc.0 and %alloc.1
// (each [2 x i32]), e.g. for the first scalar:
// %val.0 = extractvalue [2 x <2 x i32>] %val, 0
// %val.0.0 = extractelement <2 x i32> %val.0, i64 0
// %alloc.0.0 = getelementptr inbounds [2 x i32], [2 x i32]* %alloc.0, i32 0, i32 0
// store i32 %val.0.0, i32* %alloc.0.0
// and similarly for the remaining three scalars.
  2173. ArrayType *AT = cast<ArrayType>(SIType);
  2174. Type *i32Ty = Type::getInt32Ty(SIType->getContext());
  2175. Value *zero = ConstantInt::get(i32Ty, 0);
  2176. SmallVector<Value *, 8> idxList;
  2177. idxList.emplace_back(zero);
  2178. StoreVectorOrStructArray(AT, Val, NewElts, idxList, Builder);
  2179. } else {
  2180. // Replace:
  2181. // store { i32, i32 } %val, { i32, i32 }* %alloc
  2182. // with:
  2183. // %val.0 = extractvalue { i32, i32 } %val, 0
  2184. // store i32 %val.0, i32* %alloc.0
  2185. // %val.1 = extractvalue { i32, i32 } %val, 1
  2186. // store i32 %val.1, i32* %alloc.1
  2187. // (Also works for arrays instead of structs)
  2188. Module *M = SI->getModule();
  2189. for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
  2190. Value *Extract = Builder.CreateExtractValue(Val, i, Val->getName());
  2191. if (!HLMatrixType::isa(Extract->getType())) {
  2192. Builder.CreateStore(Extract, NewElts[i]);
  2193. } else {
  2194. // Generate Matrix Store.
  2195. HLModule::EmitHLOperationCall(
  2196. Builder, HLOpcodeGroup::HLMatLoadStore,
  2197. static_cast<unsigned>(HLMatLoadStoreOpcode::RowMatStore),
  2198. Extract->getType(), {NewElts[i], Extract}, *M);
  2199. }
  2200. }
  2201. }
  2202. } else {
llvm_unreachable("other types don't need rewrite");
  2204. }
  2205. // Remove the use so that the caller can keep iterating over its other users
  2206. SI->setOperand(SI->getPointerOperandIndex(), UndefValue::get(SI->getPointerOperand()->getType()));
  2207. DeadInsts.push_back(SI);
  2208. }
  2209. /// RewriteMemIntrin - MI is a memcpy/memset/memmove from or to AI.
  2210. /// Rewrite it to copy or set the elements of the scalarized memory.
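/// e.g. (illustrative sketch, names assumed): a memset(%old, 0, 8) over an
/// alloca split into two i32 elements becomes
///   store i32 0, i32* %old.0
///   store i32 0, i32* %old.1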
  2211. void SROA_Helper::RewriteMemIntrin(MemIntrinsic *MI, Value *OldV) {
  2212. // If this is a memcpy/memmove, construct the other pointer as the
  2213. // appropriate type. The "Other" pointer is the pointer that goes to memory
  2214. // that doesn't have anything to do with the alloca that we are promoting. For
  2215. // memset, this Value* stays null.
  2216. Value *OtherPtr = nullptr;
  2217. unsigned MemAlignment = MI->getAlignment();
  2218. if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcopy
  2219. if (OldV == MTI->getRawDest())
  2220. OtherPtr = MTI->getRawSource();
  2221. else {
  2222. assert(OldV == MTI->getRawSource());
  2223. OtherPtr = MTI->getRawDest();
  2224. }
  2225. }
  2226. // If there is an other pointer, we want to convert it to the same pointer
  2227. // type as AI has, so we can GEP through it safely.
  2228. if (OtherPtr) {
  2229. unsigned AddrSpace =
  2230. cast<PointerType>(OtherPtr->getType())->getAddressSpace();
  2231. // Remove bitcasts and all-zero GEPs from OtherPtr. This is an
  2232. // optimization, but it's also required to detect the corner case where
  2233. // both pointer operands are referencing the same memory, and where
// OtherPtr may be a bitcast or GEP that is currently being rewritten. (This
  2235. // function is only called for mem intrinsics that access the whole
  2236. // aggregate, so non-zero GEPs are not an issue here.)
  2237. OtherPtr = OtherPtr->stripPointerCasts();
  2238. // Copying the alloca to itself is a no-op: just delete it.
  2239. if (OtherPtr == OldVal || OtherPtr == NewElts[0]) {
  2240. // This code will run twice for a no-op memcpy -- once for each operand.
  2241. // Put only one reference to MI on the DeadInsts list.
  2242. for (SmallVectorImpl<Value *>::const_iterator I = DeadInsts.begin(),
  2243. E = DeadInsts.end();
  2244. I != E; ++I)
  2245. if (*I == MI)
  2246. return;
  2247. // Remove the uses so that the caller can keep iterating over its other users
  2248. MI->setOperand(0, UndefValue::get(MI->getOperand(0)->getType()));
  2249. MI->setOperand(1, UndefValue::get(MI->getOperand(1)->getType()));
  2250. DeadInsts.push_back(MI);
  2251. return;
  2252. }
  2253. // If the pointer is not the right type, insert a bitcast to the right
  2254. // type.
  2255. Type *NewTy =
  2256. PointerType::get(OldVal->getType()->getPointerElementType(), AddrSpace);
  2257. if (OtherPtr->getType() != NewTy)
  2258. OtherPtr = new BitCastInst(OtherPtr, NewTy, OtherPtr->getName(), MI);
  2259. }
  2260. // Process each element of the aggregate.
  2261. bool SROADest = MI->getRawDest() == OldV;
  2262. Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext()));
  2263. const DataLayout &DL = MI->getModule()->getDataLayout();
  2264. for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
  2265. // If this is a memcpy/memmove, emit a GEP of the other element address.
  2266. Value *OtherElt = nullptr;
  2267. unsigned OtherEltAlign = MemAlignment;
  2268. if (OtherPtr) {
  2269. Value *Idx[2] = {Zero,
  2270. ConstantInt::get(Type::getInt32Ty(MI->getContext()), i)};
  2271. OtherElt = GetElementPtrInst::CreateInBounds(
  2272. OtherPtr, Idx, OtherPtr->getName() + "." + Twine(i), MI);
  2273. uint64_t EltOffset;
  2274. PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
  2275. Type *OtherTy = OtherPtrTy->getElementType();
  2276. if (StructType *ST = dyn_cast<StructType>(OtherTy)) {
  2277. EltOffset = DL.getStructLayout(ST)->getElementOffset(i);
  2278. } else {
  2279. Type *EltTy = cast<SequentialType>(OtherTy)->getElementType();
  2280. EltOffset = DL.getTypeAllocSize(EltTy) * i;
  2281. }
  2282. // The alignment of the other pointer is the guaranteed alignment of the
  2283. // element, which is affected by both the known alignment of the whole
  2284. // mem intrinsic and the alignment of the element. If the alignment of
// the memcpy (e.g.) is 32 but the element is at a 4-byte offset, then the
  2286. // known alignment is just 4 bytes.
  2287. OtherEltAlign = (unsigned)MinAlign(OtherEltAlign, EltOffset);
  2288. }
  2289. Value *EltPtr = NewElts[i];
  2290. Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType();
  2291. // If we got down to a scalar, insert a load or store as appropriate.
  2292. if (EltTy->isSingleValueType()) {
  2293. if (isa<MemTransferInst>(MI)) {
  2294. if (SROADest) {
  2295. // From Other to Alloca.
  2296. Value *Elt = new LoadInst(OtherElt, "tmp", false, OtherEltAlign, MI);
  2297. new StoreInst(Elt, EltPtr, MI);
  2298. } else {
  2299. // From Alloca to Other.
  2300. Value *Elt = new LoadInst(EltPtr, "tmp", MI);
  2301. new StoreInst(Elt, OtherElt, false, OtherEltAlign, MI);
  2302. }
  2303. continue;
  2304. }
  2305. assert(isa<MemSetInst>(MI));
  2306. // If the stored element is zero (common case), just store a null
  2307. // constant.
  2308. Constant *StoreVal;
  2309. if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getArgOperand(1))) {
  2310. if (CI->isZero()) {
  2311. StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0>
  2312. } else {
  2313. // If EltTy is a vector type, get the element type.
  2314. Type *ValTy = EltTy->getScalarType();
  2315. // Construct an integer with the right value.
  2316. unsigned EltSize = DL.getTypeSizeInBits(ValTy);
  2317. APInt OneVal(EltSize, CI->getZExtValue());
  2318. APInt TotalVal(OneVal);
  2319. // Set each byte.
  2320. for (unsigned i = 0; 8 * i < EltSize; ++i) {
  2321. TotalVal = TotalVal.shl(8);
  2322. TotalVal |= OneVal;
  2323. }
  2324. // Convert the integer value to the appropriate type.
  2325. StoreVal = ConstantInt::get(CI->getContext(), TotalVal);
  2326. if (ValTy->isPointerTy())
  2327. StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy);
  2328. else if (ValTy->isFloatingPointTy())
  2329. StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy);
  2330. assert(StoreVal->getType() == ValTy && "Type mismatch!");
  2331. // If the requested value was a vector constant, create it.
  2332. if (EltTy->isVectorTy()) {
  2333. unsigned NumElts = cast<VectorType>(EltTy)->getNumElements();
  2334. StoreVal = ConstantVector::getSplat(NumElts, StoreVal);
  2335. }
  2336. }
  2337. new StoreInst(StoreVal, EltPtr, MI);
  2338. continue;
  2339. }
  2340. // Otherwise, if we're storing a byte variable, use a memset call for
  2341. // this element.
  2342. }
  2343. unsigned EltSize = DL.getTypeAllocSize(EltTy);
  2344. if (!EltSize)
  2345. continue;
  2346. IRBuilder<> Builder(MI);
  2347. // Finally, insert the meminst for this element.
  2348. if (isa<MemSetInst>(MI)) {
  2349. Builder.CreateMemSet(EltPtr, MI->getArgOperand(1), EltSize,
  2350. MI->isVolatile());
  2351. } else {
  2352. assert(isa<MemTransferInst>(MI));
  2353. Value *Dst = SROADest ? EltPtr : OtherElt; // Dest ptr
  2354. Value *Src = SROADest ? OtherElt : EltPtr; // Src ptr
  2355. if (isa<MemCpyInst>(MI))
  2356. Builder.CreateMemCpy(Dst, Src, EltSize, OtherEltAlign,
  2357. MI->isVolatile());
  2358. else
  2359. Builder.CreateMemMove(Dst, Src, EltSize, OtherEltAlign,
  2360. MI->isVolatile());
  2361. }
  2362. }
  2363. // Remove the use so that the caller can keep iterating over its other users
  2364. MI->setOperand(0, UndefValue::get(MI->getOperand(0)->getType()));
  2365. if (isa<MemTransferInst>(MI))
  2366. MI->setOperand(1, UndefValue::get(MI->getOperand(1)->getType()));
  2367. DeadInsts.push_back(MI);
  2368. }
  2369. void SROA_Helper::RewriteBitCast(BitCastInst *BCI) {
  2370. // Unused bitcast may be leftover from temporary memcpy
  2371. if (BCI->use_empty()) {
  2372. BCI->eraseFromParent();
  2373. return;
  2374. }
  2375. Type *DstTy = BCI->getType();
  2376. Value *Val = BCI->getOperand(0);
  2377. Type *SrcTy = Val->getType();
  2378. if (!DstTy->isPointerTy()) {
  2379. assert(0 && "Type mismatch.");
  2380. return;
  2381. }
  2382. if (!SrcTy->isPointerTy()) {
  2383. assert(0 && "Type mismatch.");
  2384. return;
  2385. }
  2386. DstTy = DstTy->getPointerElementType();
  2387. SrcTy = SrcTy->getPointerElementType();
  2388. if (!DstTy->isStructTy()) {
  2389. assert(0 && "Type mismatch.");
  2390. return;
  2391. }
  2392. if (!SrcTy->isStructTy()) {
  2393. assert(0 && "Type mismatch.");
  2394. return;
  2395. }
  2396. // Only support bitcast to parent struct type.
  2397. StructType *DstST = cast<StructType>(DstTy);
  2398. StructType *SrcST = cast<StructType>(SrcTy);
  2399. bool bTypeMatch = false;
  2400. unsigned level = 0;
  2401. while (SrcST) {
  2402. level++;
  2403. Type *EltTy = SrcST->getElementType(0);
  2404. if (EltTy == DstST) {
  2405. bTypeMatch = true;
  2406. break;
  2407. }
  2408. SrcST = dyn_cast<StructType>(EltTy);
  2409. }
  2410. if (!bTypeMatch) {
  2411. // If the layouts match, just replace the type
  2412. SrcST = cast<StructType>(SrcTy);
  2413. if (SrcST->isLayoutIdentical(DstST)) {
  2414. BCI->mutateType(Val->getType());
  2415. BCI->replaceAllUsesWith(Val);
  2416. BCI->eraseFromParent();
  2417. return;
  2418. }
  2419. assert(0 && "Type mismatch.");
  2420. return;
  2421. }
  2422. std::vector<Value*> idxList(level+1);
  2423. ConstantInt *zeroIdx = ConstantInt::get(Type::getInt32Ty(Val->getContext()), 0);
  2424. for (unsigned i=0;i<(level+1);i++)
  2425. idxList[i] = zeroIdx;
  2426. IRBuilder<> Builder(BCI);
  2427. Instruction *GEP = cast<Instruction>(Builder.CreateInBoundsGEP(Val, idxList));
  2428. BCI->replaceAllUsesWith(GEP);
  2429. BCI->eraseFromParent();
  2430. IRBuilder<> GEPBuilder(GEP);
  2431. RewriteForGEP(cast<GEPOperator>(GEP), GEPBuilder);
  2432. }
/// RewriteCallArg - For calls to functions that are not flattened, replace
/// the OldVal argument with a temporary alloca and copy data in and/or out
/// between that alloca and the flattened NewElts around the CallInst.
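/// e.g. (illustrative) for TraceRay's payload argument, a temporary alloca is
/// created, the payload data is copied into it before the call and copied back
/// out after it, and the resulting memcpys are immediately rewritten.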
  2437. void SROA_Helper::RewriteCallArg(CallInst *CI, unsigned ArgIdx, bool bIn,
  2438. bool bOut) {
  2439. Function *F = CI->getParent()->getParent();
  2440. IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(F));
  2441. const DataLayout &DL = F->getParent()->getDataLayout();
  2442. Value *userTyV = CI->getArgOperand(ArgIdx);
  2443. PointerType *userTy = cast<PointerType>(userTyV->getType());
  2444. Type *userTyElt = userTy->getElementType();
  2445. Value *Alloca = AllocaBuilder.CreateAlloca(userTyElt);
  2446. IRBuilder<> Builder(CI);
  2447. if (bIn) {
  2448. MemCpyInst *cpy = cast<MemCpyInst>(Builder.CreateMemCpy(
  2449. Alloca, userTyV, DL.getTypeAllocSize(userTyElt), false));
  2450. RewriteMemIntrin(cpy, cpy->getRawSource());
  2451. }
  2452. CI->setArgOperand(ArgIdx, Alloca);
  2453. if (bOut) {
  2454. Builder.SetInsertPoint(CI->getNextNode());
  2455. MemCpyInst *cpy = cast<MemCpyInst>(Builder.CreateMemCpy(
  2456. userTyV, Alloca, DL.getTypeAllocSize(userTyElt), false));
  2457. RewriteMemIntrin(cpy, cpy->getRawSource());
  2458. }
  2459. }
  2460. // Flatten matching OldVal arg to NewElts, optionally loading values (loadElts).
  2461. // Does not replace or clean up old CallInst.
  2462. static CallInst *CreateFlattenedHLIntrinsicCall(
  2463. CallInst *CI, Value* OldVal, ArrayRef<Value*> NewElts, bool loadElts) {
  2464. HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
  2465. Function *F = CI->getCalledFunction();
  2466. DXASSERT_NOMSG(group == HLOpcodeGroup::HLIntrinsic);
  2467. unsigned opcode = GetHLOpcode(CI);
  2468. IRBuilder<> Builder(CI);
  2469. SmallVector<Value *, 4> flatArgs;
  2470. for (Value *arg : CI->arg_operands()) {
  2471. if (arg == OldVal) {
  2472. for (Value *Elt : NewElts) {
  2473. if (loadElts && Elt->getType()->isPointerTy())
  2474. Elt = Builder.CreateLoad(Elt);
  2475. flatArgs.emplace_back(Elt);
  2476. }
  2477. } else
  2478. flatArgs.emplace_back(arg);
  2479. }
  2480. SmallVector<Type *, 4> flatParamTys;
  2481. for (Value *arg : flatArgs)
  2482. flatParamTys.emplace_back(arg->getType());
  2483. FunctionType *flatFuncTy =
  2484. FunctionType::get(CI->getType(), flatParamTys, false);
  2485. Function *flatF =
  2486. GetOrCreateHLFunction(*F->getParent(), flatFuncTy, group, opcode,
  2487. F->getAttributes().getFnAttributes());
  2488. return Builder.CreateCall(flatF, flatArgs);
  2489. }
  2490. static CallInst *RewriteWithFlattenedHLIntrinsicCall(
  2491. CallInst *CI, Value* OldVal, ArrayRef<Value*> NewElts, bool loadElts) {
  2492. CallInst *flatCI = CreateFlattenedHLIntrinsicCall(
  2493. CI, OldVal, NewElts, /*loadElts*/loadElts);
  2494. CI->replaceAllUsesWith(flatCI);
  2495. // Clear CI operands so we don't try to translate old call again
  2496. for (auto& opit : CI->operands())
  2497. opit.set(UndefValue::get(opit->getType()));
  2498. return flatCI;
  2499. }
  2500. /// RewriteCall - Replace OldVal with flattened NewElts in CallInst.
  2501. void SROA_Helper::RewriteCall(CallInst *CI) {
  2502. HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
  2503. if (group != HLOpcodeGroup::NotHL) {
  2504. unsigned opcode = GetHLOpcode(CI);
  2505. if (group == HLOpcodeGroup::HLIntrinsic) {
  2506. IntrinsicOp IOP = static_cast<IntrinsicOp>(opcode);
  2507. switch (IOP) {
  2508. case IntrinsicOp::MOP_Append: {
// Buffer Append is already expanded in codegen, so this must be an
// OutputStream Append. Every Elt has a pointer type, which is what
// Append expects, so don't load the elements.
  2513. RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, /*loadElts*/false);
  2514. DeadInsts.push_back(CI);
  2515. } break;
  2516. case IntrinsicOp::IOP_TraceRay: {
  2517. if (OldVal ==
  2518. CI->getArgOperand(HLOperandIndex::kTraceRayRayDescOpIdx)) {
  2519. RewriteCallArg(CI, HLOperandIndex::kTraceRayRayDescOpIdx,
  2520. /*bIn*/ true, /*bOut*/ false);
  2521. } else {
  2522. DXASSERT(OldVal ==
  2523. CI->getArgOperand(HLOperandIndex::kTraceRayPayLoadOpIdx),
  2524. "else invalid TraceRay");
  2525. RewriteCallArg(CI, HLOperandIndex::kTraceRayPayLoadOpIdx,
  2526. /*bIn*/ true, /*bOut*/ true);
  2527. }
  2528. } break;
  2529. case IntrinsicOp::IOP_ReportHit: {
  2530. RewriteCallArg(CI, HLOperandIndex::kReportIntersectionAttributeOpIdx,
  2531. /*bIn*/ true, /*bOut*/ false);
  2532. } break;
  2533. case IntrinsicOp::IOP_CallShader: {
  2534. RewriteCallArg(CI, HLOperandIndex::kCallShaderPayloadOpIdx,
  2535. /*bIn*/ true, /*bOut*/ true);
  2536. } break;
  2537. case IntrinsicOp::MOP_TraceRayInline: {
  2538. if (OldVal ==
  2539. CI->getArgOperand(HLOperandIndex::kTraceRayInlineRayDescOpIdx)) {
  2540. RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, /*loadElts*/true);
  2541. DeadInsts.push_back(CI);
  2542. break;
  2543. }
  2544. }
  2545. __fallthrough;
  2546. default:
  2547. // RayQuery this pointer replacement.
  2548. if (OldVal->getType()->isPointerTy() &&
  2549. CI->getNumArgOperands() >= HLOperandIndex::kHandleOpIdx &&
  2550. OldVal == CI->getArgOperand(HLOperandIndex::kHandleOpIdx) &&
  2551. dxilutil::IsHLSLRayQueryType(
  2552. OldVal->getType()->getPointerElementType())) {
  2553. // For RayQuery methods, we want to replace the RayQuery this pointer
  2554. // with a load and use of the underlying handle value.
  2555. // This will allow elimination of RayQuery types earlier.
  2556. RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, /*loadElts*/true);
  2557. DeadInsts.push_back(CI);
  2558. break;
  2559. }
  2560. DXASSERT(0, "cannot flatten hlsl intrinsic.");
  2561. }
  2562. }
// TODO: check other high-level dx operations if needed.
} else {
DXASSERT(0, "should have been handled by inlining");
  2566. }
  2567. }
  2568. /// RewriteForAddrSpaceCast - Rewrite the AddrSpaceCast, either ConstExpr or Inst.
  2569. void SROA_Helper::RewriteForAddrSpaceCast(Value *CE,
  2570. IRBuilder<> &Builder) {
  2571. SmallVector<Value *, 8> NewCasts;
  2572. // create new AddrSpaceCast.
  2573. for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
  2574. Value *NewCast = Builder.CreateAddrSpaceCast(
  2575. NewElts[i],
  2576. PointerType::get(NewElts[i]->getType()->getPointerElementType(),
  2577. CE->getType()->getPointerAddressSpace()));
  2578. NewCasts.emplace_back(NewCast);
  2579. }
  2580. SROA_Helper helper(CE, NewCasts, DeadInsts, typeSys, DL, DT);
  2581. helper.RewriteForScalarRepl(CE, Builder);
  2582. // Remove the use so that the caller can keep iterating over its other users
  2583. DXASSERT(CE->user_empty(), "All uses of the addrspacecast should have been eliminated");
  2584. if (Instruction *I = dyn_cast<Instruction>(CE))
  2585. I->eraseFromParent();
  2586. else
  2587. cast<Constant>(CE)->destroyConstant();
  2588. }
/// RewriteForConstExpr - Rewrite a ConstantExpr user of OldVal (GEP or addrspacecast).
  2590. void SROA_Helper::RewriteForConstExpr(ConstantExpr *CE, IRBuilder<> &Builder) {
  2591. if (GEPOperator *GEP = dyn_cast<GEPOperator>(CE)) {
  2592. if (OldVal == GEP->getPointerOperand()) {
  2593. // Flatten GEP.
  2594. RewriteForGEP(GEP, Builder);
  2595. return;
  2596. }
  2597. }
  2598. if (CE->getOpcode() == Instruction::AddrSpaceCast) {
  2599. if (OldVal == CE->getOperand(0)) {
  2600. // Flatten AddrSpaceCast.
  2601. RewriteForAddrSpaceCast(CE, Builder);
  2602. return;
  2603. }
  2604. }
  2605. for (Value::use_iterator UI = CE->use_begin(), E = CE->use_end(); UI != E;) {
  2606. Use &TheUse = *UI++;
  2607. if (Instruction *I = dyn_cast<Instruction>(TheUse.getUser())) {
  2608. IRBuilder<> tmpBuilder(I);
  2609. // Replace CE with constInst.
  2610. Instruction *tmpInst = CE->getAsInstruction();
  2611. tmpBuilder.Insert(tmpInst);
  2612. TheUse.set(tmpInst);
  2613. }
  2614. else {
  2615. RewriteForConstExpr(cast<ConstantExpr>(TheUse.getUser()), Builder);
  2616. }
  2617. }
  2618. // Remove the use so that the caller can keep iterating over its other users
  2619. DXASSERT(CE->user_empty(), "All uses of the constantexpr should have been eliminated");
  2620. CE->destroyConstant();
  2621. }
/// RewriteForScalarRepl - V is being split into NewElts, so rewrite all users
/// of V to use the separate elements instead.
  2624. void SROA_Helper::RewriteForScalarRepl(Value *V, IRBuilder<> &Builder) {
  2625. // Don't iterate upon the uses explicitly because we'll be removing them,
  2626. // and potentially adding new ones (if expanding memcpys) during the iteration.
  2627. Use* PrevUse = nullptr;
  2628. while (!V->use_empty()) {
  2629. Use &TheUse = *V->use_begin();
  2630. DXASSERT_LOCALVAR(PrevUse, &TheUse != PrevUse,
  2631. "Infinite loop while SROA'ing value, use isn't getting eliminated.");
  2632. PrevUse = &TheUse;
  2633. // Each of these must either call ->eraseFromParent()
  2634. // or null out the use of V so that we make progress.
  2635. if (ConstantExpr *CE = dyn_cast<ConstantExpr>(TheUse.getUser())) {
  2636. RewriteForConstExpr(CE, Builder);
  2637. }
  2638. else {
  2639. Instruction *User = cast<Instruction>(TheUse.getUser());
  2640. if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
  2641. IRBuilder<> Builder(GEP);
  2642. RewriteForGEP(cast<GEPOperator>(GEP), Builder);
  2643. } else if (LoadInst *ldInst = dyn_cast<LoadInst>(User))
  2644. RewriteForLoad(ldInst);
  2645. else if (StoreInst *stInst = dyn_cast<StoreInst>(User))
  2646. RewriteForStore(stInst);
  2647. else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User))
  2648. RewriteMemIntrin(MI, V);
  2649. else if (CallInst *CI = dyn_cast<CallInst>(User))
  2650. RewriteCall(CI);
  2651. else if (BitCastInst *BCI = dyn_cast<BitCastInst>(User))
  2652. RewriteBitCast(BCI);
  2653. else if (AddrSpaceCastInst *CI = dyn_cast<AddrSpaceCastInst>(User)) {
  2654. RewriteForAddrSpaceCast(CI, Builder);
  2655. } else {
  2656. assert(0 && "not support.");
  2657. }
  2658. }
  2659. }
  2660. }
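// CreateNestArrayTy - wrap FinalEltTy in the given nest of array dimensions,
// e.g. (illustrative) FinalEltTy = float with nestArrayTys = { [2 x T], [3 x U] }
// yields [2 x [3 x float]].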
  2661. static ArrayType *CreateNestArrayTy(Type *FinalEltTy,
  2662. ArrayRef<ArrayType *> nestArrayTys) {
  2663. Type *newAT = FinalEltTy;
  2664. for (auto ArrayTy = nestArrayTys.rbegin(), E=nestArrayTys.rend(); ArrayTy != E;
  2665. ++ArrayTy)
  2666. newAT = ArrayType::get(newAT, (*ArrayTy)->getNumElements());
  2667. return cast<ArrayType>(newAT);
  2668. }
  2669. /// DoScalarReplacement - Split V into AllocaInsts with Builder and save the new AllocaInsts into Elts.
  2670. /// Then do SROA on V.
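/// e.g. (illustrative sketch, names assumed): %a = alloca { float, <2 x i32> }
/// is broken up into %a.0 = alloca float and %a.1 = alloca <2 x i32>, after
/// which every user of %a is rewritten against the new allocas.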
  2671. bool SROA_Helper::DoScalarReplacement(Value *V, std::vector<Value *> &Elts,
  2672. Type *&BrokenUpTy, uint64_t &NumInstances,
  2673. IRBuilder<> &Builder, bool bFlatVector,
  2674. bool hasPrecise, DxilTypeSystem &typeSys,
  2675. const DataLayout &DL,
  2676. SmallVector<Value *, 32> &DeadInsts,
  2677. DominatorTree *DT) {
  2678. DEBUG(dbgs() << "Found inst to SROA: " << *V << '\n');
  2679. Type *Ty = V->getType();
// Skip non-pointer types.
  2681. if (!Ty->isPointerTy())
  2682. return false;
  2683. Ty = Ty->getPointerElementType();
// Skip non-aggregate types.
  2685. if (!Ty->isAggregateType())
  2686. return false;
  2687. // Skip matrix types.
  2688. if (HLMatrixType::isa(Ty))
  2689. return false;
  2690. IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Builder.GetInsertPoint()));
  2691. if (StructType *ST = dyn_cast<StructType>(Ty)) {
  2692. // Skip HLSL object types and RayQuery.
  2693. if (dxilutil::IsHLSLObjectType(ST)) {
  2694. return false;
  2695. }
  2696. BrokenUpTy = ST;
  2697. NumInstances = 1;
  2698. unsigned numTypes = ST->getNumContainedTypes();
  2699. Elts.reserve(numTypes);
  2700. DxilStructAnnotation *SA = typeSys.GetStructAnnotation(ST);
  2701. // Skip empty struct.
  2702. if (SA && SA->IsEmptyStruct())
  2703. return true;
  2704. for (int i = 0, e = numTypes; i != e; ++i) {
  2705. AllocaInst *NA = AllocaBuilder.CreateAlloca(ST->getContainedType(i), nullptr, V->getName() + "." + Twine(i));
  2706. bool markPrecise = hasPrecise;
  2707. if (SA) {
  2708. DxilFieldAnnotation &FA = SA->GetFieldAnnotation(i);
  2709. markPrecise |= FA.IsPrecise();
  2710. }
  2711. if (markPrecise)
  2712. HLModule::MarkPreciseAttributeWithMetadata(NA);
  2713. Elts.push_back(NA);
  2714. }
  2715. } else {
  2716. ArrayType *AT = cast<ArrayType>(Ty);
  2717. if (AT->getNumContainedTypes() == 0) {
  2718. // Skip case like [0 x %struct].
  2719. return false;
  2720. }
  2721. Type *ElTy = AT->getElementType();
  2722. SmallVector<ArrayType *, 4> nestArrayTys;
  2723. nestArrayTys.emplace_back(AT);
  2724. NumInstances = AT->getNumElements();
// Support multi-dimensional arrays.
  2726. while (ElTy->isArrayTy()) {
  2727. ArrayType *ElAT = cast<ArrayType>(ElTy);
  2728. nestArrayTys.emplace_back(ElAT);
  2729. NumInstances *= ElAT->getNumElements();
  2730. ElTy = ElAT->getElementType();
  2731. }
  2732. BrokenUpTy = ElTy;
  2733. if (ElTy->isStructTy() &&
  2734. // Skip Matrix type.
  2735. !HLMatrixType::isa(ElTy)) {
  2736. if (!dxilutil::IsHLSLObjectType(ElTy)) {
  2737. // for array of struct
  2738. // split into arrays of struct elements
  2739. StructType *ElST = cast<StructType>(ElTy);
  2740. unsigned numTypes = ElST->getNumContainedTypes();
  2741. Elts.reserve(numTypes);
  2742. DxilStructAnnotation *SA = typeSys.GetStructAnnotation(ElST);
  2743. // Skip empty struct.
  2744. if (SA && SA->IsEmptyStruct())
  2745. return true;
  2746. for (int i = 0, e = numTypes; i != e; ++i) {
  2747. AllocaInst *NA = AllocaBuilder.CreateAlloca(
  2748. CreateNestArrayTy(ElST->getContainedType(i), nestArrayTys),
  2749. nullptr, V->getName() + "." + Twine(i));
  2750. bool markPrecise = hasPrecise;
  2751. if (SA) {
  2752. DxilFieldAnnotation &FA = SA->GetFieldAnnotation(i);
  2753. markPrecise |= FA.IsPrecise();
  2754. }
  2755. if (markPrecise)
  2756. HLModule::MarkPreciseAttributeWithMetadata(NA);
  2757. Elts.push_back(NA);
  2758. }
  2759. } else {
// For a local resource array with no dynamic indexing, split it.
  2762. if (dxilutil::HasDynamicIndexing(V) ||
  2763. // Only support 1 dim split.
  2764. nestArrayTys.size() > 1)
  2765. return false;
  2766. BrokenUpTy = AT;
  2767. NumInstances = 1;
  2768. for (int i = 0, e = AT->getNumElements(); i != e; ++i) {
  2769. AllocaInst *NA = AllocaBuilder.CreateAlloca(ElTy, nullptr,
  2770. V->getName() + "." + Twine(i));
  2771. Elts.push_back(NA);
  2772. }
  2773. }
  2774. } else if (ElTy->isVectorTy()) {
  2775. // Skip vector if required.
  2776. if (!bFlatVector)
  2777. return false;
  2778. // for array of vector
  2779. // split into arrays of scalar
  2780. VectorType *ElVT = cast<VectorType>(ElTy);
  2781. BrokenUpTy = ElVT;
  2782. Elts.reserve(ElVT->getNumElements());
  2783. ArrayType *scalarArrayTy = CreateNestArrayTy(ElVT->getElementType(), nestArrayTys);
  2784. for (int i = 0, e = ElVT->getNumElements(); i != e; ++i) {
  2785. AllocaInst *NA = AllocaBuilder.CreateAlloca(scalarArrayTy, nullptr,
  2786. V->getName() + "." + Twine(i));
  2787. if (hasPrecise)
  2788. HLModule::MarkPreciseAttributeWithMetadata(NA);
  2789. Elts.push_back(NA);
  2790. }
  2791. } else
  2792. // Skip array of basic types.
  2793. return false;
  2794. }
  2795. // Now that we have created the new alloca instructions, rewrite all the
  2796. // uses of the old alloca.
  2797. SROA_Helper helper(V, Elts, DeadInsts, typeSys, DL, DT);
  2798. helper.RewriteForScalarRepl(V, Builder);
  2799. return true;
  2800. }
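// GetEltInit - extract the initializer for split element `idx` of an
// initialized global, e.g. (illustrative) for Ty = [2 x { i32, i32 }],
// Init = [{0, 1}, {2, 3}] and idx = 1 the result is the [2 x i32] array [1, 3].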
  2801. static Constant *GetEltInit(Type *Ty, Constant *Init, unsigned idx,
  2802. Type *EltTy) {
  2803. if (isa<UndefValue>(Init))
  2804. return UndefValue::get(EltTy);
if (isa<StructType>(Ty) || isa<VectorType>(Ty)) {
return Init->getAggregateElement(idx);
} else {
  2810. ArrayType *AT = cast<ArrayType>(Ty);
  2811. ArrayType *EltArrayTy = cast<ArrayType>(EltTy);
  2812. std::vector<Constant *> Elts;
  2813. if (!AT->getElementType()->isArrayTy()) {
  2814. for (unsigned i = 0; i < AT->getNumElements(); i++) {
  2815. // Get Array[i]
  2816. Constant *InitArrayElt = Init->getAggregateElement(i);
  2817. // Get Array[i].idx
  2818. InitArrayElt = InitArrayElt->getAggregateElement(idx);
  2819. Elts.emplace_back(InitArrayElt);
  2820. }
  2821. return ConstantArray::get(EltArrayTy, Elts);
  2822. } else {
  2823. Type *EltTy = AT->getElementType();
  2824. ArrayType *NestEltArrayTy = cast<ArrayType>(EltArrayTy->getElementType());
  2825. // Nested array.
  2826. for (unsigned i = 0; i < AT->getNumElements(); i++) {
  2827. // Get Array[i]
  2828. Constant *InitArrayElt = Init->getAggregateElement(i);
  2829. // Get Array[i].idx
  2830. InitArrayElt = GetEltInit(EltTy, InitArrayElt, idx, NestEltArrayTy);
  2831. Elts.emplace_back(InitArrayElt);
  2832. }
  2833. return ConstantArray::get(EltArrayTy, Elts);
  2834. }
  2835. }
  2836. }
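// GetEltAlign - conservative alignment for a split element, e.g. (illustrative)
// an element at byte offset 4 of a 16-byte-aligned value gets MinAlign(16, 4) == 4.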
  2837. unsigned SROA_Helper::GetEltAlign(unsigned ValueAlign, const DataLayout &DL,
  2838. Type *EltTy, unsigned Offset) {
  2839. unsigned Alignment = ValueAlign;
  2840. if (ValueAlign == 0) {
  2841. // The minimum alignment which users can rely on when the explicit
  2842. // alignment is omitted or zero is that required by the ABI for this
  2843. // type.
  2844. Alignment = DL.getABITypeAlignment(EltTy);
  2845. }
  2846. return MinAlign(Alignment, Offset);
  2847. }
  2848. /// DoScalarReplacement - Split V into AllocaInsts with Builder and save the new AllocaInsts into Elts.
  2849. /// Then do SROA on V.
  2850. bool SROA_Helper::DoScalarReplacement(GlobalVariable *GV,
  2851. std::vector<Value *> &Elts,
  2852. IRBuilder<> &Builder, bool bFlatVector,
  2853. bool hasPrecise, DxilTypeSystem &typeSys,
  2854. const DataLayout &DL,
  2855. SmallVector<Value *, 32> &DeadInsts,
  2856. DominatorTree *DT) {
  2857. DEBUG(dbgs() << "Found inst to SROA: " << *GV << '\n');
  2858. Type *Ty = GV->getType();
// Skip non-pointer types.
  2860. if (!Ty->isPointerTy())
  2861. return false;
  2862. Ty = Ty->getPointerElementType();
// Skip non-aggregate types.
  2864. if (!Ty->isAggregateType() && !bFlatVector)
  2865. return false;
  2866. // Skip basic types.
  2867. if (Ty->isSingleValueType() && !Ty->isVectorTy())
  2868. return false;
  2869. // Skip matrix types.
  2870. if (HLMatrixType::isa(Ty))
  2871. return false;
  2872. Module *M = GV->getParent();
  2873. Constant *Init = GV->hasInitializer() ? GV->getInitializer() : UndefValue::get(Ty);
  2874. bool isConst = GV->isConstant();
  2875. GlobalVariable::ThreadLocalMode TLMode = GV->getThreadLocalMode();
  2876. unsigned AddressSpace = GV->getType()->getAddressSpace();
  2877. GlobalValue::LinkageTypes linkage = GV->getLinkage();
  2878. const unsigned Alignment = GV->getAlignment();
  2879. if (StructType *ST = dyn_cast<StructType>(Ty)) {
  2880. // Skip HLSL object types.
  2881. if (dxilutil::IsHLSLObjectType(ST))
  2882. return false;
  2883. unsigned numTypes = ST->getNumContainedTypes();
  2884. Elts.reserve(numTypes);
  2885. unsigned Offset = 0;
  2886. //DxilStructAnnotation *SA = typeSys.GetStructAnnotation(ST);
  2887. for (int i = 0, e = numTypes; i != e; ++i) {
  2888. Type *EltTy = ST->getElementType(i);
  2889. Constant *EltInit = GetEltInit(Ty, Init, i, EltTy);
  2890. GlobalVariable *EltGV = new llvm::GlobalVariable(
  2891. *M, ST->getContainedType(i), /*IsConstant*/ isConst, linkage,
  2892. /*InitVal*/ EltInit, GV->getName() + "." + Twine(i),
  2893. /*InsertBefore*/ nullptr, TLMode, AddressSpace);
  2894. EltGV->setAlignment(GetEltAlign(Alignment, DL, EltTy, Offset));
  2895. Offset += DL.getTypeAllocSize(EltTy);
  2896. //DxilFieldAnnotation &FA = SA->GetFieldAnnotation(i);
  2897. // TODO: set precise.
  2898. // if (hasPrecise || FA.IsPrecise())
  2899. // HLModule::MarkPreciseAttributeWithMetadata(NA);
  2900. Elts.push_back(EltGV);
  2901. }
  2902. } else if (VectorType *VT = dyn_cast<VectorType>(Ty)) {
// TODO: support dynamic indexing on vectors by changing them to arrays.
  2904. unsigned numElts = VT->getNumElements();
  2905. Elts.reserve(numElts);
  2906. Type *EltTy = VT->getElementType();
  2907. unsigned Offset = 0;
  2908. //DxilStructAnnotation *SA = typeSys.GetStructAnnotation(ST);
  2909. for (int i = 0, e = numElts; i != e; ++i) {
  2910. Constant *EltInit = GetEltInit(Ty, Init, i, EltTy);
  2911. GlobalVariable *EltGV = new llvm::GlobalVariable(
  2912. *M, EltTy, /*IsConstant*/ isConst, linkage,
  2913. /*InitVal*/ EltInit, GV->getName() + "." + Twine(i),
  2914. /*InsertBefore*/ nullptr, TLMode, AddressSpace);
  2915. EltGV->setAlignment(GetEltAlign(Alignment, DL, EltTy, Offset));
  2916. Offset += DL.getTypeAllocSize(EltTy);
  2917. //DxilFieldAnnotation &FA = SA->GetFieldAnnotation(i);
  2918. // TODO: set precise.
  2919. // if (hasPrecise || FA.IsPrecise())
  2920. // HLModule::MarkPreciseAttributeWithMetadata(NA);
  2921. Elts.push_back(EltGV);
  2922. }
  2923. } else {
  2924. ArrayType *AT = cast<ArrayType>(Ty);
  2925. if (AT->getNumContainedTypes() == 0) {
  2926. // Skip case like [0 x %struct].
  2927. return false;
  2928. }
  2929. Type *ElTy = AT->getElementType();
  2930. SmallVector<ArrayType *, 4> nestArrayTys;
  2931. nestArrayTys.emplace_back(AT);
// Support multi-dimensional arrays.
  2933. while (ElTy->isArrayTy()) {
  2934. ArrayType *ElAT = cast<ArrayType>(ElTy);
  2935. nestArrayTys.emplace_back(ElAT);
  2936. ElTy = ElAT->getElementType();
  2937. }
  2938. if (ElTy->isStructTy() &&
  2939. // Skip Matrix and Resource type.
  2940. !HLMatrixType::isa(ElTy) &&
  2941. !dxilutil::IsHLSLResourceType(ElTy)) {
  2942. // for array of struct
  2943. // split into arrays of struct elements
  2944. StructType *ElST = cast<StructType>(ElTy);
  2945. unsigned numTypes = ElST->getNumContainedTypes();
  2946. Elts.reserve(numTypes);
  2947. unsigned Offset = 0;
  2948. //DxilStructAnnotation *SA = typeSys.GetStructAnnotation(ElST);
  2949. for (int i = 0, e = numTypes; i != e; ++i) {
  2950. Type *EltTy =
  2951. CreateNestArrayTy(ElST->getContainedType(i), nestArrayTys);
  2952. Constant *EltInit = GetEltInit(Ty, Init, i, EltTy);
  2953. GlobalVariable *EltGV = new llvm::GlobalVariable(
  2954. *M, EltTy, /*IsConstant*/ isConst, linkage,
  2955. /*InitVal*/ EltInit, GV->getName() + "." + Twine(i),
  2956. /*InsertBefore*/ nullptr, TLMode, AddressSpace);
  2957. EltGV->setAlignment(GetEltAlign(Alignment, DL, EltTy, Offset));
  2958. Offset += DL.getTypeAllocSize(EltTy);
  2959. //DxilFieldAnnotation &FA = SA->GetFieldAnnotation(i);
  2960. // TODO: set precise.
  2961. // if (hasPrecise || FA.IsPrecise())
  2962. // HLModule::MarkPreciseAttributeWithMetadata(NA);
  2963. Elts.push_back(EltGV);
  2964. }
  2965. } else if (ElTy->isVectorTy()) {
  2966. // Skip vector if required.
  2967. if (!bFlatVector)
  2968. return false;
  2969. // for array of vector
  2970. // split into arrays of scalar
  2971. VectorType *ElVT = cast<VectorType>(ElTy);
  2972. Elts.reserve(ElVT->getNumElements());
  2973. ArrayType *scalarArrayTy =
  2974. CreateNestArrayTy(ElVT->getElementType(), nestArrayTys);
  2975. unsigned Offset = 0;
  2976. for (int i = 0, e = ElVT->getNumElements(); i != e; ++i) {
  2977. Constant *EltInit = GetEltInit(Ty, Init, i, scalarArrayTy);
  2978. GlobalVariable *EltGV = new llvm::GlobalVariable(
  2979. *M, scalarArrayTy, /*IsConstant*/ isConst, linkage,
  2980. /*InitVal*/ EltInit, GV->getName() + "." + Twine(i),
  2981. /*InsertBefore*/ nullptr, TLMode, AddressSpace);
  2982. // TODO: set precise.
  2983. // if (hasPrecise)
  2984. // HLModule::MarkPreciseAttributeWithMetadata(NA);
  2985. EltGV->setAlignment(GetEltAlign(Alignment, DL, scalarArrayTy, Offset));
  2986. Offset += DL.getTypeAllocSize(scalarArrayTy);
  2987. Elts.push_back(EltGV);
  2988. }
  2989. } else
  2990. // Skip array of basic types.
  2991. return false;
  2992. }
  2993. // Now that we have created the new alloca instructions, rewrite all the
  2994. // uses of the old alloca.
  2995. SROA_Helper helper(GV, Elts, DeadInsts, typeSys, DL, DT);
  2996. helper.RewriteForScalarRepl(GV, Builder);
  2997. return true;
  2998. }
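// ReplaceConstantWithInst - replace every use of constant C with value V.
// ConstantExpr users are first materialized as instructions so the replacement
// can apply, e.g. (illustrative) a constant GEP of a global becomes a
// GetElementPtrInst on the replacing value.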
  2999. static void ReplaceConstantWithInst(Constant *C, Value *V, IRBuilder<> &Builder) {
  3000. for (auto it = C->user_begin(); it != C->user_end(); ) {
  3001. User *U = *(it++);
  3002. if (Instruction *I = dyn_cast<Instruction>(U)) {
  3003. I->replaceUsesOfWith(C, V);
  3004. } else {
  3005. // Skip unused ConstantExpr.
  3006. if (U->user_empty())
  3007. continue;
  3008. ConstantExpr *CE = cast<ConstantExpr>(U);
  3009. Instruction *Inst = CE->getAsInstruction();
  3010. Builder.Insert(Inst);
  3011. Inst->replaceUsesOfWith(C, V);
  3012. ReplaceConstantWithInst(CE, Inst, Builder);
  3013. }
  3014. }
  3015. C->removeDeadConstantUsers();
  3016. }
  3017. static void ReplaceUnboundedArrayUses(Value *V, Value *Src) {
  3018. for (auto it = V->user_begin(); it != V->user_end(); ) {
  3019. User *U = *(it++);
  3020. if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
  3021. SmallVector<Value *, 4> idxList(GEP->idx_begin(), GEP->idx_end());
  3022. // Must set the insert point to the GEP itself (instead of the memcpy),
  3023. // because the indices might not dominate the memcpy.
  3024. IRBuilder<> Builder(GEP);
  3025. Value *NewGEP = Builder.CreateGEP(Src, idxList);
  3026. GEP->replaceAllUsesWith(NewGEP);
  3027. } else if (BitCastInst *BC = dyn_cast<BitCastInst>(U)) {
  3028. BC->setOperand(0, Src);
  3029. } else {
  3030. DXASSERT(false, "otherwise unbounded array used in unexpected instruction");
  3031. }
  3032. }
  3033. }
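// IsUnboundedArrayMemcpy - true when both sides are arrays and either is
// zero-length (unbounded), e.g. (illustrative) a copy between
// [0 x %struct.S]* and [16 x %struct.S]*.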
  3034. static bool IsUnboundedArrayMemcpy(Type *destTy, Type *srcTy) {
  3035. return (destTy->isArrayTy() && srcTy->isArrayTy()) &&
  3036. (destTy->getArrayNumElements() == 0 || srcTy->getArrayNumElements() == 0);
  3037. }
  3038. static bool ArePointersToStructsOfIdenticalLayouts(Type *DstTy, Type *SrcTy) {
  3039. if (!SrcTy->isPointerTy() || !DstTy->isPointerTy())
  3040. return false;
  3041. DstTy = DstTy->getPointerElementType();
  3042. SrcTy = SrcTy->getPointerElementType();
  3043. if (!SrcTy->isStructTy() || !DstTy->isStructTy())
  3044. return false;
  3045. StructType *DstST = cast<StructType>(DstTy);
  3046. StructType *SrcST = cast<StructType>(SrcTy);
  3047. return SrcST->isLayoutIdentical(DstST);
  3048. }
  3049. static std::vector<Value *> GetConstValueIdxList(IRBuilder<> &builder,
  3050. std::vector<unsigned> idxlist) {
  3051. std::vector<Value *> idxConstList;
  3052. for (unsigned idx : idxlist) {
  3053. idxConstList.push_back(ConstantInt::get(builder.getInt32Ty(), idx));
  3054. }
  3055. return idxConstList;
  3056. }
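// CopyElementsOfStructsWithIdenticalLayout - walk two layout-identical
// aggregates in lockstep and copy each scalar/vector leaf with a load/store
// pair, e.g. (illustrative) { float, [2 x i32] } is copied as one float and
// two i32 load/store pairs.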
  3057. static void CopyElementsOfStructsWithIdenticalLayout(
  3058. IRBuilder<> &builder, Value *destPtr, Value *srcPtr, Type *ty,
  3059. std::vector<unsigned>& idxlist) {
  3060. if (ty->isStructTy()) {
  3061. for (unsigned i = 0; i < ty->getStructNumElements(); i++) {
  3062. idxlist.push_back(i);
  3063. CopyElementsOfStructsWithIdenticalLayout(
  3064. builder, destPtr, srcPtr, ty->getStructElementType(i), idxlist);
  3065. idxlist.pop_back();
  3066. }
  3067. }
  3068. else if (ty->isArrayTy()) {
  3069. for (unsigned i = 0; i < ty->getArrayNumElements(); i++) {
  3070. idxlist.push_back(i);
  3071. CopyElementsOfStructsWithIdenticalLayout(
  3072. builder, destPtr, srcPtr, ty->getArrayElementType(), idxlist);
  3073. idxlist.pop_back();
  3074. }
  3075. }
  3076. else if (ty->isIntegerTy() || ty->isFloatTy() || ty->isDoubleTy() ||
  3077. ty->isHalfTy() || ty->isVectorTy()) {
  3078. Value *srcGEP =
  3079. builder.CreateInBoundsGEP(srcPtr, GetConstValueIdxList(builder, idxlist));
  3080. Value *destGEP =
  3081. builder.CreateInBoundsGEP(destPtr, GetConstValueIdxList(builder, idxlist));
  3082. LoadInst *LI = builder.CreateLoad(srcGEP);
  3083. builder.CreateStore(LI, destGEP);
  3084. }
  3085. else {
  3086. DXASSERT(0, "encountered unsupported type when copying elements of identical structs.");
  3087. }
  3088. }
  3089. static bool DominateAllUsers(Instruction *I, Value *V, DominatorTree *DT);
  3090. static bool ReplaceMemcpy(Value *V, Value *Src, MemCpyInst *MC,
  3091. DxilFieldAnnotation *annotation, DxilTypeSystem &typeSys,
  3092. const DataLayout &DL, DominatorTree *DT) {
// If the memcpy is the only user of both src and dst, it was probably produced
// by splitting another memcpy. Either way, the goal here is to replace the
// memcpy, not merely remove it, and we won't have enough information to decide
// whether that is possible until after mem2reg, so bail out.
  3097. if (V != Src && V->hasOneUse() && Src->hasOneUse())
  3098. return false;
// If the memcpy doesn't dominate all of its users, full replacement isn't
// possible without complicated PHI insertion; the memcpy will likely be split
// into loads/stores instead, which mem2reg can then clean up.
  3102. if (Instruction *SrcI = dyn_cast<Instruction>(Src))
  3103. if (!DominateAllUsers(SrcI, V, DT))
  3104. return false;
  3105. Type *TyV = V->getType()->getPointerElementType();
  3106. Type *TySrc = Src->getType()->getPointerElementType();
  3107. if (Constant *C = dyn_cast<Constant>(V)) {
  3108. if (TyV == TySrc) {
  3109. if (isa<Constant>(Src)) {
  3110. V->replaceAllUsesWith(Src);
  3111. } else {
  3112. // Replace Constant with a non-Constant.
  3113. IRBuilder<> Builder(MC);
  3114. ReplaceConstantWithInst(C, Src, Builder);
  3115. }
  3116. } else {
  3117. IRBuilder<> Builder(MC);
  3118. Src = Builder.CreateBitCast(Src, V->getType());
  3119. ReplaceConstantWithInst(C, Src, Builder);
  3120. }
  3121. } else {
  3122. if (TyV == TySrc) {
  3123. if (V != Src)
  3124. V->replaceAllUsesWith(Src);
  3125. } else if (!IsUnboundedArrayMemcpy(TyV, TySrc)) {
  3126. Value* DestVal = MC->getRawDest();
  3127. Value* SrcVal = MC->getRawSource();
  3128. if (!isa<BitCastInst>(SrcVal) || !isa<BitCastInst>(DestVal)) {
  3129. DXASSERT(0, "Encountered unexpected instruction sequence");
  3130. return false;
  3131. }
  3132. BitCastInst *DestBCI = cast<BitCastInst>(DestVal);
  3133. BitCastInst *SrcBCI = cast<BitCastInst>(SrcVal);
  3134. Type* DstTy = DestBCI->getSrcTy();
  3135. Type *SrcTy = SrcBCI->getSrcTy();
  3136. if (ArePointersToStructsOfIdenticalLayouts(DstTy, SrcTy)) {
  3137. const DataLayout &DL = SrcBCI->getModule()->getDataLayout();
  3138. unsigned SrcSize = DL.getTypeAllocSize(
  3139. SrcBCI->getOperand(0)->getType()->getPointerElementType());
  3140. unsigned MemcpySize = cast<ConstantInt>(MC->getLength())->getZExtValue();
  3141. if (SrcSize != MemcpySize) {
  3142. DXASSERT(0, "Cannot handle partial memcpy");
  3143. return false;
  3144. }
  3145. if (DestBCI->hasOneUse() && SrcBCI->hasOneUse()) {
  3146. IRBuilder<> Builder(MC);
  3147. StructType *srcStTy = cast<StructType>(
  3148. SrcBCI->getOperand(0)->getType()->getPointerElementType());
  3149. std::vector<unsigned> idxlist = {0};
  3150. CopyElementsOfStructsWithIdenticalLayout(
  3151. Builder, DestBCI->getOperand(0), SrcBCI->getOperand(0), srcStTy,
  3152. idxlist);
  3153. }
  3154. } else {
  3155. if (DstTy == SrcTy) {
  3156. Value *DstPtr = DestBCI->getOperand(0);
  3157. Value *SrcPtr = SrcBCI->getOperand(0);
  3158. if (isa<GEPOperator>(DstPtr) || isa<GEPOperator>(SrcPtr)) {
  3159. MemcpySplitter::SplitMemCpy(MC, DL, annotation, typeSys);
  3160. return true;
  3161. } else {
  3162. DstPtr->replaceAllUsesWith(SrcPtr);
  3163. }
  3164. } else {
  3165. DXASSERT(0, "Can't handle structs of different layouts");
  3166. return false;
  3167. }
  3168. }
  3169. } else {
  3170. DXASSERT(IsUnboundedArrayMemcpy(TyV, TySrc), "otherwise mismatched types in memcpy are not unbounded array");
  3171. ReplaceUnboundedArrayUses(V, Src);
  3172. }
  3173. }
  3174. if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Src)) {
// For a const GV, if it has been stored to, mark it as non-constant.
  3176. if (GV->isConstant()) {
  3177. hlutil::PointerStatus PS(GV, 0, /*bLdStOnly*/ true);
  3178. PS.analyze(typeSys, /*bStructElt*/ false);
  3179. if (PS.HasStored())
  3180. GV->setConstant(false);
  3181. }
  3182. }
  3183. Value *RawDest = MC->getOperand(0);
  3184. Value *RawSrc = MC->getOperand(1);
  3185. MC->eraseFromParent();
  3186. if (Instruction *I = dyn_cast<Instruction>(RawDest)) {
  3187. if (I->user_empty())
  3188. I->eraseFromParent();
  3189. }
  3190. if (Instruction *I = dyn_cast<Instruction>(RawSrc)) {
  3191. if (I->user_empty())
  3192. I->eraseFromParent();
  3193. }
  3194. return true;
  3195. }
  3196. static bool ReplaceUseOfZeroInitEntry(Instruction *I, Value *V) {
  3197. BasicBlock *BB = I->getParent();
  3198. Function *F = I->getParent()->getParent();
  3199. for (auto U = V->user_begin(); U != V->user_end(); ) {
  3200. Instruction *UI = dyn_cast<Instruction>(*(U++));
  3201. if (!UI)
  3202. continue;
  3203. if (UI->getParent()->getParent() != F)
  3204. continue;
  3205. if (isa<GetElementPtrInst>(UI) || isa<BitCastInst>(UI)) {
  3206. if (!ReplaceUseOfZeroInitEntry(I, UI))
  3207. return false;
  3208. else
  3209. continue;
  3210. }
  3211. if (BB != UI->getParent() || UI == I)
  3212. continue;
  3213. // I is the last inst in the block after split.
  3214. // Any inst in current block is before I.
  3215. if (LoadInst *LI = dyn_cast<LoadInst>(UI)) {
  3216. LI->replaceAllUsesWith(ConstantAggregateZero::get(LI->getType()));
  3217. LI->eraseFromParent();
  3218. continue;
  3219. }
  3220. return false;
  3221. }
  3222. return true;
  3223. }
  3224. static bool ReplaceUseOfZeroInitPostDom(Instruction *I, Value *V,
  3225. PostDominatorTree &PDT) {
  3226. BasicBlock *BB = I->getParent();
  3227. Function *F = I->getParent()->getParent();
  3228. for (auto U = V->user_begin(); U != V->user_end(); ) {
  3229. Instruction *UI = dyn_cast<Instruction>(*(U++));
  3230. if (!UI)
  3231. continue;
  3232. if (UI->getParent()->getParent() != F)
  3233. continue;
  3234. if (!PDT.dominates(BB, UI->getParent()))
  3235. return false;
  3236. if (isa<GetElementPtrInst>(UI) || isa<BitCastInst>(UI)) {
  3237. if (!ReplaceUseOfZeroInitPostDom(I, UI, PDT))
  3238. return false;
  3239. else
  3240. continue;
  3241. }
  3242. if (BB != UI->getParent() || UI == I)
  3243. continue;
  3244. // I is the last inst in the block after split.
  3245. // Any inst in current block is before I.
  3246. if (LoadInst *LI = dyn_cast<LoadInst>(UI)) {
  3247. LI->replaceAllUsesWith(ConstantAggregateZero::get(LI->getType()));
  3248. LI->eraseFromParent();
  3249. continue;
  3250. }
  3251. return false;
  3252. }
  3253. return true;
  3254. }
// When a zero-initialized GV has only one definition, all uses that execute
// before that definition can use zero.
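// e.g. (illustrative) for a zero-initialized "static A a2;" written exactly
// once by a memcpy, any load of a2 that the pass can show executes before that
// memcpy is folded to zeroinitializer.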
  3257. static bool ReplaceUseOfZeroInitBeforeDef(Instruction *I, GlobalVariable *GV) {
  3258. BasicBlock *BB = I->getParent();
  3259. Function *F = I->getParent()->getParent();
  3260. // Make sure I is the last inst for BB.
  3261. if (I != BB->getTerminator())
  3262. BB->splitBasicBlock(I->getNextNode());
  3263. if (&F->getEntryBlock() == I->getParent()) {
  3264. return ReplaceUseOfZeroInitEntry(I, GV);
  3265. } else {
  3266. // Post dominator tree.
  3267. PostDominatorTree PDT;
  3268. PDT.runOnFunction(*F);
  3269. return ReplaceUseOfZeroInitPostDom(I, GV, PDT);
  3270. }
  3271. }
  3272. // Use `DT` to trace all users and make sure `I`'s BB dominates them all
  3273. static bool DominateAllUsersDom(Instruction *I, Value *V, DominatorTree *DT) {
  3274. BasicBlock *BB = I->getParent();
  3275. Function *F = I->getParent()->getParent();
  3276. for (auto U = V->user_begin(); U != V->user_end(); ) {
  3277. Instruction *UI = dyn_cast<Instruction>(*(U++));
// If not an instruction or from a different function, nothing to check, move along.
  3279. if (!UI || UI->getParent()->getParent() != F)
  3280. continue;
  3281. if (!DT->dominates(BB, UI->getParent()))
  3282. return false;
  3283. if (isa<GetElementPtrInst>(UI) || isa<BitCastInst>(UI)) {
  3284. if (!DominateAllUsersDom(I, UI, DT))
  3285. return false;
  3286. }
  3287. }
  3288. return true;
  3289. }
  3290. // Determine if `I` dominates all the users of `V`
  3291. static bool DominateAllUsers(Instruction *I, Value *V, DominatorTree *DT) {
  3292. Function *F = I->getParent()->getParent();
  3293. // The Entry Block dominates everything, trivially true
  3294. if (&F->getEntryBlock() == I->getParent())
  3295. return true;
  3296. if (!DT) {
  3297. DominatorTree TempDT;
  3298. TempDT.recalculate(*F);
  3299. return DominateAllUsersDom(I, V, &TempDT);
  3300. } else {
  3301. return DominateAllUsersDom(I, V, DT);
  3302. }
  3303. }
  3304. bool SROA_Helper::LowerMemcpy(Value *V, DxilFieldAnnotation *annotation,
  3305. DxilTypeSystem &typeSys, const DataLayout &DL,
  3306. DominatorTree *DT, bool bAllowReplace) {
  3307. Type *Ty = V->getType();
  3308. if (!Ty->isPointerTy()) {
  3309. return false;
  3310. }
// Get access status and collect memcpy uses.
// If the pointer is the dest of exactly one memcpy, replace the dest with the
// src (unless the dest is an out param); otherwise flatten the memcpy.
  3314. unsigned size = DL.getTypeAllocSize(Ty->getPointerElementType());
  3315. hlutil::PointerStatus PS(V, size, /*bLdStOnly*/ false);
  3316. const bool bStructElt = false;
  3317. PS.analyze(typeSys, bStructElt);
  3318. if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
  3319. if (GV->hasInitializer() && !isa<UndefValue>(GV->getInitializer())) {
  3320. if (PS.storedType == hlutil::PointerStatus::StoredType::NotStored) {
  3321. PS.storedType = hlutil::PointerStatus::StoredType::InitializerStored;
  3322. } else if (PS.storedType ==
  3323. hlutil::PointerStatus::StoredType::MemcopyDestOnce) {
3324. // For a single mem store, if the store does not dominate all users,
3325. // mark it as Stored.
  3326. // In cases like:
  3327. // struct A { float4 x[25]; };
  3328. // A a;
  3329. // static A a2;
  3330. // void set(A aa) { aa = a; }
3331. // call set inside the entry function, then use a2.
  3332. if (isa<ConstantAggregateZero>(GV->getInitializer())) {
  3333. Instruction * Memcpy = PS.StoringMemcpy;
  3334. if (!ReplaceUseOfZeroInitBeforeDef(Memcpy, GV)) {
  3335. PS.storedType = hlutil::PointerStatus::StoredType::Stored;
  3336. }
  3337. }
  3338. } else {
  3339. PS.storedType = hlutil::PointerStatus::StoredType::Stored;
  3340. }
  3341. }
  3342. }
  3343. if (bAllowReplace && !PS.HasMultipleAccessingFunctions) {
  3344. if (PS.storedType == hlutil::PointerStatus::StoredType::MemcopyDestOnce &&
3345. // Skip arguments: an input argument has an incoming value, so it is not dest-once anymore.
  3346. !isa<Argument>(V)) {
  3347. // Replace with src of memcpy.
  3348. MemCpyInst *MC = PS.StoringMemcpy;
  3349. if (MC->getSourceAddressSpace() == MC->getDestAddressSpace()) {
  3350. Value *Src = MC->getOperand(1);
3351. // Only remove one level of bitcast generated from inlining.
  3352. if (BitCastOperator *BC = dyn_cast<BitCastOperator>(Src))
  3353. Src = BC->getOperand(0);
  3354. if (GEPOperator *GEP = dyn_cast<GEPOperator>(Src)) {
3355. // For GEP, the ptr could have other GEP reads/writes,
3356. // so scanning only one GEP is not enough.
  3357. Value *Ptr = GEP->getPointerOperand();
  3358. while (GEPOperator *NestedGEP = dyn_cast<GEPOperator>(Ptr))
  3359. Ptr = NestedGEP->getPointerOperand();
  3360. if (CallInst *PtrCI = dyn_cast<CallInst>(Ptr)) {
  3361. hlsl::HLOpcodeGroup group =
  3362. hlsl::GetHLOpcodeGroup(PtrCI->getCalledFunction());
  3363. if (group == HLOpcodeGroup::HLSubscript) {
  3364. HLSubscriptOpcode opcode =
  3365. static_cast<HLSubscriptOpcode>(hlsl::GetHLOpcode(PtrCI));
  3366. if (opcode == HLSubscriptOpcode::CBufferSubscript) {
  3367. // Ptr from CBuffer is safe.
  3368. if (ReplaceMemcpy(V, Src, MC, annotation, typeSys, DL, DT))
  3369. return true;
  3370. }
  3371. }
  3372. }
  3373. } else if (!isa<CallInst>(Src)) {
3374. // Resource ptrs should not be replaced.
3375. // Need to make sure Src is not updated after the current memcpy.
3376. // For now, check that Src only has 1 store.
  3377. hlutil::PointerStatus SrcPS(Src, size, /*bLdStOnly*/ false);
  3378. SrcPS.analyze(typeSys, bStructElt);
  3379. if (SrcPS.storedType != hlutil::PointerStatus::StoredType::Stored) {
  3380. if (ReplaceMemcpy(V, Src, MC, annotation, typeSys, DL, DT))
  3381. return true;
  3382. }
  3383. }
  3384. }
  3385. } else if (PS.loadedType ==
  3386. hlutil::PointerStatus::LoadedType::MemcopySrcOnce) {
  3387. // Replace dst of memcpy.
  3388. MemCpyInst *MC = PS.LoadingMemcpy;
  3389. if (MC->getSourceAddressSpace() == MC->getDestAddressSpace()) {
  3390. Value *Dest = MC->getOperand(0);
3391. // Only remove one level of bitcast generated from inlining.
  3392. if (BitCastOperator *BC = dyn_cast<BitCastOperator>(Dest))
  3393. Dest = BC->getOperand(0);
3394. // For GEP, the ptr could have other GEP reads/writes,
3395. // so scanning only one GEP is not enough.
3396. // Also, resource ptrs should not be replaced.
  3397. if (!isa<GEPOperator>(Dest) && !isa<CallInst>(Dest) &&
  3398. !isa<BitCastOperator>(Dest)) {
3399. // Need to make sure Dest is not updated after the current memcpy.
3400. // For now, check that Dest only has 1 store.
  3401. hlutil::PointerStatus DestPS(Dest, size, /*bLdStOnly*/ false);
  3402. DestPS.analyze(typeSys, bStructElt);
  3403. if (DestPS.storedType != hlutil::PointerStatus::StoredType::Stored) {
  3404. if (ReplaceMemcpy(Dest, V, MC, annotation, typeSys, DL, DT)) {
  3405. // V still needs to be flattened.
3406. // Lower the memcpys that come from Dest.
  3407. return LowerMemcpy(V, annotation, typeSys, DL, DT, bAllowReplace);
  3408. }
  3409. }
  3410. }
  3411. }
  3412. }
  3413. }
  3414. for (MemCpyInst *MC : PS.memcpySet) {
  3415. MemcpySplitter::SplitMemCpy(MC, DL, annotation, typeSys);
  3416. }
  3417. return false;
  3418. }
3419. /// MarkEmptyStructUsers - Add instructions related to an empty struct to DeadInsts.
  3420. void SROA_Helper::MarkEmptyStructUsers(Value *V, SmallVector<Value *, 32> &DeadInsts) {
  3421. UndefValue *undef = UndefValue::get(V->getType());
  3422. for (auto itU = V->user_begin(), E = V->user_end(); itU != E;) {
  3423. Value *U = *(itU++);
  3424. // Kill memcpy, set operands to undef for call and ret, and recurse
  3425. if (MemCpyInst *MC = dyn_cast<MemCpyInst>(U)) {
  3426. DeadInsts.emplace_back(MC);
  3427. } else if (CallInst *CI = dyn_cast<CallInst>(U)) {
  3428. for (auto &operand : CI->operands()) {
  3429. if (operand == V)
  3430. operand.set(undef);
  3431. }
  3432. } else if (ReturnInst *Ret = dyn_cast<ReturnInst>(U)) {
  3433. Ret->setOperand(0, undef);
  3434. } else if (isa<Constant>(U) || isa<GetElementPtrInst>(U) ||
  3435. isa<BitCastInst>(U) || isa<LoadInst>(U) || isa<StoreInst>(U)) {
  3436. // Recurse users
  3437. MarkEmptyStructUsers(U, DeadInsts);
  3438. } else {
  3439. DXASSERT(false, "otherwise, recursing unexpected empty struct user");
  3440. }
  3441. }
  3442. if (Instruction *I = dyn_cast<Instruction>(V)) {
3443. // Only need to add insts with no uses here.
3444. // DeleteDeadInstructions will delete everything.
  3445. if (I->user_empty())
  3446. DeadInsts.emplace_back(I);
  3447. }
  3448. }
  3449. bool SROA_Helper::IsEmptyStructType(Type *Ty, DxilTypeSystem &typeSys) {
  3450. if (isa<ArrayType>(Ty))
  3451. Ty = Ty->getArrayElementType();
  3452. if (StructType *ST = dyn_cast<StructType>(Ty)) {
  3453. if (!HLMatrixType::isa(Ty)) {
  3454. DxilStructAnnotation *SA = typeSys.GetStructAnnotation(ST);
  3455. if (SA && SA->IsEmptyStruct())
  3456. return true;
  3457. }
  3458. }
  3459. return false;
  3460. }
  3461. //===----------------------------------------------------------------------===//
  3462. // SROA on function parameters.
  3463. //===----------------------------------------------------------------------===//
  3464. static void LegalizeDxilInputOutputs(Function *F,
  3465. DxilFunctionAnnotation *EntryAnnotation,
  3466. const DataLayout &DL,
  3467. DxilTypeSystem &typeSys);
  3468. static void InjectReturnAfterNoReturnPreserveOutput(HLModule &HLM);
  3469. namespace {
  3470. class SROA_Parameter_HLSL : public ModulePass {
  3471. HLModule *m_pHLModule;
  3472. public:
  3473. static char ID; // Pass identification, replacement for typeid
  3474. explicit SROA_Parameter_HLSL() : ModulePass(ID) {}
  3475. const char *getPassName() const override { return "SROA Parameter HLSL"; }
  3476. static void RewriteBitcastWithIdenticalStructs(Function *F);
  3477. static void RewriteBitcastWithIdenticalStructs(BitCastInst *BCI);
  3478. bool runOnModule(Module &M) override {
3479. // Patch memcpy to cover the case where bitcast (gep ptr, 0, 0) is transformed
3480. // into bitcast ptr.
  3481. MemcpySplitter::PatchMemCpyWithZeroIdxGEP(M);
  3482. m_pHLModule = &M.GetOrCreateHLModule();
  3483. const DataLayout &DL = M.getDataLayout();
  3484. // Load up debug information, to cross-reference values and the instructions
  3485. // used to load them.
  3486. m_HasDbgInfo = getDebugMetadataVersionFromModule(M) != 0;
  3487. InjectReturnAfterNoReturnPreserveOutput(*m_pHLModule);
  3488. std::deque<Function *> WorkList;
  3489. std::vector<Function *> DeadHLFunctions;
  3490. for (Function &F : M.functions()) {
  3491. HLOpcodeGroup group = GetHLOpcodeGroup(&F);
  3492. // Skip HL operations.
  3493. if (group != HLOpcodeGroup::NotHL ||
  3494. group == HLOpcodeGroup::HLExtIntrinsic) {
  3495. if (F.user_empty())
  3496. DeadHLFunctions.emplace_back(&F);
  3497. continue;
  3498. }
  3499. if (F.isDeclaration()) {
  3500. // Skip llvm intrinsic.
  3501. if (F.isIntrinsic())
  3502. continue;
  3503. // Skip unused external function.
  3504. if (F.user_empty())
  3505. continue;
  3506. }
  3507. // Skip void(void) functions.
  3508. if (F.getReturnType()->isVoidTy() && F.arg_size() == 0)
  3509. continue;
3510. // Skip library functions, except for LegalizeDxilInputOutputs.
  3511. if (&F != m_pHLModule->GetEntryFunction() &&
  3512. !m_pHLModule->IsEntryThatUsesSignatures(&F)) {
  3513. if (!F.isDeclaration())
  3514. LegalizeDxilInputOutputs(&F, m_pHLModule->GetFunctionAnnotation(&F),
  3515. DL, m_pHLModule->GetTypeSystem());
  3516. continue;
  3517. }
  3518. WorkList.emplace_back(&F);
  3519. }
  3520. // Remove dead hl functions here.
3521. // This is for hl functions which have a body and are always inlined.
  3522. for (Function *F : DeadHLFunctions) {
  3523. F->eraseFromParent();
  3524. }
  3525. // Preprocess aggregate function param used as function call arg.
  3526. for (Function *F : WorkList) {
  3527. preprocessArgUsedInCall(F);
  3528. }
  3529. // Process the worklist
  3530. while (!WorkList.empty()) {
  3531. Function *F = WorkList.front();
  3532. WorkList.pop_front();
  3533. RewriteBitcastWithIdenticalStructs(F);
  3534. createFlattenedFunction(F);
  3535. }
3536. // Replace functions with the flattened version once all functions are flattened.
  3537. for (auto Iter : funcMap)
  3538. replaceCall(Iter.first, Iter.second);
  3539. // Update patch constant function.
  3540. for (Function &F : M.functions()) {
  3541. if (F.isDeclaration())
  3542. continue;
  3543. if (!m_pHLModule->HasDxilFunctionProps(&F))
  3544. continue;
  3545. DxilFunctionProps &funcProps = m_pHLModule->GetDxilFunctionProps(&F);
  3546. if (funcProps.shaderKind == DXIL::ShaderKind::Hull) {
  3547. Function *oldPatchConstantFunc =
  3548. funcProps.ShaderProps.HS.patchConstantFunc;
  3549. if (funcMap.count(oldPatchConstantFunc))
  3550. m_pHLModule->SetPatchConstantFunctionForHS(&F, funcMap[oldPatchConstantFunc]);
  3551. }
  3552. }
  3553. // Remove flattened functions.
  3554. for (auto Iter : funcMap) {
  3555. Function *F = Iter.first;
  3556. Function *flatF = Iter.second;
  3557. flatF->takeName(F);
  3558. F->eraseFromParent();
  3559. }
  3560. // Flatten internal global.
  3561. std::vector<GlobalVariable *> staticGVs;
  3562. for (GlobalVariable &GV : M.globals()) {
  3563. if (dxilutil::IsStaticGlobal(&GV) ||
  3564. dxilutil::IsSharedMemoryGlobal(&GV)) {
  3565. staticGVs.emplace_back(&GV);
  3566. } else {
3567. // Merge GEP uses for the global.
  3568. HLModule::MergeGepUse(&GV);
  3569. }
  3570. }
  3571. for (GlobalVariable *GV : staticGVs)
  3572. flattenGlobal(GV);
  3573. // Remove unused internal global.
  3574. staticGVs.clear();
  3575. for (GlobalVariable &GV : M.globals()) {
  3576. if (dxilutil::IsStaticGlobal(&GV) ||
  3577. dxilutil::IsSharedMemoryGlobal(&GV)) {
  3578. staticGVs.emplace_back(&GV);
  3579. }
  3580. }
  3581. for (GlobalVariable *GV : staticGVs) {
  3582. bool onlyStoreUse = true;
  3583. for (User *user : GV->users()) {
  3584. if (isa<StoreInst>(user))
  3585. continue;
  3586. if (isa<ConstantExpr>(user) && user->user_empty())
  3587. continue;
  3588. // Check matrix store.
  3589. if (HLMatrixType::isa(
  3590. GV->getType()->getPointerElementType())) {
  3591. if (CallInst *CI = dyn_cast<CallInst>(user)) {
  3592. if (GetHLOpcodeGroupByName(CI->getCalledFunction()) ==
  3593. HLOpcodeGroup::HLMatLoadStore) {
  3594. HLMatLoadStoreOpcode opcode =
  3595. static_cast<HLMatLoadStoreOpcode>(GetHLOpcode(CI));
  3596. if (opcode == HLMatLoadStoreOpcode::ColMatStore ||
  3597. opcode == HLMatLoadStoreOpcode::RowMatStore)
  3598. continue;
  3599. }
  3600. }
  3601. }
  3602. onlyStoreUse = false;
  3603. break;
  3604. }
  3605. if (onlyStoreUse) {
  3606. for (auto UserIt = GV->user_begin(); UserIt != GV->user_end();) {
  3607. Value *User = *(UserIt++);
  3608. if (Instruction *I = dyn_cast<Instruction>(User)) {
  3609. I->eraseFromParent();
  3610. }
  3611. else {
  3612. ConstantExpr *CE = cast<ConstantExpr>(User);
  3613. CE->dropAllReferences();
  3614. }
  3615. }
  3616. GV->eraseFromParent();
  3617. }
  3618. }
  3619. return true;
  3620. }
  3621. private:
  3622. void DeleteDeadInstructions();
  3623. void preprocessArgUsedInCall(Function *F);
  3624. void moveFunctionBody(Function *F, Function *flatF);
  3625. void replaceCall(Function *F, Function *flatF);
  3626. void createFlattenedFunction(Function *F);
  3627. void
  3628. flattenArgument(Function *F, Value *Arg, bool bForParam,
  3629. DxilParameterAnnotation &paramAnnotation,
  3630. std::vector<Value *> &FlatParamList,
  3631. std::vector<DxilParameterAnnotation> &FlatRetAnnotationList,
  3632. BasicBlock *EntryBlock, DbgDeclareInst *DDI);
  3633. Value *castResourceArgIfRequired(Value *V, Type *Ty, bool bOut,
  3634. DxilParamInputQual inputQual,
  3635. IRBuilder<> &Builder);
  3636. Value *castArgumentIfRequired(Value *V, Type *Ty, bool bOut,
  3637. DxilParamInputQual inputQual,
  3638. DxilFieldAnnotation &annotation,
  3639. IRBuilder<> &Builder,
  3640. DxilTypeSystem &TypeSys);
  3641. // Replace use of parameter which changed type when flatten.
  3642. // Also add information to Arg if required.
  3643. void replaceCastParameter(Value *NewParam, Value *OldParam, Function &F,
  3644. Argument *Arg, const DxilParamInputQual inputQual,
  3645. IRBuilder<> &Builder);
  3646. void allocateSemanticIndex(
  3647. std::vector<DxilParameterAnnotation> &FlatAnnotationList,
  3648. unsigned startArgIndex, llvm::StringMap<Type *> &semanticTypeMap);
  3649. bool hasDynamicVectorIndexing(Value *V);
  3650. void flattenGlobal(GlobalVariable *GV);
  3651. //static std::vector<Value*> GetConstValueIdxList(IRBuilder<>& builder, std::vector<unsigned> idxlist);
  3652. /// DeadInsts - Keep track of instructions we have made dead, so that
  3653. /// we can remove them after we are done working.
  3654. SmallVector<Value *, 32> DeadInsts;
3655. // Map from the original function to the flattened version.
3656. MapVector<Function *, Function *> funcMap; // Need deterministic order of iteration
3657. // Map from original arg/param to the flattened cast version.
3658. std::unordered_map<Value *, std::pair<Value*, DxilParamInputQual>> castParamMap;
3659. // Map from the first element of a vector to the list of all elements of the vector.
3660. std::unordered_map<Value *, SmallVector<Value*, 4> > vectorEltsMap;
3661. // Set of row major matrix parameters.
  3662. std::unordered_set<Value *> castRowMajorParamMap;
  3663. bool m_HasDbgInfo;
  3664. };
3665. // When replacing an aggregate by its scalar elements,
3666. // the first element will preserve the original semantic,
3667. // and the subsequent ones will temporarily use this placeholder value.
  3668. // We then run a pass to fix the semantics and properly renumber them
  3669. // once the aggregate has been fully expanded.
  3670. //
  3671. // For example:
  3672. // struct Foo { float a; float b; };
  3673. // void main(Foo foo : TEXCOORD0, float bar : TEXCOORD0)
  3674. //
  3675. // Will be expanded to
  3676. // void main(float a : TEXCOORD0, float b : *, float bar : TEXCOORD0)
  3677. //
  3678. // And then fixed up to
  3679. // void main(float a : TEXCOORD0, float b : TEXCOORD1, float bar : TEXCOORD0)
  3680. //
  3681. // (which will later on fail validation due to duplicate semantics).
  3682. constexpr const char *ContinuedPseudoSemantic = "*";
  3683. }
  3684. char SROA_Parameter_HLSL::ID = 0;
  3685. INITIALIZE_PASS(SROA_Parameter_HLSL, "scalarrepl-param-hlsl",
  3686. "Scalar Replacement of Aggregates HLSL (parameters)", false,
  3687. false)
  3688. void SROA_Parameter_HLSL::RewriteBitcastWithIdenticalStructs(Function *F) {
  3689. if (F->isDeclaration())
  3690. return;
3691. // Gather list of bitcasts involving src and dest structs with identical layouts.
  3692. std::vector<BitCastInst*> worklist;
  3693. for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
  3694. if (BitCastInst *BCI = dyn_cast<BitCastInst>(&*I)) {
  3695. Type *DstTy = BCI->getDestTy();
  3696. Type *SrcTy = BCI->getSrcTy();
  3697. if(ArePointersToStructsOfIdenticalLayouts(DstTy, SrcTy))
  3698. worklist.push_back(BCI);
  3699. }
  3700. }
3702. // Replace bitcasts involving src and dest structs with identical layouts.
  3702. while (!worklist.empty()) {
  3703. BitCastInst *BCI = worklist.back();
  3704. worklist.pop_back();
  3705. RewriteBitcastWithIdenticalStructs(BCI);
  3706. }
  3707. }
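// Illustrative example of the rewrite performed below: given
//   %b = bitcast %struct.A* %a to %struct.B*
// where A and B have identical layouts, an alloca of B is created, the
// elements of %a are copied into it field by field, and all uses of %b are
// redirected to the new alloca before the bitcast is erased.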
  3708. void SROA_Parameter_HLSL::RewriteBitcastWithIdenticalStructs(BitCastInst *BCI) {
  3709. StructType *srcStTy = cast<StructType>(BCI->getSrcTy()->getPointerElementType());
  3710. StructType *destStTy = cast<StructType>(BCI->getDestTy()->getPointerElementType());
  3711. Value* srcPtr = BCI->getOperand(0);
  3712. IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(BCI->getParent()->getParent()));
  3713. AllocaInst *destPtr = AllocaBuilder.CreateAlloca(destStTy);
  3714. IRBuilder<> InstBuilder(BCI);
  3715. std::vector<unsigned> idxlist = { 0 };
  3716. CopyElementsOfStructsWithIdenticalLayout(InstBuilder, destPtr, srcPtr, srcStTy, idxlist);
  3717. BCI->replaceAllUsesWith(destPtr);
  3718. BCI->eraseFromParent();
  3719. }
  3720. /// DeleteDeadInstructions - Erase instructions on the DeadInstrs list,
  3721. /// recursively including all their operands that become trivially dead.
  3722. void SROA_Parameter_HLSL::DeleteDeadInstructions() {
  3723. while (!DeadInsts.empty()) {
  3724. Instruction *I = cast<Instruction>(DeadInsts.pop_back_val());
  3725. for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
  3726. if (Instruction *U = dyn_cast<Instruction>(*OI)) {
  3727. // Zero out the operand and see if it becomes trivially dead.
  3728. // (But, don't add allocas to the dead instruction list -- they are
  3729. // already on the worklist and will be deleted separately.)
  3730. *OI = nullptr;
  3731. if (isInstructionTriviallyDead(U) && !isa<AllocaInst>(U))
  3732. DeadInsts.push_back(U);
  3733. }
  3734. I->eraseFromParent();
  3735. }
  3736. }
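// Returns true if any pointer-typed GEP user of V indexes into a vector
// element with a non-constant index.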
  3737. bool SROA_Parameter_HLSL::hasDynamicVectorIndexing(Value *V) {
  3738. for (User *U : V->users()) {
  3739. if (!U->getType()->isPointerTy())
  3740. continue;
  3741. if (dyn_cast<GEPOperator>(U)) {
  3742. gep_type_iterator GEPIt = gep_type_begin(U), E = gep_type_end(U);
  3743. for (; GEPIt != E; ++GEPIt) {
  3744. if (isa<VectorType>(*GEPIt)) {
  3745. Value *VecIdx = GEPIt.getOperand();
  3746. if (!isa<ConstantInt>(VecIdx))
  3747. return true;
  3748. }
  3749. }
  3750. }
  3751. }
  3752. return false;
  3753. }
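// Flatten a static/groupshared global: lower or split its memcpys first, then
// repeatedly scalar-replace it, pushing the resulting element globals back on
// the worklist. Vector scalarization is skipped for groupshared/const-static
// arrays and for globals with dynamic vector indexing. Per-element debug info
// is emitted when debug metadata is present.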
  3754. void SROA_Parameter_HLSL::flattenGlobal(GlobalVariable *GV) {
  3755. Type *Ty = GV->getType()->getPointerElementType();
  3756. // Skip basic types.
  3757. if (!Ty->isAggregateType() && !Ty->isVectorTy())
  3758. return;
  3759. std::deque<Value *> WorkList;
  3760. WorkList.push_back(GV);
3762. // Merge GEP uses for the global.
  3762. HLModule::MergeGepUse(GV);
  3763. DxilTypeSystem &dxilTypeSys = m_pHLModule->GetTypeSystem();
  3764. // Only used to create ConstantExpr.
  3765. IRBuilder<> Builder(m_pHLModule->GetCtx());
  3766. std::vector<Instruction*> deadAllocas;
  3767. const DataLayout &DL = GV->getParent()->getDataLayout();
  3768. unsigned debugOffset = 0;
  3769. std::unordered_map<Value*, StringRef> EltNameMap;
  3770. // Process the worklist
  3771. while (!WorkList.empty()) {
  3772. GlobalVariable *EltGV = cast<GlobalVariable>(WorkList.front());
  3773. WorkList.pop_front();
  3774. const bool bAllowReplace = true;
3775. // SROA_Parameter_HLSL has no access to a domtree; if one is needed, it'll be generated.
  3776. if (SROA_Helper::LowerMemcpy(EltGV, /*annotation*/ nullptr, dxilTypeSys, DL,
  3777. nullptr /*DT */, bAllowReplace)) {
  3778. continue;
  3779. }
3780. // Flatten global vectors if there is no dynamic vector indexing.
  3781. bool bFlatVector = !hasDynamicVectorIndexing(EltGV);
  3782. // Disable scalarization of groupshared/const_static vector arrays
  3783. if ((GV->getType()->getAddressSpace() == DXIL::kTGSMAddrSpace ||
  3784. (GV->isConstant() && GV->hasInitializer() &&
  3785. GV->getLinkage() == GlobalValue::LinkageTypes::InternalLinkage)) &&
  3786. Ty->isArrayTy())
  3787. bFlatVector = false;
  3788. std::vector<Value *> Elts;
  3789. bool SROAed = false;
  3790. if (GlobalVariable *NewEltGV = dyn_cast_or_null<GlobalVariable>(
  3791. TranslatePtrIfUsedByLoweredFn(EltGV, dxilTypeSys))) {
  3792. if (GV != EltGV) {
  3793. EltGV->removeDeadConstantUsers();
  3794. EltGV->eraseFromParent();
  3795. }
  3796. EltGV = NewEltGV;
  3797. } else {
3798. // SROA_Parameter_HLSL has no access to a domtree; if one is needed, it'll be generated.
  3799. SROAed = SROA_Helper::DoScalarReplacement(
  3800. EltGV, Elts, Builder, bFlatVector,
  3801. // TODO: set precise.
  3802. /*hasPrecise*/ false, dxilTypeSys, DL, DeadInsts, /*DT*/ nullptr);
  3803. }
  3804. if (SROAed) {
3805. // Push Elts into the worklist.
3806. // Use rbegin to make sure the order does not change.
  3807. for (auto iter = Elts.rbegin(); iter != Elts.rend(); iter++) {
  3808. WorkList.push_front(*iter);
  3809. if (m_HasDbgInfo) {
  3810. StringRef EltName = (*iter)->getName().ltrim(GV->getName());
  3811. EltNameMap[*iter] = EltName;
  3812. }
  3813. }
  3814. EltGV->removeDeadConstantUsers();
  3815. // Now erase any instructions that were made dead while rewriting the
  3816. // alloca.
  3817. DeleteDeadInstructions();
  3818. ++NumReplaced;
  3819. } else {
  3820. // Add debug info for flattened globals.
  3821. if (m_HasDbgInfo && GV != EltGV) {
  3822. DebugInfoFinder &Finder = m_pHLModule->GetOrCreateDebugInfoFinder();
  3823. Type *Ty = EltGV->getType()->getElementType();
  3824. unsigned size = DL.getTypeAllocSizeInBits(Ty);
  3825. unsigned align = DL.getPrefTypeAlignment(Ty);
  3826. HLModule::CreateElementGlobalVariableDebugInfo(
  3827. GV, Finder, EltGV, size, align, debugOffset,
  3828. EltNameMap[EltGV]);
  3829. debugOffset += size;
  3830. }
  3831. }
  3832. }
  3833. DeleteDeadInstructions();
  3834. if (GV->user_empty()) {
  3835. GV->removeDeadConstantUsers();
  3836. GV->eraseFromParent();
  3837. }
  3838. }
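// Return the field annotation for element idx of Ty (looking through arrays).
// Falls back to the parent annotation for matrices and unannotated types.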
  3839. static DxilFieldAnnotation &GetEltAnnotation(Type *Ty, unsigned idx, DxilFieldAnnotation &annotation, DxilTypeSystem &dxilTypeSys) {
  3840. while (Ty->isArrayTy())
  3841. Ty = Ty->getArrayElementType();
  3842. if (StructType *ST = dyn_cast<StructType>(Ty)) {
  3843. if (HLMatrixType::isa(Ty))
  3844. return annotation;
  3845. DxilStructAnnotation *SA = dxilTypeSys.GetStructAnnotation(ST);
  3846. if (SA) {
  3847. DxilFieldAnnotation &FA = SA->GetFieldAnnotation(idx);
  3848. return FA;
  3849. }
  3850. }
  3851. return annotation;
  3852. }
3853. // Note: Semantic index allocation.
3854. // Semantic indices are allocated based on the linear layout.
3855. // For the following code:
  3856. /*
  3857. struct S {
  3858. float4 m;
  3859. float4 m2;
  3860. };
  3861. S s[2] : semantic;
  3862. struct S2 {
  3863. float4 m[2];
  3864. float4 m2[2];
  3865. };
  3866. S2 s2 : semantic;
  3867. */
  3868. // The semantic index is like this:
  3869. // s[0].m : semantic0
  3870. // s[0].m2 : semantic1
  3871. // s[1].m : semantic2
  3872. // s[1].m2 : semantic3
  3873. // s2.m[0] : semantic0
  3874. // s2.m[1] : semantic1
  3875. // s2.m2[0] : semantic2
  3876. // s2.m2[1] : semantic3
3877. // But when the argument is flattened, the result looks like this:
3878. // float4 s_m[2], float4 s_m2[2].
3879. // float4 s2_m[2], float4 s2_m2[2].
3880. // To do the allocation, we need to map from each element to its flattened argument.
3881. // Say the arg index of float4 s_m[2] is 0 and that of float4 s_m2[2] is 1.
3882. // We need to get 0 from s[0].m and s[1].m, and 1 from s[0].m2 and s[1].m2.
3883. // Allocate the arguments with the same semantic string from the type where the
3884. // semantic starts (S2 for s2.m[2] and s2.m2[2]).
3885. // Iterate over each element of the type, saving the semantic index and updating it.
3886. // The map from an element to its arg (s[0].m2 -> s_m2[2]) is done by argIdx.
3887. // argIdx is only incremented by 1 when a struct field is finished.
  3888. static unsigned AllocateSemanticIndex(
  3889. Type *Ty, unsigned &semIndex, unsigned argIdx, unsigned endArgIdx,
  3890. std::vector<DxilParameterAnnotation> &FlatAnnotationList) {
  3891. if (Ty->isPointerTy()) {
  3892. return AllocateSemanticIndex(Ty->getPointerElementType(), semIndex, argIdx,
  3893. endArgIdx, FlatAnnotationList);
  3894. } else if (Ty->isArrayTy()) {
  3895. unsigned arraySize = Ty->getArrayNumElements();
  3896. unsigned updatedArgIdx = argIdx;
  3897. Type *EltTy = Ty->getArrayElementType();
  3898. for (unsigned i = 0; i < arraySize; i++) {
  3899. updatedArgIdx = AllocateSemanticIndex(EltTy, semIndex, argIdx, endArgIdx,
  3900. FlatAnnotationList);
  3901. }
  3902. return updatedArgIdx;
  3903. } else if (Ty->isStructTy() && !HLMatrixType::isa(Ty)) {
  3904. unsigned fieldsCount = Ty->getStructNumElements();
  3905. for (unsigned i = 0; i < fieldsCount; i++) {
  3906. Type *EltTy = Ty->getStructElementType(i);
  3907. argIdx = AllocateSemanticIndex(EltTy, semIndex, argIdx, endArgIdx,
  3908. FlatAnnotationList);
  3909. if (!(EltTy->isStructTy() && !HLMatrixType::isa(EltTy))) {
  3910. // Update argIdx only when it is a leaf node.
  3911. argIdx++;
  3912. }
  3913. }
  3914. return argIdx;
  3915. } else {
  3916. DXASSERT(argIdx < endArgIdx, "arg index out of bound");
  3917. DxilParameterAnnotation &paramAnnotation = FlatAnnotationList[argIdx];
  3918. // Get element size.
  3919. unsigned rows = 1;
  3920. if (paramAnnotation.HasMatrixAnnotation()) {
  3921. const DxilMatrixAnnotation &matrix =
  3922. paramAnnotation.GetMatrixAnnotation();
  3923. if (matrix.Orientation == MatrixOrientation::RowMajor) {
  3924. rows = matrix.Rows;
  3925. } else {
  3926. DXASSERT_NOMSG(matrix.Orientation == MatrixOrientation::ColumnMajor);
  3927. rows = matrix.Cols;
  3928. }
  3929. }
  3930. // Save semIndex.
  3931. for (unsigned i = 0; i < rows; i++)
  3932. paramAnnotation.AppendSemanticIndex(semIndex + i);
  3933. // Update semIndex.
  3934. semIndex += rows;
  3935. return argIdx;
  3936. }
  3937. }
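// Group consecutive flat annotations that share a semantic (the entries
// marked with ContinuedPseudoSemantic during flattening), then walk the
// original semantic type with AllocateSemanticIndex to assign per-row
// semantic indices to each argument in the group.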
  3938. void SROA_Parameter_HLSL::allocateSemanticIndex(
  3939. std::vector<DxilParameterAnnotation> &FlatAnnotationList,
  3940. unsigned startArgIndex, llvm::StringMap<Type *> &semanticTypeMap) {
  3941. unsigned endArgIndex = FlatAnnotationList.size();
  3942. // Allocate semantic index.
  3943. for (unsigned i = startArgIndex; i < endArgIndex; ++i) {
  3944. // Group by semantic names.
  3945. DxilParameterAnnotation &flatParamAnnotation = FlatAnnotationList[i];
  3946. const std::string &semantic = flatParamAnnotation.GetSemanticString();
  3947. // If semantic is undefined, an error will be emitted elsewhere. For now,
  3948. // we should avoid asserting.
  3949. if (semantic.empty())
  3950. continue;
  3951. StringRef baseSemName; // The 'FOO' in 'FOO1'.
  3952. uint32_t semIndex; // The '1' in 'FOO1'
  3953. // Split semName and index.
  3954. Semantic::DecomposeNameAndIndex(semantic, &baseSemName, &semIndex);
  3955. unsigned semGroupEnd = i + 1;
  3956. while (semGroupEnd < endArgIndex &&
  3957. FlatAnnotationList[semGroupEnd].GetSemanticString() == ContinuedPseudoSemantic) {
  3958. FlatAnnotationList[semGroupEnd].SetSemanticString(baseSemName);
  3959. ++semGroupEnd;
  3960. }
3961. DXASSERT(semanticTypeMap.count(semantic) > 0, "Must have semantic type");
  3962. Type *semanticTy = semanticTypeMap[semantic];
  3963. AllocateSemanticIndex(semanticTy, semIndex, /*argIdx*/ i,
  3964. /*endArgIdx*/ semGroupEnd, FlatAnnotationList);
  3965. // Update i.
  3966. i = semGroupEnd - 1;
  3967. }
  3968. }
  3969. //
  3970. // Cast parameters.
  3971. //
  3972. static void CopyHandleToResourcePtr(Value *Handle, Value *ResPtr, HLModule &HLM,
  3973. IRBuilder<> &Builder) {
  3974. // Cast it to resource.
  3975. Type *ResTy = ResPtr->getType()->getPointerElementType();
  3976. Value *Res = HLM.EmitHLOperationCall(Builder, HLOpcodeGroup::HLCast,
  3977. (unsigned)HLCastOpcode::HandleToResCast,
  3978. ResTy, {Handle}, *HLM.GetModule());
  3979. // Store casted resource to OldArg.
  3980. Builder.CreateStore(Res, ResPtr);
  3981. }
  3982. static void CopyHandlePtrToResourcePtr(Value *HandlePtr, Value *ResPtr,
  3983. HLModule &HLM, IRBuilder<> &Builder) {
  3984. // Load the handle.
  3985. Value *Handle = Builder.CreateLoad(HandlePtr);
  3986. CopyHandleToResourcePtr(Handle, ResPtr, HLM, Builder);
  3987. }
  3988. static Value *CastResourcePtrToHandle(Value *Res, Type *HandleTy, HLModule &HLM,
  3989. IRBuilder<> &Builder) {
  3990. // Load OldArg.
  3991. Value *LdRes = Builder.CreateLoad(Res);
  3992. Value *Handle = HLM.EmitHLOperationCall(
  3993. Builder, HLOpcodeGroup::HLCreateHandle,
  3994. /*opcode*/ 0, HandleTy, {LdRes}, *HLM.GetModule());
  3995. return Handle;
  3996. }
  3997. static void CopyResourcePtrToHandlePtr(Value *Res, Value *HandlePtr,
  3998. HLModule &HLM, IRBuilder<> &Builder) {
  3999. Type *HandleTy = HandlePtr->getType()->getPointerElementType();
  4000. Value *Handle = CastResourcePtrToHandle(Res, HandleTy, HLM, Builder);
  4001. Builder.CreateStore(Handle, HandlePtr);
  4002. }
  4003. static void CopyVectorPtrToEltsPtr(Value *VecPtr, ArrayRef<Value *> elts,
  4004. unsigned vecSize, IRBuilder<> &Builder) {
  4005. Value *Vec = Builder.CreateLoad(VecPtr);
  4006. for (unsigned i = 0; i < vecSize; i++) {
  4007. Value *Elt = Builder.CreateExtractElement(Vec, i);
  4008. Builder.CreateStore(Elt, elts[i]);
  4009. }
  4010. }
  4011. static void CopyEltsPtrToVectorPtr(ArrayRef<Value *> elts, Value *VecPtr,
  4012. Type *VecTy, unsigned vecSize,
  4013. IRBuilder<> &Builder) {
  4014. Value *Vec = UndefValue::get(VecTy);
  4015. for (unsigned i = 0; i < vecSize; i++) {
  4016. Value *Elt = Builder.CreateLoad(elts[i]);
  4017. Vec = Builder.CreateInsertElement(Vec, Elt, i);
  4018. }
  4019. Builder.CreateStore(Vec, VecPtr);
  4020. }
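// The helpers below copy matrix values between their HL matrix form and a
// flattened scalar array, indexing the array at arrayBaseIdx plus the
// element's row- or column-major offset as appropriate. The *Ptr variants
// load/store through HLMatLoadStore and insert orientation casts so matrix
// values stay row major in registers.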
  4021. static void CopyMatToArrayPtr(Value *Mat, Value *ArrayPtr,
  4022. unsigned arrayBaseIdx, HLModule &HLM,
  4023. IRBuilder<> &Builder, bool bRowMajor) {
  4024. // Mat val is row major.
  4025. HLMatrixType MatTy = HLMatrixType::cast(Mat->getType());
  4026. Type *VecTy = MatTy.getLoweredVectorTypeForReg();
  4027. Value *Vec =
  4028. HLM.EmitHLOperationCall(Builder, HLOpcodeGroup::HLCast,
  4029. (unsigned)HLCastOpcode::RowMatrixToVecCast, VecTy,
  4030. {Mat}, *HLM.GetModule());
  4031. Value *zero = Builder.getInt32(0);
  4032. for (unsigned r = 0; r < MatTy.getNumRows(); r++) {
  4033. for (unsigned c = 0; c < MatTy.getNumColumns(); c++) {
  4034. unsigned matIdx = MatTy.getColumnMajorIndex(r, c);
  4035. Value *Elt = Builder.CreateExtractElement(Vec, matIdx);
  4036. Value *Ptr = Builder.CreateInBoundsGEP(
  4037. ArrayPtr, {zero, Builder.getInt32(arrayBaseIdx + matIdx)});
  4038. Builder.CreateStore(Elt, Ptr);
  4039. }
  4040. }
  4041. }
  4042. static void CopyMatPtrToArrayPtr(Value *MatPtr, Value *ArrayPtr,
  4043. unsigned arrayBaseIdx, HLModule &HLM,
  4044. IRBuilder<> &Builder, bool bRowMajor) {
  4045. Type *Ty = MatPtr->getType()->getPointerElementType();
  4046. Value *Mat = nullptr;
  4047. if (bRowMajor) {
  4048. Mat = HLM.EmitHLOperationCall(Builder, HLOpcodeGroup::HLMatLoadStore,
  4049. (unsigned)HLMatLoadStoreOpcode::RowMatLoad,
  4050. Ty, {MatPtr}, *HLM.GetModule());
  4051. } else {
  4052. Mat = HLM.EmitHLOperationCall(Builder, HLOpcodeGroup::HLMatLoadStore,
  4053. (unsigned)HLMatLoadStoreOpcode::ColMatLoad,
  4054. Ty, {MatPtr}, *HLM.GetModule());
  4055. // Matrix value should be row major.
  4056. Mat = HLM.EmitHLOperationCall(Builder, HLOpcodeGroup::HLCast,
  4057. (unsigned)HLCastOpcode::ColMatrixToRowMatrix,
  4058. Ty, {Mat}, *HLM.GetModule());
  4059. }
  4060. CopyMatToArrayPtr(Mat, ArrayPtr, arrayBaseIdx, HLM, Builder, bRowMajor);
  4061. }
  4062. static Value *LoadArrayPtrToMat(Value *ArrayPtr, unsigned arrayBaseIdx,
  4063. Type *Ty, HLModule &HLM, IRBuilder<> &Builder,
  4064. bool bRowMajor) {
  4065. HLMatrixType MatTy = HLMatrixType::cast(Ty);
  4066. // HLInit operands are in row major.
  4067. SmallVector<Value *, 16> Elts;
  4068. Value *zero = Builder.getInt32(0);
  4069. for (unsigned r = 0; r < MatTy.getNumRows(); r++) {
  4070. for (unsigned c = 0; c < MatTy.getNumColumns(); c++) {
  4071. unsigned matIdx = bRowMajor
  4072. ? MatTy.getRowMajorIndex(r, c)
  4073. : MatTy.getColumnMajorIndex(r, c);
  4074. Value *Ptr = Builder.CreateInBoundsGEP(
  4075. ArrayPtr, {zero, Builder.getInt32(arrayBaseIdx + matIdx)});
  4076. Value *Elt = Builder.CreateLoad(Ptr);
  4077. Elts.emplace_back(Elt);
  4078. }
  4079. }
  4080. return HLM.EmitHLOperationCall(Builder, HLOpcodeGroup::HLInit,
  4081. /*opcode*/ 0, Ty, {Elts}, *HLM.GetModule());
  4082. }
  4083. static void CopyArrayPtrToMatPtr(Value *ArrayPtr, unsigned arrayBaseIdx,
  4084. Value *MatPtr, HLModule &HLM,
  4085. IRBuilder<> &Builder, bool bRowMajor) {
  4086. Type *Ty = MatPtr->getType()->getPointerElementType();
  4087. Value *Mat =
  4088. LoadArrayPtrToMat(ArrayPtr, arrayBaseIdx, Ty, HLM, Builder, bRowMajor);
  4089. if (bRowMajor) {
  4090. HLM.EmitHLOperationCall(Builder, HLOpcodeGroup::HLMatLoadStore,
  4091. (unsigned)HLMatLoadStoreOpcode::RowMatStore, Ty,
  4092. {MatPtr, Mat}, *HLM.GetModule());
  4093. } else {
  4094. // Mat is row major.
  4095. // Cast it to col major before store.
  4096. Mat = HLM.EmitHLOperationCall(Builder, HLOpcodeGroup::HLCast,
  4097. (unsigned)HLCastOpcode::RowMatrixToColMatrix,
  4098. Ty, {Mat}, *HLM.GetModule());
  4099. HLM.EmitHLOperationCall(Builder, HLOpcodeGroup::HLMatLoadStore,
  4100. (unsigned)HLMatLoadStoreOpcode::ColMatStore, Ty,
  4101. {MatPtr, Mat}, *HLM.GetModule());
  4102. }
  4103. }
  4104. using CopyFunctionTy = void(Value *FromPtr, Value *ToPtr, HLModule &HLM,
  4105. Type *HandleTy, IRBuilder<> &Builder,
  4106. bool bRowMajor);
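// The two recursive helpers below copy between a multi-dimensional aggregate
// and a flattened one-dimensional array: vectors and matrices are expanded
// into (or rebuilt from) consecutive scalar slots, nested arrays recurse with
// idx = calcIdx * size + i, and any other leaf element is copied via the
// supplied CastCopyFn.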
  4107. static void
  4108. CastCopyArrayMultiDimTo1Dim(Value *FromArray, Value *ToArray, Type *CurFromTy,
  4109. std::vector<Value *> &idxList, unsigned calcIdx,
  4110. Type *HandleTy, HLModule &HLM, IRBuilder<> &Builder,
  4111. CopyFunctionTy CastCopyFn, bool bRowMajor) {
  4112. if (CurFromTy->isVectorTy()) {
  4113. // Copy vector to array.
  4114. Value *FromPtr = Builder.CreateInBoundsGEP(FromArray, idxList);
  4115. Value *V = Builder.CreateLoad(FromPtr);
  4116. unsigned vecSize = CurFromTy->getVectorNumElements();
  4117. Value *zeroIdx = Builder.getInt32(0);
  4118. for (unsigned i = 0; i < vecSize; i++) {
  4119. Value *ToPtr = Builder.CreateInBoundsGEP(
  4120. ToArray, {zeroIdx, Builder.getInt32(calcIdx++)});
  4121. Value *Elt = Builder.CreateExtractElement(V, i);
  4122. Builder.CreateStore(Elt, ToPtr);
  4123. }
  4124. } else if (HLMatrixType MatTy = HLMatrixType::dyn_cast(CurFromTy)) {
  4125. // Copy matrix to array.
  4126. // Calculate the offset.
  4127. unsigned offset = calcIdx * MatTy.getNumElements();
  4128. Value *FromPtr = Builder.CreateInBoundsGEP(FromArray, idxList);
  4129. CopyMatPtrToArrayPtr(FromPtr, ToArray, offset, HLM, Builder, bRowMajor);
  4130. } else if (!CurFromTy->isArrayTy()) {
  4131. Value *FromPtr = Builder.CreateInBoundsGEP(FromArray, idxList);
  4132. Value *ToPtr = Builder.CreateInBoundsGEP(
  4133. ToArray, {Builder.getInt32(0), Builder.getInt32(calcIdx)});
  4134. CastCopyFn(FromPtr, ToPtr, HLM, HandleTy, Builder, bRowMajor);
  4135. } else {
  4136. unsigned size = CurFromTy->getArrayNumElements();
  4137. Type *FromEltTy = CurFromTy->getArrayElementType();
  4138. for (unsigned i = 0; i < size; i++) {
  4139. idxList.push_back(Builder.getInt32(i));
  4140. unsigned idx = calcIdx * size + i;
  4141. CastCopyArrayMultiDimTo1Dim(FromArray, ToArray, FromEltTy, idxList, idx,
  4142. HandleTy, HLM, Builder, CastCopyFn,
  4143. bRowMajor);
  4144. idxList.pop_back();
  4145. }
  4146. }
  4147. }
  4148. static void
  4149. CastCopyArray1DimToMultiDim(Value *FromArray, Value *ToArray, Type *CurToTy,
  4150. std::vector<Value *> &idxList, unsigned calcIdx,
  4151. Type *HandleTy, HLModule &HLM, IRBuilder<> &Builder,
  4152. CopyFunctionTy CastCopyFn, bool bRowMajor) {
  4153. if (CurToTy->isVectorTy()) {
  4154. // Copy array to vector.
  4155. Value *V = UndefValue::get(CurToTy);
  4156. unsigned vecSize = CurToTy->getVectorNumElements();
  4157. // Calculate the offset.
  4158. unsigned offset = calcIdx * vecSize;
  4159. Value *zeroIdx = Builder.getInt32(0);
  4160. Value *ToPtr = Builder.CreateInBoundsGEP(ToArray, idxList);
  4161. for (unsigned i = 0; i < vecSize; i++) {
  4162. Value *FromPtr = Builder.CreateInBoundsGEP(
  4163. FromArray, {zeroIdx, Builder.getInt32(offset++)});
  4164. Value *Elt = Builder.CreateLoad(FromPtr);
  4165. V = Builder.CreateInsertElement(V, Elt, i);
  4166. }
  4167. Builder.CreateStore(V, ToPtr);
4168. } else if (HLMatrixType MatTy = HLMatrixType::dyn_cast(CurToTy)) {
  4169. // Copy array to matrix.
  4170. // Calculate the offset.
  4171. unsigned offset = calcIdx * MatTy.getNumElements();
  4172. Value *ToPtr = Builder.CreateInBoundsGEP(ToArray, idxList);
  4173. CopyArrayPtrToMatPtr(FromArray, offset, ToPtr, HLM, Builder, bRowMajor);
  4174. } else if (!CurToTy->isArrayTy()) {
  4175. Value *FromPtr = Builder.CreateInBoundsGEP(
  4176. FromArray, {Builder.getInt32(0), Builder.getInt32(calcIdx)});
  4177. Value *ToPtr = Builder.CreateInBoundsGEP(ToArray, idxList);
  4178. CastCopyFn(FromPtr, ToPtr, HLM, HandleTy, Builder, bRowMajor);
  4179. } else {
  4180. unsigned size = CurToTy->getArrayNumElements();
  4181. Type *ToEltTy = CurToTy->getArrayElementType();
  4182. for (unsigned i = 0; i < size; i++) {
  4183. idxList.push_back(Builder.getInt32(i));
  4184. unsigned idx = calcIdx * size + i;
  4185. CastCopyArray1DimToMultiDim(FromArray, ToArray, ToEltTy, idxList, idx,
  4186. HandleTy, HLM, Builder, CastCopyFn,
  4187. bRowMajor);
  4188. idxList.pop_back();
  4189. }
  4190. }
  4191. }
  4192. static void CastCopyOldPtrToNewPtr(Value *OldPtr, Value *NewPtr, HLModule &HLM,
  4193. Type *HandleTy, IRBuilder<> &Builder,
  4194. bool bRowMajor) {
  4195. Type *NewTy = NewPtr->getType()->getPointerElementType();
  4196. Type *OldTy = OldPtr->getType()->getPointerElementType();
  4197. if (NewTy == HandleTy) {
  4198. CopyResourcePtrToHandlePtr(OldPtr, NewPtr, HLM, Builder);
  4199. } else if (OldTy->isVectorTy()) {
  4200. // Copy vector to array.
  4201. Value *V = Builder.CreateLoad(OldPtr);
  4202. unsigned vecSize = OldTy->getVectorNumElements();
  4203. Value *zeroIdx = Builder.getInt32(0);
  4204. for (unsigned i = 0; i < vecSize; i++) {
  4205. Value *EltPtr = Builder.CreateGEP(NewPtr, {zeroIdx, Builder.getInt32(i)});
  4206. Value *Elt = Builder.CreateExtractElement(V, i);
  4207. Builder.CreateStore(Elt, EltPtr);
  4208. }
  4209. } else if (HLMatrixType::isa(OldTy)) {
  4210. CopyMatPtrToArrayPtr(OldPtr, NewPtr, /*arrayBaseIdx*/ 0, HLM, Builder,
  4211. bRowMajor);
  4212. } else if (OldTy->isArrayTy()) {
  4213. std::vector<Value *> idxList;
  4214. idxList.emplace_back(Builder.getInt32(0));
  4215. CastCopyArrayMultiDimTo1Dim(OldPtr, NewPtr, OldTy, idxList, /*calcIdx*/ 0,
  4216. HandleTy, HLM, Builder, CastCopyOldPtrToNewPtr,
  4217. bRowMajor);
  4218. }
  4219. }
  4220. static void CastCopyNewPtrToOldPtr(Value *NewPtr, Value *OldPtr, HLModule &HLM,
  4221. Type *HandleTy, IRBuilder<> &Builder,
  4222. bool bRowMajor) {
  4223. Type *NewTy = NewPtr->getType()->getPointerElementType();
  4224. Type *OldTy = OldPtr->getType()->getPointerElementType();
  4225. if (NewTy == HandleTy) {
  4226. CopyHandlePtrToResourcePtr(NewPtr, OldPtr, HLM, Builder);
  4227. } else if (OldTy->isVectorTy()) {
  4228. // Copy array to vector.
  4229. Value *V = UndefValue::get(OldTy);
  4230. unsigned vecSize = OldTy->getVectorNumElements();
  4231. Value *zeroIdx = Builder.getInt32(0);
  4232. for (unsigned i = 0; i < vecSize; i++) {
  4233. Value *EltPtr = Builder.CreateGEP(NewPtr, {zeroIdx, Builder.getInt32(i)});
  4234. Value *Elt = Builder.CreateLoad(EltPtr);
  4235. V = Builder.CreateInsertElement(V, Elt, i);
  4236. }
  4237. Builder.CreateStore(V, OldPtr);
  4238. } else if (HLMatrixType::isa(OldTy)) {
  4239. CopyArrayPtrToMatPtr(NewPtr, /*arrayBaseIdx*/ 0, OldPtr, HLM, Builder,
  4240. bRowMajor);
  4241. } else if (OldTy->isArrayTy()) {
  4242. std::vector<Value *> idxList;
  4243. idxList.emplace_back(Builder.getInt32(0));
  4244. CastCopyArray1DimToMultiDim(NewPtr, OldPtr, OldTy, idxList, /*calcIdx*/ 0,
  4245. HandleTy, HLM, Builder, CastCopyNewPtrToOldPtr,
  4246. bRowMajor);
  4247. }
  4248. }
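// Wire a new (lowered) parameter back to the old one: migrate dbg.declare,
// replace pointer-typed old arguments with an alloca, then copy the new value
// into the old storage at entry for "in" qualifiers and copy the old storage
// back out before every return for "out" qualifiers, converting between
// handles/resources, flattened vectors, matrices and arrays as needed.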
  4249. void SROA_Parameter_HLSL::replaceCastParameter(
  4250. Value *NewParam, Value *OldParam, Function &F, Argument *Arg,
  4251. const DxilParamInputQual inputQual, IRBuilder<> &Builder) {
  4252. Type *HandleTy = m_pHLModule->GetOP()->GetHandleType();
  4253. Type *NewTy = NewParam->getType();
  4254. Type *OldTy = OldParam->getType();
  4255. bool bIn = inputQual == DxilParamInputQual::Inout ||
  4256. inputQual == DxilParamInputQual::In;
  4257. bool bOut = inputQual == DxilParamInputQual::Inout ||
  4258. inputQual == DxilParamInputQual::Out;
4259. // Make sure the InsertPoint is after the OldParam inst.
  4260. if (Instruction *I = dyn_cast<Instruction>(OldParam)) {
  4261. Builder.SetInsertPoint(I->getNextNode());
  4262. }
  4263. if (DbgDeclareInst *DDI = llvm::FindAllocaDbgDeclare(OldParam)) {
  4264. // Add debug info to new param.
  4265. DIBuilder DIB(*F.getParent(), /*AllowUnresolved*/ false);
  4266. DIExpression *DDIExp = DDI->getExpression();
  4267. DIB.insertDeclare(NewParam, DDI->getVariable(), DDIExp, DDI->getDebugLoc(),
  4268. Builder.GetInsertPoint());
  4269. }
  4270. if (isa<Argument>(OldParam) && OldTy->isPointerTy()) {
4271. // OldParam will be removed with the old function.
4272. // Create an alloca to replace it.
  4273. IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(&F));
  4274. Value *AllocParam = AllocaBuilder.CreateAlloca(OldTy->getPointerElementType());
  4275. OldParam->replaceAllUsesWith(AllocParam);
  4276. OldParam = AllocParam;
  4277. }
  4278. if (NewTy == HandleTy) {
  4279. CopyHandleToResourcePtr(NewParam, OldParam, *m_pHLModule, Builder);
  4280. } else if (vectorEltsMap.count(NewParam)) {
  4281. // Vector is flattened to scalars.
  4282. Type *VecTy = OldTy;
  4283. if (VecTy->isPointerTy())
  4284. VecTy = VecTy->getPointerElementType();
  4285. // Flattened vector.
  4286. SmallVector<Value *, 4> &elts = vectorEltsMap[NewParam];
  4287. unsigned vecSize = elts.size();
  4288. if (NewTy->isPointerTy()) {
  4289. if (bIn) {
  4290. // Copy NewParam to OldParam at entry.
  4291. CopyEltsPtrToVectorPtr(elts, OldParam, VecTy, vecSize, Builder);
  4292. }
  4293. // bOut must be true here.
  4294. // Store the OldParam to NewParam before every return.
  4295. for (auto &BB : F.getBasicBlockList()) {
  4296. if (ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
  4297. IRBuilder<> RetBuilder(RI);
  4298. CopyVectorPtrToEltsPtr(OldParam, elts, vecSize, RetBuilder);
  4299. }
  4300. }
  4301. } else {
  4302. // Must be in parameter.
  4303. // Copy NewParam to OldParam at entry.
  4304. Value *Vec = UndefValue::get(VecTy);
  4305. for (unsigned i = 0; i < vecSize; i++) {
  4306. Vec = Builder.CreateInsertElement(Vec, elts[i], i);
  4307. }
  4308. if (OldTy->isPointerTy()) {
  4309. Builder.CreateStore(Vec, OldParam);
  4310. } else {
  4311. OldParam->replaceAllUsesWith(Vec);
  4312. }
  4313. }
  4314. // Don't need elts anymore.
  4315. vectorEltsMap.erase(NewParam);
  4316. } else if (!NewTy->isPointerTy()) {
  4317. // Ptr param is cast to non-ptr param.
  4318. // Must be in param.
  4319. // Store NewParam to OldParam at entry.
  4320. Builder.CreateStore(NewParam, OldParam);
  4321. } else if (HLMatrixType::isa(OldTy)) {
  4322. bool bRowMajor = castRowMajorParamMap.count(NewParam);
  4323. Value *Mat = LoadArrayPtrToMat(NewParam, /*arrayBaseIdx*/ 0, OldTy,
  4324. *m_pHLModule, Builder, bRowMajor);
  4325. OldParam->replaceAllUsesWith(Mat);
  4326. } else {
  4327. bool bRowMajor = castRowMajorParamMap.count(NewParam);
  4328. // NewTy is pointer type.
  4329. if (bIn) {
  4330. // Copy NewParam to OldParam at entry.
  4331. CastCopyNewPtrToOldPtr(NewParam, OldParam, *m_pHLModule, HandleTy,
  4332. Builder, bRowMajor);
  4333. }
  4334. if (bOut) {
  4335. // Store the OldParam to NewParam before every return.
  4336. for (auto &BB : F.getBasicBlockList()) {
  4337. if (ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
  4338. IRBuilder<> RetBuilder(RI);
  4339. CastCopyOldPtrToNewPtr(OldParam, NewParam, *m_pHLModule, HandleTy,
  4340. RetBuilder, bRowMajor);
  4341. }
  4342. }
  4343. }
  4344. }
  4345. }
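// Replace resource-typed parameters (and arrays of resources) with dxil
// handle values or handle allocas; the original resource pointer is recorded
// in castParamMap so replaceCastParameter can reconnect the two later.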
  4346. Value *SROA_Parameter_HLSL::castResourceArgIfRequired(
  4347. Value *V, Type *Ty, bool bOut,
  4348. DxilParamInputQual inputQual,
  4349. IRBuilder<> &Builder) {
  4350. Type *HandleTy = m_pHLModule->GetOP()->GetHandleType();
  4351. Module &M = *m_pHLModule->GetModule();
  4352. IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Builder.GetInsertPoint()));
  4353. // Lower resource type to handle ty.
  4354. if (dxilutil::IsHLSLResourceType(Ty)) {
  4355. Value *Res = V;
  4356. if (!bOut) {
  4357. Value *LdRes = Builder.CreateLoad(Res);
  4358. V = m_pHLModule->EmitHLOperationCall(Builder,
  4359. HLOpcodeGroup::HLCreateHandle,
  4360. /*opcode*/ 0, HandleTy, { LdRes }, M);
  4361. }
  4362. else {
  4363. V = AllocaBuilder.CreateAlloca(HandleTy);
  4364. }
  4365. castParamMap[V] = std::make_pair(Res, inputQual);
  4366. }
  4367. else if (Ty->isArrayTy()) {
  4368. unsigned arraySize = 1;
  4369. Type *AT = Ty;
  4370. while (AT->isArrayTy()) {
  4371. arraySize *= AT->getArrayNumElements();
  4372. AT = AT->getArrayElementType();
  4373. }
  4374. if (dxilutil::IsHLSLResourceType(AT)) {
  4375. Value *Res = V;
  4376. Type *Ty = ArrayType::get(HandleTy, arraySize);
  4377. V = AllocaBuilder.CreateAlloca(Ty);
  4378. castParamMap[V] = std::make_pair(Res, inputQual);
  4379. }
  4380. }
  4381. return V;
  4382. }
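// Lower one flattened parameter to its final form: lower InPayload structs to
// their lowered UDT, strip the pointer from non-aggregate "in" params (alloca
// plus load), turn resources into handles via castResourceArgIfRequired, and
// rewrite matrix load/store users of column-major matrix params so values
// stay row major in registers.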
  4383. Value *SROA_Parameter_HLSL::castArgumentIfRequired(
  4384. Value *V, Type *Ty, bool bOut,
  4385. DxilParamInputQual inputQual, DxilFieldAnnotation &annotation,
  4386. IRBuilder<> &Builder,
  4387. DxilTypeSystem &TypeSys) {
  4388. Module &M = *m_pHLModule->GetModule();
  4389. IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Builder.GetInsertPoint()));
  4390. if (inputQual == DxilParamInputQual::InPayload) {
  4391. DXASSERT_NOMSG(isa<StructType>(Ty));
  4392. // Lower payload type here
  4393. StructType *LoweredTy = GetLoweredUDT(cast<StructType>(Ty), &TypeSys);
  4394. if (LoweredTy != Ty) {
  4395. Value *Ptr = AllocaBuilder.CreateAlloca(LoweredTy);
  4396. ReplaceUsesForLoweredUDT(V, Ptr);
  4397. castParamMap[V] = std::make_pair(Ptr, inputQual);
  4398. V = Ptr;
  4399. }
  4400. return V;
  4401. }
  4402. // Remove pointer for vector/scalar which is not out.
  4403. if (V->getType()->isPointerTy() && !Ty->isAggregateType() && !bOut) {
  4404. Value *Ptr = AllocaBuilder.CreateAlloca(Ty);
  4405. V->replaceAllUsesWith(Ptr);
4406. // Create the load here to make the type correct.
4407. // The Ptr will be stored with the correct value in replaceCastParameter.
  4408. if (Ptr->hasOneUse()) {
4409. // Load after the existing user for call arg replacement.
4410. // If not, the call arg will load undef.
4411. // This will not hurt the parameter; the new load comes only after the first load.
4412. // It is still before all the load users.
  4413. Instruction *User = cast<Instruction>(*(Ptr->user_begin()));
  4414. IRBuilder<> CallBuilder(User->getNextNode());
  4415. V = CallBuilder.CreateLoad(Ptr);
  4416. } else {
  4417. V = Builder.CreateLoad(Ptr);
  4418. }
  4419. castParamMap[V] = std::make_pair(Ptr, inputQual);
  4420. }
  4421. V = castResourceArgIfRequired(V, Ty, bOut, inputQual, Builder);
4422. // An entry function matrix value parameter has an orientation (majority).
4423. // Make sure its users use row major matrix values.
  4424. bool updateToColMajor = annotation.HasMatrixAnnotation() &&
  4425. annotation.GetMatrixAnnotation().Orientation ==
  4426. MatrixOrientation::ColumnMajor;
  4427. if (updateToColMajor) {
  4428. if (V->getType()->isPointerTy()) {
  4429. for (User *user : V->users()) {
  4430. CallInst *CI = dyn_cast<CallInst>(user);
  4431. if (!CI)
  4432. continue;
  4433. HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
  4434. if (group != HLOpcodeGroup::HLMatLoadStore)
  4435. continue;
  4436. HLMatLoadStoreOpcode opcode =
  4437. static_cast<HLMatLoadStoreOpcode>(GetHLOpcode(CI));
  4438. Type *opcodeTy = Builder.getInt32Ty();
  4439. switch (opcode) {
  4440. case HLMatLoadStoreOpcode::RowMatLoad: {
  4441. // Update matrix function opcode to col major version.
  4442. Value *rowOpArg = ConstantInt::get(
  4443. opcodeTy,
  4444. static_cast<unsigned>(HLMatLoadStoreOpcode::ColMatLoad));
  4445. CI->setOperand(HLOperandIndex::kOpcodeIdx, rowOpArg);
  4446. // Cast it to row major.
  4447. CallInst *RowMat = HLModule::EmitHLOperationCall(
  4448. Builder, HLOpcodeGroup::HLCast,
  4449. (unsigned)HLCastOpcode::ColMatrixToRowMatrix, Ty, {CI}, M);
  4450. CI->replaceAllUsesWith(RowMat);
  4451. // Set arg to CI again.
  4452. RowMat->setArgOperand(HLOperandIndex::kUnaryOpSrc0Idx, CI);
  4453. } break;
4454. case HLMatLoadStoreOpcode::RowMatStore: {
4455. // Update matrix function opcode to col major version.
4456. Value *rowOpArg = ConstantInt::get(
4457. opcodeTy,
4458. static_cast<unsigned>(HLMatLoadStoreOpcode::ColMatStore));
4459. CI->setOperand(HLOperandIndex::kOpcodeIdx, rowOpArg);
4460. Value *Mat = CI->getArgOperand(HLOperandIndex::kMatStoreValOpIdx);
4461. // Cast it to col major.
4462. CallInst *RowMat = HLModule::EmitHLOperationCall(
4463. Builder, HLOpcodeGroup::HLCast,
4464. (unsigned)HLCastOpcode::RowMatrixToColMatrix, Ty, {Mat}, M);
4465. CI->setArgOperand(HLOperandIndex::kMatStoreValOpIdx, RowMat);
4466. } break;
4467. }
  4468. }
  4469. } else {
  4470. CallInst *RowMat = HLModule::EmitHLOperationCall(
  4471. Builder, HLOpcodeGroup::HLCast,
  4472. (unsigned)HLCastOpcode::ColMatrixToRowMatrix, Ty, {V}, M);
  4473. V->replaceAllUsesWith(RowMat);
  4474. // Set arg to V again.
  4475. RowMat->setArgOperand(HLOperandIndex::kUnaryOpSrc0Idx, V);
  4476. }
  4477. }
  4478. return V;
  4479. }
  4480. struct AnnotatedValue {
  4481. llvm::Value *Value;
  4482. DxilFieldAnnotation Annotation;
  4483. };
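// flattenArgument drives SROA for a single parameter: each piece is either
// scalar-replaced (its elements re-queued, inheriting the parent semantic via
// ContinuedPseudoSemantic), expanded per index for SV_Target arrays, or cast
// to its final form and appended to FlatParamList/FlatAnnotationList.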
  4484. void SROA_Parameter_HLSL::flattenArgument(
  4485. Function *F, Value *Arg, bool bForParam,
  4486. DxilParameterAnnotation &paramAnnotation,
  4487. std::vector<Value *> &FlatParamList,
  4488. std::vector<DxilParameterAnnotation> &FlatAnnotationList,
  4489. BasicBlock *EntryBlock, DbgDeclareInst *DDI) {
  4490. std::deque<AnnotatedValue> WorkList;
  4491. WorkList.push_back({ Arg, paramAnnotation });
  4492. unsigned startArgIndex = FlatAnnotationList.size();
  4493. DxilTypeSystem &dxilTypeSys = m_pHLModule->GetTypeSystem();
  4494. const std::string &semantic = paramAnnotation.GetSemanticString();
  4495. DxilParamInputQual inputQual = paramAnnotation.GetParamInputQual();
  4496. bool bOut = inputQual == DxilParamInputQual::Out ||
  4497. inputQual == DxilParamInputQual::Inout ||
  4498. inputQual == DxilParamInputQual::OutStream0 ||
  4499. inputQual == DxilParamInputQual::OutStream1 ||
  4500. inputQual == DxilParamInputQual::OutStream2 ||
  4501. inputQual == DxilParamInputQual::OutStream3;
  4502. // Map from semantic string to type.
  4503. llvm::StringMap<Type *> semanticTypeMap;
  4504. // Original semantic type.
  4505. if (!semantic.empty()) {
  4506. // Unwrap top-level array if primitive
  4507. if (inputQual == DxilParamInputQual::InputPatch ||
  4508. inputQual == DxilParamInputQual::OutputPatch ||
  4509. inputQual == DxilParamInputQual::InputPrimitive) {
  4510. Type *Ty = Arg->getType();
  4511. if (Ty->isPointerTy())
  4512. Ty = Ty->getPointerElementType();
  4513. if (Ty->isArrayTy())
  4514. semanticTypeMap[semantic] = Ty->getArrayElementType();
  4515. } else {
  4516. semanticTypeMap[semantic] = Arg->getType();
  4517. }
  4518. }
  4519. std::vector<Instruction*> deadAllocas;
  4520. DIBuilder DIB(*F->getParent(), /*AllowUnresolved*/ false);
  4521. unsigned debugOffset = 0;
  4522. const DataLayout &DL = F->getParent()->getDataLayout();
  4523. // Process the worklist
  4524. while (!WorkList.empty()) {
  4525. AnnotatedValue AV = WorkList.front();
  4526. WorkList.pop_front();
  4527. // Do not skip unused parameter.
  4528. Value *V = AV.Value;
  4529. DxilFieldAnnotation &annotation = AV.Annotation;
  4530. // We can never replace memcpy for arguments because they have an implicit
  4531. // first memcpy that happens from argument passing, and pointer analysis
  4532. // will not reveal that, especially if we've done a first SROA pass on V.
  4533. // No DomTree needed for that reason
  4534. const bool bAllowReplace = false;
  4535. SROA_Helper::LowerMemcpy(V, &annotation, dxilTypeSys, DL, nullptr /*DT */, bAllowReplace);
4536. // Now it is safe to create the IRBuilders.
4537. // If we create them before LowerMemcpy, the insertion point instruction may get deleted.
  4538. IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(EntryBlock));
  4539. IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(EntryBlock));
  4540. std::vector<Value *> Elts;
4541. // Do not flatten vectors for the entry function currently.
  4542. bool SROAed = false;
  4543. Type *BrokenUpTy = nullptr;
  4544. uint64_t NumInstances = 1;
  4545. if (inputQual != DxilParamInputQual::InPayload) {
  4546. // DomTree isn't used by arguments
  4547. SROAed = SROA_Helper::DoScalarReplacement(
  4548. V, Elts, BrokenUpTy, NumInstances, Builder,
  4549. /*bFlatVector*/ false, annotation.IsPrecise(),
  4550. dxilTypeSys, DL, DeadInsts, /*DT*/ nullptr);
  4551. }
  4552. if (SROAed) {
  4553. Type *Ty = V->getType()->getPointerElementType();
  4554. // Skip empty struct parameters.
  4555. if (SROA_Helper::IsEmptyStructType(Ty, dxilTypeSys)) {
  4556. SROA_Helper::MarkEmptyStructUsers(V, DeadInsts);
  4557. DeleteDeadInstructions();
  4558. continue;
  4559. }
  4560. bool precise = annotation.IsPrecise();
  4561. const std::string &semantic = annotation.GetSemanticString();
  4562. hlsl::InterpolationMode interpMode = annotation.GetInterpolationMode();
  4563. // Push Elts into workList from right to left to preserve the order.
  4564. for (unsigned ri=0;ri<Elts.size();ri++) {
  4565. unsigned i = Elts.size() - ri - 1;
  4566. DxilFieldAnnotation EltAnnotation = GetEltAnnotation(Ty, i, annotation, dxilTypeSys);
  4567. const std::string &eltSem = EltAnnotation.GetSemanticString();
  4568. if (!semantic.empty()) {
  4569. if (!eltSem.empty()) {
  4570. // It doesn't look like we can provide source location information from here
  4571. F->getContext().emitWarning(
  4572. Twine("semantic '") + eltSem + "' on field overridden by function or enclosing type");
  4573. }
  4574. // Inherit semantic from parent, but only preserve it for the first element.
  4575. // Subsequent elements are noted with a special value that gets resolved
  4576. // once the argument is completely flattened.
  4577. EltAnnotation.SetSemanticString(i == 0 ? semantic : ContinuedPseudoSemantic);
  4578. } else if (!eltSem.empty() &&
  4579. semanticTypeMap.count(eltSem) == 0) {
  4580. Type *EltTy = dxilutil::GetArrayEltTy(Ty);
4581. DXASSERT(EltTy->isStructTy(), "must be a struct type to have a semantic.");
  4582. semanticTypeMap[eltSem] = EltTy->getStructElementType(i);
  4583. }
  4584. if (precise)
  4585. EltAnnotation.SetPrecise();
  4586. if (EltAnnotation.GetInterpolationMode().GetKind() == DXIL::InterpolationMode::Undefined)
  4587. EltAnnotation.SetInterpolationMode(interpMode);
  4588. WorkList.push_front({ Elts[i], EltAnnotation });
  4589. }
  4590. ++NumReplaced;
  4591. if (Instruction *I = dyn_cast<Instruction>(V))
  4592. deadAllocas.emplace_back(I);
  4593. } else {
  4594. Type *Ty = V->getType();
  4595. if (Ty->isPointerTy())
  4596. Ty = Ty->getPointerElementType();
  4597. // Flatten array of SV_Target.
  4598. StringRef semanticStr = annotation.GetSemanticString();
  4599. if (semanticStr.upper().find("SV_TARGET") == 0 &&
  4600. Ty->isArrayTy()) {
  4601. Type *Ty = cast<ArrayType>(V->getType()->getPointerElementType());
  4602. StringRef targetStr;
  4603. unsigned targetIndex;
  4604. Semantic::DecomposeNameAndIndex(semanticStr, &targetStr, &targetIndex);
  4605. // Replace target parameter with local target.
  4606. AllocaInst *localTarget = AllocaBuilder.CreateAlloca(Ty);
  4607. V->replaceAllUsesWith(localTarget);
  4608. unsigned arraySize = 1;
  4609. std::vector<unsigned> arraySizeList;
  4610. while (Ty->isArrayTy()) {
  4611. unsigned size = Ty->getArrayNumElements();
  4612. arraySizeList.emplace_back(size);
  4613. arraySize *= size;
  4614. Ty = Ty->getArrayElementType();
  4615. }
  4616. unsigned arrayLevel = arraySizeList.size();
  4617. std::vector<unsigned> arrayIdxList(arrayLevel, 0);
  4618. // Create flattened target.
  4619. DxilFieldAnnotation EltAnnotation = annotation;
  4620. for (unsigned i=0;i<arraySize;i++) {
  4621. Value *Elt = AllocaBuilder.CreateAlloca(Ty);
  4622. EltAnnotation.SetSemanticString(targetStr.str()+std::to_string(targetIndex+i));
  4623. // Add semantic type.
  4624. semanticTypeMap[EltAnnotation.GetSemanticString()] = Ty;
  4625. WorkList.push_front({ Elt, EltAnnotation });
  4626. // Copy local target to flattened target.
  4627. std::vector<Value*> idxList(arrayLevel+1);
  4628. idxList[0] = Builder.getInt32(0);
  4629. for (unsigned idx=0;idx<arrayLevel; idx++) {
  4630. idxList[idx+1] = Builder.getInt32(arrayIdxList[idx]);
  4631. }
  4632. if (bForParam) {
// For a parameter, copy before each return.
  4634. for (auto &BB : F->getBasicBlockList()) {
  4635. TerminatorInst *TI = BB.getTerminator();
  4636. if (isa<ReturnInst>(TI)) {
  4637. IRBuilder<> RetBuilder(TI);
  4638. Value *Ptr = RetBuilder.CreateGEP(localTarget, idxList);
  4639. Value *V = RetBuilder.CreateLoad(Ptr);
  4640. RetBuilder.CreateStore(V, Elt);
  4641. }
  4642. }
  4643. } else {
  4644. // Else, copy with Builder.
  4645. Value *Ptr = Builder.CreateGEP(localTarget, idxList);
  4646. Value *V = Builder.CreateLoad(Ptr);
  4647. Builder.CreateStore(V, Elt);
  4648. }
// Update arrayIdxList: advance the multi-dimensional index odometer-style, innermost dimension first.
  4650. for (unsigned idx=arrayLevel;idx>0;idx--) {
  4651. arrayIdxList[idx-1]++;
  4652. if (arrayIdxList[idx-1] < arraySizeList[idx-1])
  4653. break;
  4654. arrayIdxList[idx-1] = 0;
  4655. }
  4656. }
  4657. continue;
  4658. }
  4659. // Cast vector/matrix/resource parameter.
  4660. V = castArgumentIfRequired(V, Ty, bOut, inputQual,
  4661. annotation, Builder, dxilTypeSys);
  4662. // Cannot SROA, save it to final parameter list.
  4663. FlatParamList.emplace_back(V);
  4664. // Create ParamAnnotation for V.
  4665. FlatAnnotationList.emplace_back(DxilParameterAnnotation());
  4666. DxilParameterAnnotation &flatParamAnnotation = FlatAnnotationList.back();
  4667. flatParamAnnotation.SetParamInputQual(paramAnnotation.GetParamInputQual());
  4668. flatParamAnnotation.SetInterpolationMode(annotation.GetInterpolationMode());
  4669. flatParamAnnotation.SetSemanticString(annotation.GetSemanticString());
  4670. flatParamAnnotation.SetCompType(annotation.GetCompType().GetKind());
  4671. flatParamAnnotation.SetMatrixAnnotation(annotation.GetMatrixAnnotation());
  4672. flatParamAnnotation.SetPrecise(annotation.IsPrecise());
  4673. flatParamAnnotation.SetResourceAttribute(annotation.GetResourceAttribute());
  4674. // Add debug info.
  4675. if (DDI && V != Arg) {
  4676. Value *TmpV = V;
// If V was cast, attach the debug info to the original V.
  4678. if (castParamMap.count(V)) {
  4679. TmpV = castParamMap[V].first;
// One more level for a pointer to an input vector:
// it is cast from a pointer to a non-pointer value, then cast to scalars.
  4682. if (castParamMap.count(TmpV)) {
  4683. TmpV = castParamMap[TmpV].first;
  4684. }
  4685. }
  4686. Type *Ty = TmpV->getType();
  4687. if (Ty->isPointerTy())
  4688. Ty = Ty->getPointerElementType();
  4689. unsigned size = DL.getTypeAllocSize(Ty);
  4690. #if 0 // HLSL Change
  4691. DIExpression *DDIExp = DIB.createBitPieceExpression(debugOffset, size);
  4692. #else // HLSL Change
  4693. Type *argTy = Arg->getType();
  4694. if (argTy->isPointerTy())
  4695. argTy = argTy->getPointerElementType();
  4696. DIExpression *DDIExp = nullptr;
  4697. if (debugOffset == 0 && DL.getTypeAllocSize(argTy) == size) {
  4698. DDIExp = DIB.createExpression();
  4699. }
  4700. else {
  4701. DDIExp = DIB.createBitPieceExpression(debugOffset * 8, size * 8);
  4702. }
  4703. #endif // HLSL Change
  4704. debugOffset += size;
  4705. DIB.insertDeclare(TmpV, DDI->getVariable(), DDIExp, DDI->getDebugLoc(),
  4706. Builder.GetInsertPoint());
  4707. }
  4708. // Flatten stream out.
  4709. if (HLModule::IsStreamOutputPtrType(V->getType())) {
// For stream output objects,
// create a local value to act as the output value.
  4712. Type *outputType = V->getType()->getPointerElementType()->getStructElementType(0);
  4713. Value *outputVal = AllocaBuilder.CreateAlloca(outputType);
  4714. // For each stream.Append(data)
  4715. // transform into
  4716. // d = load data
  4717. // store outputVal, d
  4718. // stream.Append(outputVal)
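// For example (illustrative only): for
//   stream.Append(v);
// where v still has the original output struct type, v is copied
// element-by-element into outputVal and the call becomes
//   stream.Append(outputVal);
// If the append argument was already SROA'd into multiple operands, each
// operand is copied into the matching element of outputVal instead.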
  4719. for (User *user : V->users()) {
  4720. if (CallInst *CI = dyn_cast<CallInst>(user)) {
  4721. unsigned opcode = GetHLOpcode(CI);
  4722. if (opcode == static_cast<unsigned>(IntrinsicOp::MOP_Append)) {
// At this point, the stream append data argument might or might not have been SROA'd.
  4724. Value *firstDataPtr = CI->getArgOperand(HLOperandIndex::kStreamAppendDataOpIndex);
  4725. DXASSERT(firstDataPtr->getType()->isPointerTy(), "Append value must be a pointer.");
  4726. if (firstDataPtr->getType()->getPointerElementType() == outputType) {
  4727. // The data has not been SROA'd
  4728. DXASSERT(CI->getNumArgOperands() == (HLOperandIndex::kStreamAppendDataOpIndex + 1),
  4729. "Unexpected number of arguments for non-SROA'd StreamOutput.Append");
  4730. IRBuilder<> Builder(CI);
  4731. llvm::SmallVector<llvm::Value *, 16> idxList;
  4732. SplitCpy(firstDataPtr->getType(), outputVal, firstDataPtr, idxList, Builder, DL,
  4733. dxilTypeSys, &flatParamAnnotation);
  4734. CI->setArgOperand(HLOperandIndex::kStreamAppendDataOpIndex, outputVal);
  4735. }
  4736. else {
  4737. // Append has been SROA'd, we might be operating on multiple values
  4738. // with types differing from the stream output type.
  4739. // Flatten store outputVal.
// It must be a struct to be flattened.
  4741. IRBuilder<> Builder(CI);
  4742. llvm::SmallVector<llvm::Value *, 16> IdxList;
  4743. llvm::SmallVector<llvm::Value *, 16> EltPtrList;
  4744. llvm::SmallVector<const DxilFieldAnnotation*, 16> EltAnnotationList;
  4745. // split
  4746. SplitPtr(outputVal, IdxList, outputVal->getType(), flatParamAnnotation,
  4747. EltPtrList, EltAnnotationList, dxilTypeSys, Builder);
  4748. unsigned eltCount = CI->getNumArgOperands()-2;
  4749. DXASSERT_LOCALVAR(eltCount, eltCount == EltPtrList.size(), "invalid element count");
  4750. for (unsigned i = HLOperandIndex::kStreamAppendDataOpIndex; i < CI->getNumArgOperands(); i++) {
  4751. Value *DataPtr = CI->getArgOperand(i);
  4752. Value *EltPtr = EltPtrList[i - HLOperandIndex::kStreamAppendDataOpIndex];
  4753. const DxilFieldAnnotation *EltAnnotation = EltAnnotationList[i - HLOperandIndex::kStreamAppendDataOpIndex];
  4754. llvm::SmallVector<llvm::Value *, 16> IdxList;
  4755. SplitCpy(DataPtr->getType(), EltPtr, DataPtr, IdxList,
  4756. Builder, DL, dxilTypeSys, EltAnnotation);
  4757. CI->setArgOperand(i, EltPtr);
  4758. }
  4759. }
  4760. }
  4761. }
  4762. }
  4763. // Then split output value to generate ParamQual.
  4764. WorkList.push_front({ outputVal, annotation });
  4765. }
  4766. }
  4767. }
  4768. // Now erase any instructions that were made dead while rewriting the
  4769. // alloca.
  4770. DeleteDeadInstructions();
  4771. // Erase dead allocas after all uses deleted.
  4772. for (Instruction *I : deadAllocas)
  4773. I->eraseFromParent();
  4774. unsigned endArgIndex = FlatAnnotationList.size();
  4775. if (bForParam && startArgIndex < endArgIndex) {
  4776. DxilParamInputQual inputQual = paramAnnotation.GetParamInputQual();
  4777. if (inputQual == DxilParamInputQual::OutStream0 ||
  4778. inputQual == DxilParamInputQual::OutStream1 ||
  4779. inputQual == DxilParamInputQual::OutStream2 ||
  4780. inputQual == DxilParamInputQual::OutStream3)
  4781. startArgIndex++;
  4782. DxilParameterAnnotation &flatParamAnnotation =
  4783. FlatAnnotationList[startArgIndex];
  4784. const std::string &semantic = flatParamAnnotation.GetSemanticString();
  4785. if (!semantic.empty())
  4786. allocateSemanticIndex(FlatAnnotationList, startArgIndex,
  4787. semanticTypeMap);
  4788. }
  4789. }
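// Returns true if V (or any GEP derived from it) is passed as an argument to
// a call that is neither an HL operation nor an LLVM intrinsic.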
  4790. static bool IsUsedAsCallArg(Value *V) {
  4791. for (User *U : V->users()) {
  4792. if (CallInst *CI = dyn_cast<CallInst>(U)) {
  4793. Function *CalledF = CI->getCalledFunction();
  4794. HLOpcodeGroup group = GetHLOpcodeGroup(CalledF);
  4795. // Skip HL operations.
  4796. if (group != HLOpcodeGroup::NotHL ||
  4797. group == HLOpcodeGroup::HLExtIntrinsic) {
  4798. continue;
  4799. }
  4800. // Skip llvm intrinsic.
  4801. if (CalledF->isIntrinsic())
  4802. continue;
  4803. return true;
  4804. }
  4805. if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
  4806. if (IsUsedAsCallArg(GEP))
  4807. return true;
  4808. }
  4809. }
  4810. return false;
  4811. }
// For function parameters that are used in function calls and need to be flattened,
// replace them with a temp alloca.
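// A minimal sketch of the situation (assuming a hypothetical user-defined
// helper 'foo'):
//   void caller(inout MyStruct s) { foo(s); }
// 's' will be flattened, but the call to 'foo' still expects a pointer to the
// whole struct, so 's' is replaced by a temp alloca that is memcpy'd from the
// parameter on entry and back to it before each return.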
  4814. void SROA_Parameter_HLSL::preprocessArgUsedInCall(Function *F) {
  4815. if (F->isDeclaration())
  4816. return;
  4817. const DataLayout &DL = m_pHLModule->GetModule()->getDataLayout();
  4818. DxilTypeSystem &typeSys = m_pHLModule->GetTypeSystem();
  4819. DxilFunctionAnnotation *pFuncAnnot = typeSys.GetFunctionAnnotation(F);
  4820. DXASSERT(pFuncAnnot, "else invalid function");
  4821. IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(F));
  4822. IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(F));
  4823. SmallVector<ReturnInst*, 2> retList;
  4824. for (BasicBlock &bb : F->getBasicBlockList()) {
  4825. if (ReturnInst *RI = dyn_cast<ReturnInst>(bb.getTerminator())) {
  4826. retList.emplace_back(RI);
  4827. }
  4828. }
  4829. for (Argument &arg : F->args()) {
  4830. Type *Ty = arg.getType();
  4831. // Only check pointer types.
  4832. if (!Ty->isPointerTy())
  4833. continue;
  4834. Ty = Ty->getPointerElementType();
  4835. // Skip scalar types.
  4836. if (!Ty->isAggregateType() &&
  4837. Ty->getScalarType() == Ty)
  4838. continue;
  4839. bool bUsedInCall = IsUsedAsCallArg(&arg);
  4840. if (bUsedInCall) {
  4841. // Create tmp.
  4842. Value *TmpArg = AllocaBuilder.CreateAlloca(Ty);
  4843. // Replace arg with tmp.
  4844. arg.replaceAllUsesWith(TmpArg);
  4845. DxilParameterAnnotation &paramAnnot = pFuncAnnot->GetParameterAnnotation(arg.getArgNo());
  4846. DxilParamInputQual inputQual = paramAnnot.GetParamInputQual();
  4847. unsigned size = DL.getTypeAllocSize(Ty);
  4848. // Copy between arg and tmp.
  4849. if (inputQual == DxilParamInputQual::In ||
  4850. inputQual == DxilParamInputQual::Inout) {
  4851. // copy arg to tmp.
  4852. CallInst *argToTmp = Builder.CreateMemCpy(TmpArg, &arg, size, 0);
  4853. // Split the memcpy.
  4854. MemcpySplitter::SplitMemCpy(cast<MemCpyInst>(argToTmp), DL, nullptr,
  4855. typeSys);
  4856. }
  4857. if (inputQual == DxilParamInputQual::Out ||
  4858. inputQual == DxilParamInputQual::Inout) {
  4859. for (ReturnInst *RI : retList) {
  4860. IRBuilder<> RetBuilder(RI);
  4861. // copy tmp to arg.
  4862. CallInst *tmpToArg =
  4863. RetBuilder.CreateMemCpy(&arg, TmpArg, size, 0);
  4864. // Split the memcpy.
  4865. MemcpySplitter::SplitMemCpy(cast<MemCpyInst>(tmpToArg), DL, nullptr,
  4866. typeSys);
  4867. }
  4868. }
  4869. // TODO: support other DxilParamInputQual.
  4870. }
  4871. }
  4872. }
/// moveFunctionBody - Move the body of F to flatF.
  4874. void SROA_Parameter_HLSL::moveFunctionBody(Function *F, Function *flatF) {
  4875. bool updateRetType = F->getReturnType() != flatF->getReturnType();
  4876. // Splice the body of the old function right into the new function.
  4877. flatF->getBasicBlockList().splice(flatF->begin(), F->getBasicBlockList());
// Rewrite returns if the return type changed.
if (updateRetType) {
  for (BasicBlock &BB : flatF->getBasicBlockList()) {
    // Replace the typed ret with ret void; the return value was already
    // stored by createFlattenedFunction before the body was moved.
    if (ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
      IRBuilder<> Builder(RI);
      Builder.CreateRetVoid();
      RI->eraseFromParent();
    }
  }
}
  4892. }
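// SplitArrayCopy - Split whole-aggregate stores that use V into
// element-by-element copies (SplitCpy), then erase the original stores.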
  4893. static void SplitArrayCopy(Value *V, const DataLayout &DL,
  4894. DxilTypeSystem &typeSys,
  4895. DxilFieldAnnotation *fieldAnnotation) {
  4896. for (auto U = V->user_begin(); U != V->user_end();) {
  4897. User *user = *(U++);
  4898. if (StoreInst *ST = dyn_cast<StoreInst>(user)) {
  4899. Value *ptr = ST->getPointerOperand();
  4900. Value *val = ST->getValueOperand();
  4901. IRBuilder<> Builder(ST);
  4902. SmallVector<Value *, 16> idxList;
  4903. SplitCpy(ptr->getType(), ptr, val, idxList, Builder, DL, typeSys,
  4904. fieldAnnotation);
  4905. ST->eraseFromParent();
  4906. }
  4907. }
  4908. }
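// CheckArgUsage - Conservatively determine whether the argument V is read
// (bLoad) and/or written (bStore), looking through GEPs, pointer-returning
// calls, and HL matrix load/store intrinsics.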
  4909. static void CheckArgUsage(Value *V, bool &bLoad, bool &bStore) {
  4910. if (bLoad && bStore)
  4911. return;
  4912. for (User *user : V->users()) {
  4913. if (dyn_cast<LoadInst>(user)) {
  4914. bLoad = true;
  4915. } else if (dyn_cast<StoreInst>(user)) {
  4916. bStore = true;
  4917. } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(user)) {
  4918. CheckArgUsage(GEP, bLoad, bStore);
  4919. } else if (CallInst *CI = dyn_cast<CallInst>(user)) {
  4920. if (CI->getType()->isPointerTy())
  4921. CheckArgUsage(CI, bLoad, bStore);
  4922. else {
  4923. HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
  4924. if (group == HLOpcodeGroup::HLMatLoadStore) {
  4925. HLMatLoadStoreOpcode opcode =
  4926. static_cast<HLMatLoadStoreOpcode>(GetHLOpcode(CI));
  4927. switch (opcode) {
  4928. case HLMatLoadStoreOpcode::ColMatLoad:
  4929. case HLMatLoadStoreOpcode::RowMatLoad:
  4930. bLoad = true;
  4931. break;
  4932. case HLMatLoadStoreOpcode::ColMatStore:
  4933. case HLMatLoadStoreOpcode::RowMatStore:
  4934. bStore = true;
  4935. break;
  4936. }
  4937. }
  4938. }
  4939. }
  4940. }
  4941. }
// AcceptHitAndEndSearch and IgnoreHit never return, but they require
// outputs to have been written before the call. Ensure this by:
// - injecting a return immediately after the call if one is not there already
// - LegalizeDxilInputOutputs will inject writes from temp alloca to
//   outputs before each return.
// - in HLOperationLower, after lowering the intrinsic, move the intrinsic
//   to just before the return.
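// Sketch of the rewrite (illustrative): for
//   call @IgnoreHit() ; <rest of block>
// the block is split after the call, the fall-through branch is erased, and
//   call @IgnoreHit() ; ret void
// is what remains in the original block, with the remaining instructions
// moved into a separate block.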
  4949. static void InjectReturnAfterNoReturnPreserveOutput(HLModule &HLM) {
  4950. for (Function &F : HLM.GetModule()->functions()) {
  4951. if (GetHLOpcodeGroup(&F) == HLOpcodeGroup::HLIntrinsic) {
  4952. for (auto U : F.users()) {
  4953. if (CallInst *CI = dyn_cast<CallInst>(U)) {
  4954. unsigned OpCode = GetHLOpcode(CI);
  4955. if (OpCode == (unsigned)IntrinsicOp::IOP_AcceptHitAndEndSearch ||
  4956. OpCode == (unsigned)IntrinsicOp::IOP_IgnoreHit) {
  4957. Instruction *pNextI = CI->getNextNode();
// Skip if a return already immediately follows the call.
  4959. if (isa<ReturnInst>(pNextI))
  4960. continue;
  4961. // split block and add return:
  4962. BasicBlock *BB = CI->getParent();
  4963. BB->splitBasicBlock(pNextI);
  4964. TerminatorInst *Term = BB->getTerminator();
  4965. Term->eraseFromParent();
  4966. IRBuilder<> Builder(BB);
  4967. llvm::Type *RetTy = CI->getParent()->getParent()->getReturnType();
  4968. if (RetTy->isVoidTy())
  4969. Builder.CreateRetVoid();
  4970. else
  4971. Builder.CreateRet(UndefValue::get(RetTy));
  4972. }
  4973. }
  4974. }
  4975. }
  4976. }
  4977. }
  4978. // Support store to input and load from output.
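// A minimal illustration (hypothetical): an 'in' aggregate parameter that is
// also written in the body gets a temp alloca; the parameter is copied into
// the temp on entry and all uses are redirected to it. Conversely, an 'out'
// parameter that is also read is staged through a temp that is copied back to
// the real output before every return.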
  4979. static void LegalizeDxilInputOutputs(Function *F,
  4980. DxilFunctionAnnotation *EntryAnnotation,
  4981. const DataLayout &DL,
  4982. DxilTypeSystem &typeSys) {
  4983. BasicBlock &EntryBlk = F->getEntryBlock();
  4984. Module *M = F->getParent();
  4985. // Map from output to the temp created for it.
  4986. MapVector<Argument *, Value*> outputTempMap; // Need deterministic order of iteration
  4987. for (Argument &arg : F->args()) {
  4988. Type *Ty = arg.getType();
  4989. DxilParameterAnnotation &paramAnnotation = EntryAnnotation->GetParameterAnnotation(arg.getArgNo());
  4990. DxilParamInputQual qual = paramAnnotation.GetParamInputQual();
  4991. bool isColMajor = false;
  4992. // Skip arg which is not a pointer.
  4993. if (!Ty->isPointerTy()) {
  4994. if (HLMatrixType::isa(Ty)) {
  4995. // Replace matrix arg with cast to vec. It will be lowered in
  4996. // DxilGenerationPass.
  4997. isColMajor = paramAnnotation.GetMatrixAnnotation().Orientation ==
  4998. MatrixOrientation::ColumnMajor;
  4999. IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(F));
  5000. HLCastOpcode opcode = isColMajor ? HLCastOpcode::ColMatrixToVecCast
  5001. : HLCastOpcode::RowMatrixToVecCast;
  5002. Value *undefVal = UndefValue::get(Ty);
  5003. Value *Cast = HLModule::EmitHLOperationCall(
  5004. Builder, HLOpcodeGroup::HLCast, static_cast<unsigned>(opcode), Ty,
  5005. {undefVal}, *M);
  5006. arg.replaceAllUsesWith(Cast);
  5007. // Set arg as the operand.
  5008. CallInst *CI = cast<CallInst>(Cast);
  5009. CI->setArgOperand(HLOperandIndex::kUnaryOpSrc0Idx, &arg);
  5010. }
  5011. continue;
  5012. }
  5013. Ty = Ty->getPointerElementType();
  5014. bool bLoad = false;
  5015. bool bStore = false;
  5016. CheckArgUsage(&arg, bLoad, bStore);
  5017. bool bStoreInputToTemp = false;
  5018. bool bLoadOutputFromTemp = false;
  5019. if (qual == DxilParamInputQual::In && bStore) {
  5020. bStoreInputToTemp = true;
  5021. } else if (qual == DxilParamInputQual::Out && bLoad) {
  5022. bLoadOutputFromTemp = true;
  5023. } else if (bLoad && bStore) {
  5024. switch (qual) {
  5025. case DxilParamInputQual::InPayload:
  5026. case DxilParamInputQual::InputPrimitive:
  5027. case DxilParamInputQual::InputPatch:
  5028. case DxilParamInputQual::OutputPatch: {
  5029. bStoreInputToTemp = true;
  5030. } break;
  5031. case DxilParamInputQual::Inout:
  5032. break;
  5033. default:
  5034. DXASSERT(0, "invalid input qual here");
  5035. }
  5036. } else if (qual == DxilParamInputQual::Inout) {
  5037. // Only replace inout when (bLoad && bStore) == false.
  5038. bLoadOutputFromTemp = true;
  5039. bStoreInputToTemp = true;
  5040. }
  5041. if (HLMatrixType::isa(Ty)) {
  5042. if (qual == DxilParamInputQual::In)
  5043. bStoreInputToTemp = bLoad;
  5044. else if (qual == DxilParamInputQual::Out)
  5045. bLoadOutputFromTemp = bStore;
  5046. else if (qual == DxilParamInputQual::Inout) {
  5047. bStoreInputToTemp = true;
  5048. bLoadOutputFromTemp = true;
  5049. }
  5050. }
  5051. if (bStoreInputToTemp || bLoadOutputFromTemp) {
  5052. IRBuilder<> AllocaBuilder(EntryBlk.getFirstInsertionPt());
  5053. IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(&EntryBlk));
  5054. AllocaInst *temp = AllocaBuilder.CreateAlloca(Ty);
  5055. // Replace all uses with temp.
  5056. arg.replaceAllUsesWith(temp);
  5057. // Copy input to temp.
  5058. if (bStoreInputToTemp) {
  5059. llvm::SmallVector<llvm::Value *, 16> idxList;
  5060. // split copy.
  5061. SplitCpy(temp->getType(), temp, &arg, idxList, Builder, DL, typeSys,
  5062. &paramAnnotation);
  5063. }
// The copy from temp back to the output is generated later, at each return.
  5065. if (bLoadOutputFromTemp) {
  5066. outputTempMap[&arg] = temp;
  5067. }
  5068. }
  5069. }
  5070. for (BasicBlock &BB : F->getBasicBlockList()) {
  5071. if (ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
  5072. IRBuilder<> Builder(RI);
  5073. // Copy temp to output.
  5074. for (auto It : outputTempMap) {
  5075. Argument *output = It.first;
  5076. Value *temp = It.second;
  5077. llvm::SmallVector<llvm::Value *, 16> idxList;
  5078. DxilParameterAnnotation &paramAnnotation =
  5079. EntryAnnotation->GetParameterAnnotation(output->getArgNo());
  5080. auto Iter = Builder.GetInsertPoint();
  5081. if (RI != BB.begin())
  5082. Iter--;
  5083. // split copy.
  5084. SplitCpy(output->getType(), output, temp, idxList, Builder, DL, typeSys,
  5085. &paramAnnotation);
  5086. }
  5087. // Clone the return.
  5088. Builder.CreateRet(RI->getReturnValue());
  5089. RI->eraseFromParent();
  5090. }
  5091. }
  5092. }
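// createFlattenedFunction - Build "<name>.flat", a new function whose
// parameter list is the flattened form of F's parameters and return value.
// Annotations, attributes, shader properties, and debug info are migrated,
// F's body is spliced into the new function, and uses of the old parameters
// are rewritten.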
  5093. void SROA_Parameter_HLSL::createFlattenedFunction(Function *F) {
  5094. DxilTypeSystem &typeSys = m_pHLModule->GetTypeSystem();
  5095. DXASSERT(F == m_pHLModule->GetEntryFunction() ||
  5096. m_pHLModule->IsEntryThatUsesSignatures(F),
  5097. "otherwise, createFlattenedFunction called on library function "
  5098. "that should not be flattened.");
  5099. const DataLayout &DL = m_pHLModule->GetModule()->getDataLayout();
  5100. // Skip void (void) function.
  5101. if (F->getReturnType()->isVoidTy() && F->getArgumentList().empty()) {
  5102. return;
  5103. }
  5104. // Clear maps for cast.
  5105. castParamMap.clear();
  5106. vectorEltsMap.clear();
  5107. DxilFunctionAnnotation *funcAnnotation = m_pHLModule->GetFunctionAnnotation(F);
  5108. DXASSERT(funcAnnotation, "must find annotation for function");
  5109. std::deque<Value *> WorkList;
  5110. LLVMContext &Ctx = m_pHLModule->GetCtx();
  5111. std::unique_ptr<BasicBlock> TmpBlockForFuncDecl;
  5112. BasicBlock *EntryBlock;
  5113. if (F->isDeclaration()) {
// We still want to SROA the parameters, so create a dummy
// function body block to avoid special cases.
  5116. TmpBlockForFuncDecl.reset(BasicBlock::Create(Ctx));
  5117. // Create return as terminator.
  5118. IRBuilder<> RetBuilder(TmpBlockForFuncDecl.get());
  5119. RetBuilder.CreateRetVoid();
  5120. EntryBlock = TmpBlockForFuncDecl.get();
  5121. } else {
  5122. EntryBlock = &F->getEntryBlock();
  5123. }
  5124. std::vector<Value *> FlatParamList;
  5125. std::vector<DxilParameterAnnotation> FlatParamAnnotationList;
  5126. std::vector<int> FlatParamOriArgNoList;
  5127. const bool bForParamTrue = true;
  5128. // Add all argument to worklist.
  5129. for (Argument &Arg : F->args()) {
  5130. // merge GEP use for arg.
  5131. HLModule::MergeGepUse(&Arg);
  5132. unsigned prevFlatParamCount = FlatParamList.size();
  5133. DxilParameterAnnotation &paramAnnotation =
  5134. funcAnnotation->GetParameterAnnotation(Arg.getArgNo());
  5135. DbgDeclareInst *DDI = llvm::FindAllocaDbgDeclare(&Arg);
  5136. flattenArgument(F, &Arg, bForParamTrue, paramAnnotation, FlatParamList,
  5137. FlatParamAnnotationList, EntryBlock, DDI);
  5138. unsigned newFlatParamCount = FlatParamList.size() - prevFlatParamCount;
  5139. for (unsigned i = 0; i < newFlatParamCount; i++) {
  5140. FlatParamOriArgNoList.emplace_back(Arg.getArgNo());
  5141. }
  5142. }
  5143. Type *retType = F->getReturnType();
  5144. std::vector<Value *> FlatRetList;
  5145. std::vector<DxilParameterAnnotation> FlatRetAnnotationList;
  5146. // Split and change to out parameter.
  5147. if (!retType->isVoidTy()) {
  5148. IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(EntryBlock));
  5149. IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(EntryBlock));
  5150. Value *retValAddr = AllocaBuilder.CreateAlloca(retType);
  5151. DxilParameterAnnotation &retAnnotation =
  5152. funcAnnotation->GetRetTypeAnnotation();
  5153. Module &M = *m_pHLModule->GetModule();
  5154. Type *voidTy = Type::getVoidTy(m_pHLModule->GetCtx());
  5155. #if 0 // We don't really want this to show up in debug info.
  5156. // Create DbgDecl for the ret value.
  5157. if (DISubprogram *funcDI = getDISubprogram(F)) {
  5158. DITypeRef RetDITyRef = funcDI->getType()->getTypeArray()[0];
  5159. DITypeIdentifierMap EmptyMap;
  5160. DIType * RetDIType = RetDITyRef.resolve(EmptyMap);
  5161. DIBuilder DIB(*F->getParent(), /*AllowUnresolved*/ false);
  5162. DILocalVariable *RetVar = DIB.createLocalVariable(llvm::dwarf::Tag::DW_TAG_arg_variable, funcDI, F->getName().str() + ".Ret", funcDI->getFile(),
  5163. funcDI->getLine(), RetDIType);
  5164. DIExpression *Expr = DIB.createExpression();
  5165. // TODO: how to get col?
  5166. DILocation *DL = DILocation::get(F->getContext(), funcDI->getLine(), 0, funcDI);
  5167. DIB.insertDeclare(retValAddr, RetVar, Expr, DL, Builder.GetInsertPoint());
  5168. }
  5169. #endif
  5170. for (BasicBlock &BB : F->getBasicBlockList()) {
  5171. if (ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
  5172. // Create store for return.
  5173. IRBuilder<> RetBuilder(RI);
  5174. if (!retAnnotation.HasMatrixAnnotation()) {
  5175. RetBuilder.CreateStore(RI->getReturnValue(), retValAddr);
  5176. } else {
  5177. bool isRowMajor = retAnnotation.GetMatrixAnnotation().Orientation ==
  5178. MatrixOrientation::RowMajor;
  5179. Value *RetVal = RI->getReturnValue();
  5180. if (!isRowMajor) {
// The matrix value is row major, but ColMatStore requires col major.
// Cast before storing.
  5183. RetVal = HLModule::EmitHLOperationCall(
  5184. RetBuilder, HLOpcodeGroup::HLCast,
  5185. static_cast<unsigned>(HLCastOpcode::RowMatrixToColMatrix),
  5186. RetVal->getType(), {RetVal}, M);
  5187. }
  5188. unsigned opcode = static_cast<unsigned>(
  5189. isRowMajor ? HLMatLoadStoreOpcode::RowMatStore
  5190. : HLMatLoadStoreOpcode::ColMatStore);
  5191. HLModule::EmitHLOperationCall(RetBuilder,
  5192. HLOpcodeGroup::HLMatLoadStore, opcode,
  5193. voidTy, {retValAddr, RetVal}, M);
  5194. }
  5195. }
  5196. }
  5197. // Create a fake store to keep retValAddr so it can be flattened.
  5198. if (retValAddr->user_empty()) {
  5199. Builder.CreateStore(UndefValue::get(retType), retValAddr);
  5200. }
  5201. DbgDeclareInst *DDI = llvm::FindAllocaDbgDeclare(retValAddr);
  5202. flattenArgument(F, retValAddr, bForParamTrue,
  5203. funcAnnotation->GetRetTypeAnnotation(), FlatRetList,
  5204. FlatRetAnnotationList, EntryBlock, DDI);
  5205. const int kRetArgNo = -1;
  5206. for (unsigned i = 0; i < FlatRetList.size(); i++) {
  5207. FlatParamOriArgNoList.insert(FlatParamOriArgNoList.begin(), kRetArgNo);
  5208. }
  5209. }
// Always turn the return value into an out parameter.
// By doing this, there is no need to special-case the return value when generating storeOutput.
  5212. if (FlatRetList.size() ||
  5213. // For empty struct return type.
  5214. !retType->isVoidTy()) {
  5215. // Return value is flattened.
  5216. // Change return value into out parameter.
  5217. retType = Type::getVoidTy(retType->getContext());
// Merge return data into param data.
  5219. FlatParamList.insert(FlatParamList.begin(), FlatRetList.begin(), FlatRetList.end());
  5220. FlatParamAnnotationList.insert(FlatParamAnnotationList.begin(),
  5221. FlatRetAnnotationList.begin(),
  5222. FlatRetAnnotationList.end());
  5223. }
  5224. std::vector<Type *> FinalTypeList;
  5225. for (Value * arg : FlatParamList) {
  5226. FinalTypeList.emplace_back(arg->getType());
  5227. }
  5228. unsigned extraParamSize = 0;
  5229. if (m_pHLModule->HasDxilFunctionProps(F)) {
  5230. DxilFunctionProps &funcProps = m_pHLModule->GetDxilFunctionProps(F);
  5231. if (funcProps.shaderKind == ShaderModel::Kind::Vertex) {
  5232. auto &VS = funcProps.ShaderProps.VS;
  5233. Type *outFloatTy = Type::getFloatPtrTy(F->getContext());
  5234. // Add out float parameter for each clip plane.
  5235. unsigned i=0;
  5236. for (; i < DXIL::kNumClipPlanes; i++) {
  5237. if (!VS.clipPlanes[i])
  5238. break;
  5239. FinalTypeList.emplace_back(outFloatTy);
  5240. }
  5241. extraParamSize = i;
  5242. }
  5243. }
  5244. FunctionType *flatFuncTy = FunctionType::get(retType, FinalTypeList, false);
  5245. // Return if nothing changed.
  5246. if (flatFuncTy == F->getFunctionType()) {
  5247. // Copy semantic allocation.
  5248. if (!FlatParamAnnotationList.empty()) {
  5249. if (!FlatParamAnnotationList[0].GetSemanticString().empty()) {
  5250. for (unsigned i = 0; i < FlatParamAnnotationList.size(); i++) {
  5251. DxilParameterAnnotation &paramAnnotation = funcAnnotation->GetParameterAnnotation(i);
  5252. DxilParameterAnnotation &flatParamAnnotation = FlatParamAnnotationList[i];
  5253. paramAnnotation.SetSemanticIndexVec(flatParamAnnotation.GetSemanticIndexVec());
  5254. paramAnnotation.SetSemanticString(flatParamAnnotation.GetSemanticString());
  5255. }
  5256. }
  5257. }
  5258. if (!F->isDeclaration()) {
  5259. // Support store to input and load from output.
  5260. LegalizeDxilInputOutputs(F, funcAnnotation, DL, typeSys);
  5261. }
  5262. return;
  5263. }
  5264. std::string flatName = F->getName().str() + ".flat";
  5265. DXASSERT(nullptr == F->getParent()->getFunction(flatName),
  5266. "else overwriting existing function");
  5267. Function *flatF =
  5268. cast<Function>(F->getParent()->getOrInsertFunction(flatName, flatFuncTy));
  5269. funcMap[F] = flatF;
  5270. // Update function debug info.
  5271. if (DISubprogram *funcDI = getDISubprogram(F))
  5272. funcDI->replaceFunction(flatF);
  5273. // Create FunctionAnnotation for flatF.
  5274. DxilFunctionAnnotation *flatFuncAnnotation = m_pHLModule->AddFunctionAnnotation(flatF);
// No need to set return info; flatF always returns void now.
  5276. // Param Info
  5277. for (unsigned ArgNo = 0; ArgNo < FlatParamAnnotationList.size(); ++ArgNo) {
  5278. DxilParameterAnnotation &paramAnnotation = flatFuncAnnotation->GetParameterAnnotation(ArgNo);
  5279. paramAnnotation = FlatParamAnnotationList[ArgNo];
  5280. }
  5281. // Function Attr and Parameter Attr.
  5282. // Remove sret first.
  5283. if (F->hasStructRetAttr())
  5284. F->removeFnAttr(Attribute::StructRet);
  5285. for (Argument &arg : F->args()) {
  5286. if (arg.hasStructRetAttr()) {
  5287. Attribute::AttrKind SRet [] = {Attribute::StructRet};
  5288. AttributeSet SRetAS = AttributeSet::get(Ctx, arg.getArgNo() + 1, SRet);
  5289. arg.removeAttr(SRetAS);
  5290. }
  5291. }
  5292. AttributeSet AS = F->getAttributes();
  5293. AttrBuilder FnAttrs(AS.getFnAttributes(), AttributeSet::FunctionIndex);
  5294. AttributeSet flatAS;
  5295. flatAS = flatAS.addAttributes(
  5296. Ctx, AttributeSet::FunctionIndex,
  5297. AttributeSet::get(Ctx, AttributeSet::FunctionIndex, FnAttrs));
  5298. if (!F->isDeclaration()) {
// Only set parameter attributes for functions that have a body.
  5300. for (unsigned ArgNo = 0; ArgNo < FlatParamAnnotationList.size(); ++ArgNo) {
  5301. unsigned oriArgNo = FlatParamOriArgNoList[ArgNo] + 1;
  5302. AttrBuilder paramAttr(AS, oriArgNo);
  5303. if (oriArgNo == AttributeSet::ReturnIndex)
  5304. paramAttr.addAttribute(Attribute::AttrKind::NoAlias);
  5305. flatAS = flatAS.addAttributes(
  5306. Ctx, ArgNo + 1, AttributeSet::get(Ctx, ArgNo + 1, paramAttr));
  5307. }
  5308. }
  5309. flatF->setAttributes(flatAS);
  5310. DXASSERT_LOCALVAR(extraParamSize, flatF->arg_size() == (extraParamSize + FlatParamAnnotationList.size()), "parameter count mismatch");
  5311. // ShaderProps.
  5312. if (m_pHLModule->HasDxilFunctionProps(F)) {
  5313. DxilFunctionProps &funcProps = m_pHLModule->GetDxilFunctionProps(F);
  5314. std::unique_ptr<DxilFunctionProps> flatFuncProps = llvm::make_unique<DxilFunctionProps>();
  5315. flatFuncProps->shaderKind = funcProps.shaderKind;
  5316. flatFuncProps->ShaderProps = funcProps.ShaderProps;
  5317. m_pHLModule->AddDxilFunctionProps(flatF, flatFuncProps);
  5318. if (funcProps.shaderKind == ShaderModel::Kind::Vertex) {
  5319. auto &VS = funcProps.ShaderProps.VS;
  5320. unsigned clipArgIndex = FlatParamAnnotationList.size();
  5321. // Add out float SV_ClipDistance for each clip plane.
  5322. for (unsigned i = 0; i < DXIL::kNumClipPlanes; i++) {
  5323. if (!VS.clipPlanes[i])
  5324. break;
  5325. DxilParameterAnnotation &paramAnnotation =
  5326. flatFuncAnnotation->GetParameterAnnotation(clipArgIndex+i);
  5327. paramAnnotation.SetParamInputQual(DxilParamInputQual::Out);
  5328. Twine semName = Twine("SV_ClipDistance") + Twine(i);
  5329. paramAnnotation.SetSemanticString(semName.str());
  5330. paramAnnotation.SetCompType(DXIL::ComponentType::F32);
  5331. paramAnnotation.AppendSemanticIndex(i);
  5332. }
  5333. }
  5334. }
  5335. if (!F->isDeclaration()) {
  5336. // Move function body into flatF.
  5337. moveFunctionBody(F, flatF);
  5338. // Replace old parameters with flatF Arguments.
  5339. auto argIter = flatF->arg_begin();
  5340. auto flatArgIter = FlatParamList.begin();
  5341. LLVMContext &Context = F->getContext();
// Parameter casts are emitted at the beginning of the entry block.
  5343. IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(flatF));
  5344. IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(flatF));
  5345. while (argIter != flatF->arg_end()) {
  5346. Argument *Arg = argIter++;
  5347. if (flatArgIter == FlatParamList.end()) {
  5348. DXASSERT(extraParamSize > 0, "parameter count mismatch");
  5349. break;
  5350. }
  5351. Value *flatArg = *(flatArgIter++);
  5352. if (castParamMap.count(flatArg)) {
  5353. replaceCastParameter(flatArg, castParamMap[flatArg].first, *flatF, Arg,
  5354. castParamMap[flatArg].second, Builder);
  5355. }
  5356. // Update arg debug info.
  5357. DbgDeclareInst *DDI = llvm::FindAllocaDbgDeclare(flatArg);
  5358. if (DDI) {
  5359. if (!flatArg->getType()->isPointerTy()) {
  5360. // Create alloca to hold the debug info.
  5361. Value *allocaArg = nullptr;
  5362. if (flatArg->hasOneUse() && isa<StoreInst>(*flatArg->user_begin())) {
  5363. StoreInst *SI = cast<StoreInst>(*flatArg->user_begin());
  5364. allocaArg = SI->getPointerOperand();
  5365. } else {
  5366. allocaArg = AllocaBuilder.CreateAlloca(flatArg->getType());
  5367. StoreInst *initArg = Builder.CreateStore(flatArg, allocaArg);
  5368. Value *ldArg = Builder.CreateLoad(allocaArg);
  5369. flatArg->replaceAllUsesWith(ldArg);
  5370. initArg->setOperand(0, flatArg);
  5371. }
  5372. Value *VMD = MetadataAsValue::get(Context, ValueAsMetadata::get(allocaArg));
  5373. DDI->setArgOperand(0, VMD);
  5374. } else {
  5375. Value *VMD = MetadataAsValue::get(Context, ValueAsMetadata::get(Arg));
  5376. DDI->setArgOperand(0, VMD);
  5377. }
  5378. }
  5379. flatArg->replaceAllUsesWith(Arg);
  5380. if (isa<Instruction>(flatArg))
  5381. DeadInsts.emplace_back(flatArg);
  5382. HLModule::MergeGepUse(Arg);
  5383. // Flatten store of array parameter.
  5384. if (Arg->getType()->isPointerTy()) {
  5385. Type *Ty = Arg->getType()->getPointerElementType();
  5386. if (Ty->isArrayTy())
  5387. SplitArrayCopy(
  5388. Arg, DL, typeSys,
  5389. &flatFuncAnnotation->GetParameterAnnotation(Arg->getArgNo()));
  5390. }
  5391. }
  5392. // Support store to input and load from output.
  5393. LegalizeDxilInputOutputs(flatF, flatFuncAnnotation, DL, typeSys);
  5394. }
  5395. }
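// replaceCall - Update the module entry point to the flattened function.
// No call sites need rewriting here because only entry functions, which have
// no callers, are flattened.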
  5396. void SROA_Parameter_HLSL::replaceCall(Function *F, Function *flatF) {
  5397. // Update entry function.
  5398. if (F == m_pHLModule->GetEntryFunction()) {
  5399. m_pHLModule->SetEntryFunction(flatF);
  5400. }
  5401. DXASSERT(F->user_empty(), "otherwise we flattened a library function.");
  5402. }
  5403. // Public interface to the SROA_Parameter_HLSL pass
  5404. ModulePass *llvm::createSROA_Parameter_HLSL() {
  5405. return new SROA_Parameter_HLSL();
  5406. }
  5407. //===----------------------------------------------------------------------===//
  5408. // Lower static global into Alloca.
  5409. //===----------------------------------------------------------------------===//
  5410. namespace {
  5411. class LowerStaticGlobalIntoAlloca : public ModulePass {
  5412. HLModule *m_pHLModule;
  5413. DebugInfoFinder m_DbgFinder;
  5414. public:
  5415. static char ID; // Pass identification, replacement for typeid
  5416. explicit LowerStaticGlobalIntoAlloca() : ModulePass(ID) {}
  5417. const char *getPassName() const override { return "Lower static global into Alloca"; }
  5418. bool runOnModule(Module &M) override {
  5419. m_pHLModule = &M.GetOrCreateHLModule();
  5420. m_DbgFinder.processModule(M);
// Lower static globals into allocas.
  5422. std::vector<GlobalVariable *> staticGVs;
  5423. for (GlobalVariable &GV : M.globals()) {
  5424. // only for non-constant static globals
  5425. if (!dxilutil::IsStaticGlobal(&GV) || GV.isConstant())
  5426. continue;
  5427. Type *EltTy = GV.getType()->getElementType();
  5428. if (!EltTy->isAggregateType()) {
  5429. staticGVs.emplace_back(&GV);
  5430. } else {
  5431. // Lower static [array of] resources
  5432. if (dxilutil::IsHLSLObjectType(dxilutil::GetArrayEltTy(EltTy))) {
  5433. staticGVs.emplace_back(&GV);
  5434. }
  5435. }
  5436. }
  5437. bool bUpdated = false;
  5438. const DataLayout &DL = M.getDataLayout();
  5439. for (GlobalVariable *GV : staticGVs) {
  5440. bUpdated |= lowerStaticGlobalIntoAlloca(GV, DL);
  5441. }
  5442. return bUpdated;
  5443. }
  5444. private:
  5445. bool lowerStaticGlobalIntoAlloca(GlobalVariable *GV, const DataLayout &DL);
  5446. };
  5447. }
  5448. // Go through the base type chain of TyA and see if
  5449. // we eventually get to TyB
  5450. //
  5451. // Note: Not necessarily about inheritance. Could be
  5452. // typedef, const type, ref type, MEMBER type (TyA
  5453. // being a member of TyB).
  5454. //
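// For example (illustrative): with "typedef const Foo CFoo;", the debug type
// chain for CFoo is typedef -> const -> Foo, so walking the base types of
// CFoo's DIType eventually reaches Foo's DIType.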
  5455. static bool IsDerivedTypeOf(DIType *TyA, DIType *TyB) {
  5456. DITypeIdentifierMap EmptyMap;
  5457. while (TyA) {
  5458. if (DIDerivedType *Derived = dyn_cast<DIDerivedType>(TyA)) {
  5459. if (Derived->getBaseType() == TyB)
  5460. return true;
  5461. else
  5462. TyA = Derived->getBaseType().resolve(EmptyMap);
  5463. }
  5464. else {
  5465. break;
  5466. }
  5467. }
  5468. return false;
  5469. }
// See if 'DGV' is a member of some other variable; if so, return that variable
// along with the bit offset and size of DGV within it.
  5472. //
  5473. // If DGV is not a member, just return nullptr.
  5474. //
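// For example (illustrative): a debug variable named "S.m" whose type is a
// DW_TAG_member of struct S is matched back to the unsplit variable "S", and
// the member's bit offset and size within S are reported.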
  5475. static DIGlobalVariable *FindGlobalVariableFragment(const DebugInfoFinder &DbgFinder, DIGlobalVariable *DGV, unsigned *Out_OffsetInBits, unsigned *Out_SizeInBits) {
  5476. DITypeIdentifierMap EmptyMap;
  5477. StringRef FullName = DGV->getName();
  5478. size_t FirstDot = FullName.find_first_of('.');
  5479. if (FirstDot == StringRef::npos)
  5480. return nullptr;
  5481. StringRef BaseName = FullName.substr(0, FirstDot);
  5482. assert(BaseName.size());
  5483. DIType *Ty = DGV->getType().resolve(EmptyMap);
  5484. assert(isa<DIDerivedType>(Ty) && Ty->getTag() == dwarf::DW_TAG_member);
  5485. DIGlobalVariable *FinalResult = nullptr;
  5486. for (DIGlobalVariable *DGV_It : DbgFinder.global_variables()) {
  5487. if (DGV_It->getName() == BaseName &&
  5488. IsDerivedTypeOf(Ty, DGV_It->getType().resolve(EmptyMap)))
  5489. {
  5490. FinalResult = DGV_It;
  5491. break;
  5492. }
  5493. }
  5494. if (FinalResult) {
  5495. *Out_OffsetInBits = Ty->getOffsetInBits();
  5496. *Out_SizeInBits = Ty->getSizeInBits();
  5497. }
  5498. return FinalResult;
  5499. }
  5500. // Create a fake local variable for the GlobalVariable GV that has just been
  5501. // lowered to local Alloca.
  5502. //
  5503. static
  5504. void PatchDebugInfo(DebugInfoFinder &DbgFinder, Function *F, GlobalVariable *GV, AllocaInst *AI) {
  5505. if (!DbgFinder.compile_unit_count())
  5506. return;
  5507. // Find the subprogram for function
  5508. DISubprogram *Subprogram = nullptr;
  5509. for (DISubprogram *SP : DbgFinder.subprograms()) {
  5510. if (SP->getFunction() == F) {
  5511. Subprogram = SP;
  5512. break;
  5513. }
  5514. }
  5515. DIGlobalVariable *DGV = dxilutil::FindGlobalVariableDebugInfo(GV, DbgFinder);
  5516. if (!DGV)
  5517. return;
  5518. DITypeIdentifierMap EmptyMap;
  5519. DIBuilder DIB(*GV->getParent());
  5520. DIScope *Scope = Subprogram;
  5521. DebugLoc Loc = DebugLoc::get(0, 0, Scope);
  5522. // If the variable is a member of another variable, find the offset and size
  5523. bool IsFragment = false;
  5524. unsigned OffsetInBits = 0,
  5525. SizeInBits = 0;
  5526. if (DIGlobalVariable *UnsplitDGV = FindGlobalVariableFragment(DbgFinder, DGV, &OffsetInBits, &SizeInBits)) {
  5527. DGV = UnsplitDGV;
  5528. IsFragment = true;
  5529. }
  5530. std::string Name = "global.";
  5531. Name += DGV->getName();
// Use arg_variable instead of auto_variable because arg variables can use the
// subprogram as their scope, so we don't have to make one up.
  5534. llvm::dwarf::Tag Tag = llvm::dwarf::Tag::DW_TAG_arg_variable;
  5535. DIType *Ty = DGV->getType().resolve(EmptyMap);
  5536. DXASSERT(Ty->getTag() != dwarf::DW_TAG_member, "Member type is not allowed for variables.");
  5537. DILocalVariable *ConvertedLocalVar =
  5538. DIB.createLocalVariable(Tag, Scope,
  5539. Name, DGV->getFile(), DGV->getLine(), Ty);
  5540. DIExpression *Expr = nullptr;
  5541. if (IsFragment) {
  5542. Expr = DIB.createBitPieceExpression(OffsetInBits, SizeInBits);
  5543. }
  5544. else {
  5545. Expr = DIB.createExpression(ArrayRef<int64_t>());
  5546. }
  5547. DIB.insertDeclare(AI, ConvertedLocalVar, Expr, Loc, AI->getNextNode());
  5548. }
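// Lower a non-constant static global that is accessed from a single function
// (and is actually written) into an alloca in that function: store its
// initializer at entry, replace all uses, patch the debug info, and erase the
// global.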
  5549. bool LowerStaticGlobalIntoAlloca::lowerStaticGlobalIntoAlloca(GlobalVariable *GV, const DataLayout &DL) {
  5550. DxilTypeSystem &typeSys = m_pHLModule->GetTypeSystem();
  5551. unsigned size = DL.getTypeAllocSize(GV->getType()->getElementType());
  5552. hlutil::PointerStatus PS(GV, size, /*bLdStOnly*/ false);
  5553. GV->removeDeadConstantUsers();
  5554. PS.analyze(typeSys, /*bStructElt*/ false);
  5555. bool NotStored = !PS.HasStored();
// Make sure GV is only used in one function.
// Skip GVs that are never stored to.
  5558. if (PS.HasMultipleAccessingFunctions || NotStored)
  5559. return false;
  5560. Function *F = const_cast<Function*>(PS.AccessingFunction);
  5561. IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(F));
  5562. AllocaInst *AI = AllocaBuilder.CreateAlloca(GV->getType()->getElementType());
  5563. IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(F));
// Store the initializer if one exists.
  5565. if (GV->hasInitializer() && !isa<UndefValue>(GV->getInitializer())) {
  5566. Builder.CreateStore(GV->getInitializer(), GV);
  5567. }
  5568. ReplaceConstantWithInst(GV, AI, Builder);
  5569. PatchDebugInfo(m_DbgFinder, F, GV, AI);
  5570. GV->eraseFromParent();
  5571. return true;
  5572. }
  5573. char LowerStaticGlobalIntoAlloca::ID = 0;
  5574. INITIALIZE_PASS(LowerStaticGlobalIntoAlloca, "static-global-to-alloca",
  5575. "Lower static global into Alloca", false,
  5576. false)
  5577. // Public interface to the LowerStaticGlobalIntoAlloca pass
  5578. ModulePass *llvm::createLowerStaticGlobalIntoAlloca() {
  5579. return new LowerStaticGlobalIntoAlloca();
  5580. }