//===- ScalarReplAggregatesHLSL.cpp - Scalar Replacement of Aggregates ----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
//
// Based on ScalarReplAggregates.cpp. The difference is that the HLSL version
// keeps arrays so it can break up all structures.
//
//===----------------------------------------------------------------------===//
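
// For example (an illustrative sketch, not part of the original comments):
// given a local variable such as
//
//   struct Data { float4 pos; float weights[2]; };
//   Data d;   // lowered to an alloca of %struct.Data
//
// this pass replaces the single aggregate alloca with one alloca per field
// (here a <4 x float> alloca and a [2 x float] alloca) and rewrites the GEPs,
// loads, stores, and memcpys that referenced the aggregate. Unlike upstream
// SROA, the [2 x float] array is kept whole instead of being split further.
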
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "dxc/HLSL/HLOperations.h"
#include "dxc/DXIL/DxilConstants.h"
#include "dxc/HLSL/HLModule.h"
#include "dxc/DXIL/DxilUtil.h"
#include "dxc/DXIL/DxilModule.h"
#include "dxc/HlslIntrinsicOp.h"
#include "dxc/DXIL/DxilTypeSystem.h"
#include "dxc/HLSL/HLMatrixLowerHelper.h"
#include "dxc/HLSL/HLMatrixType.h"
#include "dxc/DXIL/DxilOperations.h"
#include <deque>
#include <unordered_map>
#include <unordered_set>
#include <queue>

using namespace llvm;
using namespace hlsl;

#define DEBUG_TYPE "scalarreplhlsl"

STATISTIC(NumReplaced, "Number of allocas broken up");
STATISTIC(NumPromoted, "Number of allocas promoted");
STATISTIC(NumAdjusted, "Number of scalar allocas adjusted to allow promotion");

namespace {

class SROA_Helper {
public:
  // Split V into AllocaInsts with Builder and save the new AllocaInsts into
  // Elts. Then do SROA on V.
  static bool DoScalarReplacement(Value *V, std::vector<Value *> &Elts,
                                  IRBuilder<> &Builder, bool bFlatVector,
                                  bool hasPrecise, DxilTypeSystem &typeSys,
                                  const DataLayout &DL,
                                  SmallVector<Value *, 32> &DeadInsts);
  static bool DoScalarReplacement(GlobalVariable *GV,
                                  std::vector<Value *> &Elts,
                                  IRBuilder<> &Builder, bool bFlatVector,
                                  bool hasPrecise, DxilTypeSystem &typeSys,
                                  const DataLayout &DL,
                                  SmallVector<Value *, 32> &DeadInsts);
  // Lower memcpy related to V.
  static bool LowerMemcpy(Value *V, DxilFieldAnnotation *annotation,
                          DxilTypeSystem &typeSys, const DataLayout &DL,
                          bool bAllowReplace);
  static void MarkEmptyStructUsers(Value *V,
                                   SmallVector<Value *, 32> &DeadInsts);
  static bool IsEmptyStructType(Type *Ty, DxilTypeSystem &typeSys);

private:
  SROA_Helper(Value *V, ArrayRef<Value *> Elts,
              SmallVector<Value *, 32> &DeadInsts, DxilTypeSystem &ts,
              const DataLayout &dl)
      : OldVal(V), NewElts(Elts), DeadInsts(DeadInsts), typeSys(ts), DL(dl) {}
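
  // Rewrite every use of V (a pointer into OldVal) in terms of the flattened
  // element values in NewElts, dispatching to the RewriteFor*/Rewrite*
  // helpers below based on the kind of user (constant expression, GEP,
  // addrspace cast, load, store, mem intrinsic, call, or bitcast).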
  void RewriteForScalarRepl(Value *V, IRBuilder<> &Builder);

private:
  // Must be a pointer type val.
  Value *OldVal;
  // Flattened elements for OldVal.
  ArrayRef<Value *> NewElts;
  SmallVector<Value *, 32> &DeadInsts;
  DxilTypeSystem &typeSys;
  const DataLayout &DL;

  void RewriteForConstExpr(ConstantExpr *user, IRBuilder<> &Builder);
  void RewriteForGEP(GEPOperator *GEP, IRBuilder<> &Builder);
  void RewriteForAddrSpaceCast(Value *user, IRBuilder<> &Builder);
  void RewriteForLoad(LoadInst *loadInst);
  void RewriteForStore(StoreInst *storeInst);
  void RewriteMemIntrin(MemIntrinsic *MI, Value *OldV);
  void RewriteCall(CallInst *CI);
  void RewriteBitCast(BitCastInst *BCI);
  void RewriteCallArg(CallInst *CI, unsigned ArgIdx, bool bIn, bool bOut);
};

struct SROA_HLSL : public FunctionPass {
  SROA_HLSL(bool Promote, int T, bool hasDT, char &ID, int ST, int AT, int SLT)
      : FunctionPass(ID), HasDomTree(hasDT), RunPromotion(Promote) {
    if (AT == -1)
      ArrayElementThreshold = 8;
    else
      ArrayElementThreshold = AT;
    if (SLT == -1)
      // Do not limit the scalar integer load size if no threshold is given.
      ScalarLoadThreshold = -1;
    else
      ScalarLoadThreshold = SLT;
  }

  bool runOnFunction(Function &F) override;

  bool performScalarRepl(Function &F, DxilTypeSystem &typeSys);
  bool performPromotion(Function &F);
  bool markPrecise(Function &F);

private:
  bool HasDomTree;
  bool RunPromotion;

  /// DeadInsts - Keep track of instructions we have made dead, so that
  /// we can remove them after we are done working.
  SmallVector<Value *, 32> DeadInsts;

  /// AllocaInfo - When analyzing uses of an alloca instruction, this captures
  /// information about the uses. All these fields are initialized to false
  /// and set to true when something is learned.
  struct AllocaInfo {
    /// The alloca to promote.
    AllocaInst *AI;

    /// CheckedPHIs - This is a set of verified PHI nodes, to prevent infinite
    /// looping and avoid redundant work.
    SmallPtrSet<PHINode *, 8> CheckedPHIs;

    /// isUnsafe - This is set to true if the alloca cannot be SROA'd.
    bool isUnsafe : 1;

    /// isMemCpySrc - This is true if this aggregate is memcpy'd from.
    bool isMemCpySrc : 1;

    /// isMemCpyDst - This is true if this aggregate is memcpy'd into.
    bool isMemCpyDst : 1;

    /// hasSubelementAccess - This is true if a subelement of the alloca is
    /// ever accessed, or false if the alloca is only accessed with mem
    /// intrinsics or load/store that only access the entire alloca at once.
    bool hasSubelementAccess : 1;

    /// hasALoadOrStore - This is true if there are any loads or stores to it.
    /// The alloca may just be accessed with memcpy, for example, which would
    /// not set this.
    bool hasALoadOrStore : 1;

    /// hasArrayIndexing - This is true if there is any dynamic array
    /// indexing into it.
    bool hasArrayIndexing : 1;

    /// hasVectorIndexing - This is true if there is any dynamic vector
    /// indexing into it.
    bool hasVectorIndexing : 1;

    explicit AllocaInfo(AllocaInst *ai)
        : AI(ai), isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false),
          hasSubelementAccess(false), hasALoadOrStore(false),
          hasArrayIndexing(false), hasVectorIndexing(false) {}
  };

  /// ArrayElementThreshold - The maximum number of elements an array can
  /// have to be considered for SROA.
  unsigned ArrayElementThreshold;

  /// ScalarLoadThreshold - The maximum size in bits of scalars to load when
  /// converting to scalar.
  unsigned ScalarLoadThreshold;

  void MarkUnsafe(AllocaInfo &I, Instruction *User) {
    I.isUnsafe = true;
    DEBUG(dbgs() << "  Transformation preventing inst: " << *User << '\n');
  }

  bool isSafeAllocaToScalarRepl(AllocaInst *AI);

  void isSafeForScalarRepl(Instruction *I, uint64_t Offset, AllocaInfo &Info);
  void isSafePHISelectUseForScalarRepl(Instruction *User, uint64_t Offset,
                                       AllocaInfo &Info);
  void isSafeGEP(GetElementPtrInst *GEPI, uint64_t &Offset, AllocaInfo &Info);
  void isSafeMemAccess(uint64_t Offset, uint64_t MemSize, Type *MemOpType,
                       bool isStore, AllocaInfo &Info, Instruction *TheAccess,
                       bool AllowWholeAccess);
  bool TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size,
                        const DataLayout &DL);
  void DeleteDeadInstructions();

  bool ShouldAttemptScalarRepl(AllocaInst *AI);
};

// SROA_DT - SROA that uses DominatorTree.
struct SROA_DT_HLSL : public SROA_HLSL {
  static char ID;

public:
  SROA_DT_HLSL(bool Promote = false, int T = -1, int ST = -1, int AT = -1,
               int SLT = -1)
      : SROA_HLSL(Promote, T, true, ID, ST, AT, SLT) {
    initializeSROA_DTPass(*PassRegistry::getPassRegistry());
  }

  // getAnalysisUsage - This pass does not require any passes, but we know it
  // will not alter the CFG, so say so.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.setPreservesCFG();
  }
};

// SROA_SSAUp - SROA that uses SSAUpdater.
struct SROA_SSAUp_HLSL : public SROA_HLSL {
  static char ID;

public:
  SROA_SSAUp_HLSL(bool Promote = false, int T = -1, int ST = -1, int AT = -1,
                  int SLT = -1)
      : SROA_HLSL(Promote, T, false, ID, ST, AT, SLT) {
    initializeSROA_SSAUpPass(*PassRegistry::getPassRegistry());
  }

  // getAnalysisUsage - This pass does not require any passes, but we know it
  // will not alter the CFG, so say so.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AssumptionCacheTracker>();
    AU.setPreservesCFG();
  }
};

// Simple struct to split memcpy into ld/st.
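// For example (an illustrative sketch): a whole-struct copy such as
//
//   memcpy(&dst, &src, sizeof(Data));
//
// is rewritten as a field-by-field copy (roughly one load from a GEP of src
// and one store to the matching GEP of dst per field), so that later scalar
// replacement sees plain loads and stores instead of an opaque memory
// intrinsic.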
struct MemcpySplitter {
  llvm::LLVMContext &m_context;
  DxilTypeSystem &m_typeSys;

public:
  MemcpySplitter(llvm::LLVMContext &context, DxilTypeSystem &typeSys)
      : m_context(context), m_typeSys(typeSys) {}
  void Split(llvm::Function &F);

  static void PatchMemCpyWithZeroIdxGEP(Module &M);
  static void PatchMemCpyWithZeroIdxGEP(MemCpyInst *MI, const DataLayout &DL);
  static void SplitMemCpy(MemCpyInst *MI, const DataLayout &DL,
                          DxilFieldAnnotation *fieldAnnotation,
                          DxilTypeSystem &typeSys,
                          const bool bEltMemCpy = true);
};

} // namespace

char SROA_DT_HLSL::ID = 0;
char SROA_SSAUp_HLSL::ID = 0;

INITIALIZE_PASS_BEGIN(SROA_DT_HLSL, "scalarreplhlsl",
                      "Scalar Replacement of Aggregates HLSL (DT)", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(SROA_DT_HLSL, "scalarreplhlsl",
                    "Scalar Replacement of Aggregates HLSL (DT)", false, false)

INITIALIZE_PASS_BEGIN(SROA_SSAUp_HLSL, "scalarreplhlsl-ssa",
                      "Scalar Replacement of Aggregates HLSL (SSAUp)", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_END(SROA_SSAUp_HLSL, "scalarreplhlsl-ssa",
                    "Scalar Replacement of Aggregates HLSL (SSAUp)", false,
                    false)

// Public interface to the ScalarReplAggregates pass.
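// A host tool would typically schedule it through the legacy pass manager,
// for example (illustrative sketch only):
//
//   legacy::PassManager PM;
//   PM.add(createScalarReplAggregatesHLSLPass(/*UseDomTree=*/true,
//                                             /*Promote=*/true));
//   PM.run(M);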
FunctionPass *llvm::createScalarReplAggregatesHLSLPass(bool UseDomTree,
                                                       bool Promote) {
  if (UseDomTree)
    return new SROA_DT_HLSL(Promote);
  return new SROA_SSAUp_HLSL(Promote);
}

//===----------------------------------------------------------------------===//
// Convert To Scalar Optimization.
//===----------------------------------------------------------------------===//

namespace {

/// ConvertToScalarInfo - This class implements the "Convert To Scalar"
/// optimization, which scans the uses of an alloca and determines if it can
/// rewrite it in terms of a single new alloca that can be mem2reg'd.
class ConvertToScalarInfo {
  /// AllocaSize - The size of the alloca being considered in bytes.
  unsigned AllocaSize;
  const DataLayout &DL;
  unsigned ScalarLoadThreshold;

  /// IsNotTrivial - This is set to true if there is some access to the object
  /// which means that mem2reg can't promote it.
  bool IsNotTrivial;

  /// ScalarKind - Tracks the kind of alloca being considered for promotion,
  /// computed based on the uses of the alloca rather than the LLVM type
  /// system.
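  /// For example (illustrative only): a 16-byte alloca accessed with four
  /// float loads/stores at offsets 0, 4, 8, and 12 is classified as
  /// ImplicitVector (implied type <4 x float>); one that is also
  /// loaded/stored as a whole <4 x float> becomes Vector; incompatible or
  /// mixed-size accesses fall back to Integer (an i128 bag-of-bits).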
  enum {
    Unknown,

    // Accesses via GEPs that are consistent with element access of a vector
    // type. This will not be converted into a vector unless there is a later
    // access using an actual vector type.
    ImplicitVector,

    // Accesses via vector operations and GEPs that are consistent with the
    // layout of a vector type.
    Vector,

    // An integer bag-of-bits with bitwise operations for insertion and
    // extraction. Any combination of types can be converted into this kind
    // of scalar.
    Integer
  } ScalarKind;

  /// VectorTy - This tracks the type that we should promote the vector to if
  /// it is possible to turn it into a vector. This starts out null, and if it
  /// isn't possible to turn into a vector type, it gets set to VoidTy.
  VectorType *VectorTy;

  /// HadNonMemTransferAccess - True if there is at least one access to the
  /// alloca that is not a MemTransferInst. We don't want to turn structs into
  /// large integers unless there is some potential for optimization.
  bool HadNonMemTransferAccess;

  /// HadDynamicAccess - True if some element of this alloca was dynamic.
  /// We don't yet have support for turning a dynamic access into a large
  /// integer.
  bool HadDynamicAccess;

public:
  explicit ConvertToScalarInfo(unsigned Size, const DataLayout &DL,
                               unsigned SLT)
      : AllocaSize(Size), DL(DL), ScalarLoadThreshold(SLT),
        IsNotTrivial(false), ScalarKind(Unknown), VectorTy(nullptr),
        HadNonMemTransferAccess(false), HadDynamicAccess(false) {}

  AllocaInst *TryConvert(AllocaInst *AI);

private:
  bool CanConvertToScalar(Value *V, uint64_t Offset, Value *NonConstantIdx);
  void MergeInTypeForLoadOrStore(Type *In, uint64_t Offset);
  bool MergeInVectorType(VectorType *VInTy, uint64_t Offset);
  void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset,
                           Value *NonConstantIdx);
  Value *ConvertScalar_ExtractValue(Value *NV, Type *ToType, uint64_t Offset,
                                    Value *NonConstantIdx,
                                    IRBuilder<> &Builder);
  Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
                                   uint64_t Offset, Value *NonConstantIdx,
                                   IRBuilder<> &Builder);
};

} // end anonymous namespace.

/// TryConvert - Analyze the specified alloca, and if it is safe to do so,
/// rewrite it to be a new alloca which is mem2reg'able. This returns the new
/// alloca if possible or null if not.
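/// For example (illustrative only): an alloca whose uses all look like
/// element accesses of a <4 x float> can be rewritten as a single
/// "alloca <4 x float>", while an 8-byte alloca with mixed-type accesses can
/// be rewritten as an "alloca i64" whose accesses are lowered to
/// shift/trunc/bitcast-style extracts and inserts.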
AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
  // If we can't convert this scalar, or if mem2reg can trivially do it, bail
  // out.
  if (!CanConvertToScalar(AI, 0, nullptr) || !IsNotTrivial)
    return nullptr;

  // If an alloca has only memset / memcpy uses, it may still have an Unknown
  // ScalarKind. Treat it as an Integer below.
  if (ScalarKind == Unknown)
    ScalarKind = Integer;

  if (ScalarKind == Vector && VectorTy->getBitWidth() != AllocaSize * 8)
    ScalarKind = Integer;

  // If we were able to find a vector type that can handle this with
  // insert/extract elements, and if there was at least one use that had
  // a vector type, promote this to a vector. We don't want to promote
  // random stuff that doesn't use vectors (e.g. <9 x double>) because then
  // we just get a lot of insert/extracts. If at least one vector is
  // involved, then we probably really do have a union of vector/array.
  Type *NewTy;
  if (ScalarKind == Vector) {
    assert(VectorTy && "Missing type for vector scalar.");
    DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n  TYPE = " << *VectorTy
                 << '\n');
    NewTy = VectorTy; // Use the vector type.
  } else {
    unsigned BitWidth = AllocaSize * 8;

    // Do not convert to scalar integer if the alloca size exceeds the
    // scalar load threshold.
    if (BitWidth > ScalarLoadThreshold)
      return nullptr;

    if ((ScalarKind == ImplicitVector || ScalarKind == Integer) &&
        !HadNonMemTransferAccess && !DL.fitsInLegalInteger(BitWidth))
      return nullptr;

    // Dynamic accesses on integers aren't yet supported. They need us to shift
    // by a dynamic amount which could be difficult to work out as we might not
    // know whether to use a left or right shift.
    if (ScalarKind == Integer && HadDynamicAccess)
      return nullptr;

    DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
    // Create and insert the integer alloca.
    NewTy = IntegerType::get(AI->getContext(), BitWidth);
  }
  AllocaInst *NewAI =
      new AllocaInst(NewTy, nullptr, "", AI->getParent()->begin());
  ConvertUsesToScalar(AI, NewAI, 0, nullptr);
  return NewAI;
}

/// MergeInTypeForLoadOrStore - Add the 'In' type to the accumulated vector
/// type (VectorTy) so far at the offset specified by Offset (which is
/// specified in bytes).
///
/// There are two cases we handle here:
///   1) A union of vector types of the same size and potentially its elements.
///      Here we turn element accesses into insert/extract element operations.
///      This promotes a <4 x float> with a store of float to the third element
///      into a <4 x float> that uses insert element.
///   2) A fully general blob of memory, which we turn into some (potentially
///      large) integer type with extract and insert operations where the loads
///      and stores would mutate the memory. We mark this by setting VectorTy
///      to VoidTy.
void ConvertToScalarInfo::MergeInTypeForLoadOrStore(Type *In,
                                                    uint64_t Offset) {
  // If we already decided to turn this into a blob of integer memory, there is
  // nothing to be done.
  if (ScalarKind == Integer)
    return;

  // If this could be contributing to a vector, analyze it.

  // If the In type is a vector that is the same size as the alloca, see if it
  // matches the existing VecTy.
  if (VectorType *VInTy = dyn_cast<VectorType>(In)) {
    if (MergeInVectorType(VInTy, Offset))
      return;
  } else if (In->isFloatTy() || In->isDoubleTy() ||
             (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 &&
              isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
    // Full width accesses can be ignored, because they can always be turned
    // into bitcasts.
    unsigned EltSize = In->getPrimitiveSizeInBits() / 8;
    if (EltSize == AllocaSize)
      return;

    // If we're accessing something that could be an element of a vector, see
    // if the implied vector agrees with what we already have and if Offset is
    // compatible with it.
    if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
        (!VectorTy ||
         EltSize == VectorTy->getElementType()->getPrimitiveSizeInBits() / 8)) {
      if (!VectorTy) {
        ScalarKind = ImplicitVector;
        VectorTy = VectorType::get(In, AllocaSize / EltSize);
      }
      return;
    }
  }

  // Otherwise, we have a case that we can't handle with an optimized vector
  // form. We can still turn this into a large integer.
  ScalarKind = Integer;
}

/// MergeInVectorType - Handles the vector case of MergeInTypeForLoadOrStore,
/// returning true if the type was successfully merged and false otherwise.
bool ConvertToScalarInfo::MergeInVectorType(VectorType *VInTy,
                                            uint64_t Offset) {
  if (VInTy->getBitWidth() / 8 == AllocaSize && Offset == 0) {
    // If we're storing/loading a vector of the right size, allow it as a
    // vector. If this is the first vector we see, remember the type so that
    // we know the element size. If this is a subsequent access, ignore it
    // even if it is a differing type but the same size. Worst case we can
    // bitcast the resultant vectors.
    if (!VectorTy)
      VectorTy = VInTy;

    ScalarKind = Vector;
    return true;
  }

  return false;
}

/// CanConvertToScalar - V is a pointer. If we can convert the pointee and all
/// its accesses to a single vector type, return true and set VecTy to
/// the new type. If we could convert the alloca into a single promotable
/// integer, return true but set VecTy to VoidTy. Further, if the use is not a
/// completely trivial use that mem2reg could promote, set IsNotTrivial. Offset
/// is the current offset from the base of the alloca being analyzed.
///
/// If we see at least one access to the value as a vector type, set the
/// SawVec flag.
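///
/// Any user other than the simple loads, stores, bitcasts, GEPs, memset /
/// memtransfer intrinsics, and lifetime markers handled below (for example a
/// PHI, a select, or a call that takes the address) causes this to return
/// false.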
bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset,
                                             Value *NonConstantIdx) {
  for (User *U : V->users()) {
    Instruction *UI = cast<Instruction>(U);

    if (LoadInst *LI = dyn_cast<LoadInst>(UI)) {
      // Don't break volatile loads.
      if (!LI->isSimple())
        return false;
      HadNonMemTransferAccess = true;
      MergeInTypeForLoadOrStore(LI->getType(), Offset);
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
      // Storing the pointer, not into the value?
      if (SI->getOperand(0) == V || !SI->isSimple())
        return false;
      HadNonMemTransferAccess = true;
      MergeInTypeForLoadOrStore(SI->getOperand(0)->getType(), Offset);
      continue;
    }

    if (BitCastInst *BCI = dyn_cast<BitCastInst>(UI)) {
      if (!onlyUsedByLifetimeMarkers(BCI))
        IsNotTrivial = true; // Can't be mem2reg'd.
      if (!CanConvertToScalar(BCI, Offset, NonConstantIdx))
        return false;
      continue;
    }

    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UI)) {
      // If this is a GEP with variable indices, we can't handle it.
      PointerType *PtrTy = dyn_cast<PointerType>(GEP->getPointerOperandType());
      if (!PtrTy)
        return false;

      // Compute the offset that this GEP adds to the pointer.
      SmallVector<Value *, 8> Indices(GEP->op_begin() + 1, GEP->op_end());
      Value *GEPNonConstantIdx = nullptr;
      if (!GEP->hasAllConstantIndices()) {
        if (!isa<VectorType>(PtrTy->getElementType()))
          return false;
        if (NonConstantIdx)
          return false;
        GEPNonConstantIdx = Indices.pop_back_val();
        if (!GEPNonConstantIdx->getType()->isIntegerTy(32))
          return false;
        HadDynamicAccess = true;
      } else
        GEPNonConstantIdx = NonConstantIdx;
      uint64_t GEPOffset = DL.getIndexedOffset(PtrTy, Indices);
      // See if all uses can be converted.
      if (!CanConvertToScalar(GEP, Offset + GEPOffset, GEPNonConstantIdx))
        return false;
      IsNotTrivial = true; // Can't be mem2reg'd.
      HadNonMemTransferAccess = true;
      continue;
    }

    // If this is a constant sized memset of a constant value (e.g. 0) we can
    // handle it.
    if (MemSetInst *MSI = dyn_cast<MemSetInst>(UI)) {
      // Store to dynamic index.
      if (NonConstantIdx)
        return false;
      // Store of constant value.
      if (!isa<ConstantInt>(MSI->getValue()))
        return false;
      // Store of constant size.
      ConstantInt *Len = dyn_cast<ConstantInt>(MSI->getLength());
      if (!Len)
        return false;

      // If the size differs from the alloca, we can only convert the alloca to
      // an integer bag-of-bits.
      // FIXME: This should handle all of the cases that are currently accepted
      // as vector element insertions.
      if (Len->getZExtValue() != AllocaSize || Offset != 0)
        ScalarKind = Integer;

      IsNotTrivial = true; // Can't be mem2reg'd.
      HadNonMemTransferAccess = true;
      continue;
    }

    // If this is a memcpy or memmove into or out of the whole allocation, we
    // can handle it like a load or store of the scalar type.
    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(UI)) {
      // Store to dynamic index.
      if (NonConstantIdx)
        return false;
      ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength());
      if (!Len || Len->getZExtValue() != AllocaSize || Offset != 0)
        return false;

      IsNotTrivial = true; // Can't be mem2reg'd.
      continue;
    }

    // If this is a lifetime intrinsic, we can handle it.
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(UI)) {
      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
          II->getIntrinsicID() == Intrinsic::lifetime_end) {
        continue;
      }
    }

    // Otherwise, we cannot handle this!
    return false;
  }

  return true;
}

/// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca
/// directly. This happens when we are converting an "integer union" to a
/// single integer scalar, or when we are converting a "vector union" to a
/// vector with insert/extractelement instructions.
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right. By the end of this, there should be no uses of Ptr.
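///
/// For example (illustrative only, assuming a little-endian layout): with
/// NewAI of type i64, a "load float" of the original alloca at byte offset 4
/// becomes roughly
///
///   %v = load i64, i64* %NewAI
///   %s = lshr i64 %v, 32
///   %t = trunc i64 %s to i32
///   %f = bitcast i32 %t to float
///
/// and stores are handled by the symmetric insert sequence.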
void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
                                              uint64_t Offset,
                                              Value *NonConstantIdx) {
  while (!Ptr->use_empty()) {
    Instruction *User = cast<Instruction>(Ptr->user_back());

    if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) {
      ConvertUsesToScalar(CI, NewAI, Offset, NonConstantIdx);
      CI->eraseFromParent();
      continue;
    }

    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
      // Compute the offset that this GEP adds to the pointer.
      SmallVector<Value *, 8> Indices(GEP->op_begin() + 1, GEP->op_end());
      Value *GEPNonConstantIdx = nullptr;
      if (!GEP->hasAllConstantIndices()) {
        assert(!NonConstantIdx &&
               "Dynamic GEP reading from dynamic GEP unsupported");
        GEPNonConstantIdx = Indices.pop_back_val();
      } else
        GEPNonConstantIdx = NonConstantIdx;
      uint64_t GEPOffset =
          DL.getIndexedOffset(GEP->getPointerOperandType(), Indices);
      ConvertUsesToScalar(GEP, NewAI, Offset + GEPOffset * 8,
                          GEPNonConstantIdx);
      GEP->eraseFromParent();
      continue;
    }

    IRBuilder<> Builder(User);

    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      // The load is a bit extract from NewAI shifted right by Offset bits.
      Value *LoadedVal = Builder.CreateLoad(NewAI);
      Value *NewLoadVal = ConvertScalar_ExtractValue(
          LoadedVal, LI->getType(), Offset, NonConstantIdx, Builder);
      LI->replaceAllUsesWith(NewLoadVal);
      LI->eraseFromParent();
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
      assert(SI->getOperand(0) != Ptr && "Consistency error!");
      Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName() + ".in");
      Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
                                             NonConstantIdx, Builder);
      Builder.CreateStore(New, NewAI);
      SI->eraseFromParent();

      // If the load we just inserted is now dead, then the inserted store
      // overwrote the entire thing.
      if (Old->use_empty())
        Old->eraseFromParent();
      continue;
    }

    // If this is a constant sized memset of a constant value (e.g. 0) we can
    // transform it into a store of the expanded constant value.
    if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
      assert(MSI->getRawDest() == Ptr && "Consistency error!");
      assert(!NonConstantIdx && "Cannot replace dynamic memset with insert");
      int64_t SNumBytes = cast<ConstantInt>(MSI->getLength())->getSExtValue();
      if (SNumBytes > 0 && (SNumBytes >> 32) == 0) {
        unsigned NumBytes = static_cast<unsigned>(SNumBytes);
        unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue();

        // Compute the value replicated the right number of times.
        APInt APVal(NumBytes * 8, Val);

        // Splat the value if non-zero.
        if (Val)
          for (unsigned i = 1; i != NumBytes; ++i)
            APVal |= APVal << 8;

        Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName() + ".in");
        Value *New = ConvertScalar_InsertValue(
            ConstantInt::get(User->getContext(), APVal), Old, Offset, nullptr,
            Builder);
        Builder.CreateStore(New, NewAI);

        // If the load we just inserted is now dead, then the memset overwrote
        // the entire thing.
        if (Old->use_empty())
          Old->eraseFromParent();
      }
      MSI->eraseFromParent();
      continue;
    }

    // If this is a memcpy or memmove into or out of the whole allocation, we
    // can handle it like a load or store of the scalar type.
    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
      assert(Offset == 0 && "must be store to start of alloca");
      assert(!NonConstantIdx && "Cannot replace dynamic transfer with insert");

      // If the source and destination are both to the same alloca, then this
      // is a noop copy-to-self, just delete it. Otherwise, emit a load and
      // store as appropriate.
      AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, DL, 0));
      if (GetUnderlyingObject(MTI->getSource(), DL, 0) != OrigAI) {
        // Dest must be OrigAI, change this to be a load from the original
        // pointer (bitcasted), then a store to our new alloca.
        assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
        Value *SrcPtr = MTI->getSource();
        PointerType *SPTy = cast<PointerType>(SrcPtr->getType());
        PointerType *AIPTy = cast<PointerType>(NewAI->getType());
        if (SPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
          AIPTy = PointerType::get(AIPTy->getElementType(),
                                   SPTy->getAddressSpace());
        }
        SrcPtr = Builder.CreateBitCast(SrcPtr, AIPTy);

        LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
        SrcVal->setAlignment(MTI->getAlignment());
        Builder.CreateStore(SrcVal, NewAI);
      } else if (GetUnderlyingObject(MTI->getDest(), DL, 0) != OrigAI) {
        // Src must be OrigAI, change this to be a load from NewAI then a store
        // through the original dest pointer (bitcasted).
        assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
        LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval");

        PointerType *DPTy = cast<PointerType>(MTI->getDest()->getType());
        PointerType *AIPTy = cast<PointerType>(NewAI->getType());
        if (DPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
          AIPTy = PointerType::get(AIPTy->getElementType(),
                                   DPTy->getAddressSpace());
        }
        Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), AIPTy);

        StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr);
        NewStore->setAlignment(MTI->getAlignment());
  676. } else {
  677. // Noop transfer. Src == Dst
  678. }
  679. MTI->eraseFromParent();
  680. continue;
  681. }
  682. if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
  683. if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
  684. II->getIntrinsicID() == Intrinsic::lifetime_end) {
  685. // There's no need to preserve these, as the resulting alloca will be
686. // converted to a register anyway.
  687. II->eraseFromParent();
  688. continue;
  689. }
  690. }
  691. llvm_unreachable("Unsupported operation!");
  692. }
  693. }
  694. /// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
  695. /// or vector value FromVal, extracting the bits from the offset specified by
  696. /// Offset. This returns the value, which is of type ToType.
  697. ///
  698. /// This happens when we are converting an "integer union" to a single
  699. /// integer scalar, or when we are converting a "vector union" to a vector with
  700. /// insert/extractelement instructions.
  701. ///
  702. /// Offset is an offset from the original alloca, in bits that need to be
  703. /// shifted to the right.
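//
// Illustrative sketch for the vector case: if FromVal is a <4 x float> and
// ToType is float with Offset == 64 bits, this emits
//   %e = extractelement <4 x float> %FromVal, i32 2
// and, with a NonConstantIdx %i, the index becomes (%i + 2), named
// "dyn.offset".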
  704. Value *ConvertToScalarInfo::ConvertScalar_ExtractValue(Value *FromVal,
  705. Type *ToType,
  706. uint64_t Offset,
  707. Value *NonConstantIdx,
  708. IRBuilder<> &Builder) {
  709. // If the load is of the whole new alloca, no conversion is needed.
  710. Type *FromType = FromVal->getType();
  711. if (FromType == ToType && Offset == 0)
  712. return FromVal;
  713. // If the result alloca is a vector type, this is either an element
  714. // access or a bitcast to another vector type of the same size.
  715. if (VectorType *VTy = dyn_cast<VectorType>(FromType)) {
  716. unsigned FromTypeSize = DL.getTypeAllocSize(FromType);
  717. unsigned ToTypeSize = DL.getTypeAllocSize(ToType);
  718. if (FromTypeSize == ToTypeSize)
  719. return Builder.CreateBitCast(FromVal, ToType);
  720. // Otherwise it must be an element access.
  721. unsigned Elt = 0;
  722. if (Offset) {
  723. unsigned EltSize = DL.getTypeAllocSizeInBits(VTy->getElementType());
  724. Elt = Offset / EltSize;
  725. assert(EltSize * Elt == Offset && "Invalid modulus in validity checking");
  726. }
  727. // Return the element extracted out of it.
  728. Value *Idx;
  729. if (NonConstantIdx) {
  730. if (Elt)
  731. Idx = Builder.CreateAdd(NonConstantIdx, Builder.getInt32(Elt),
  732. "dyn.offset");
  733. else
  734. Idx = NonConstantIdx;
  735. } else
  736. Idx = Builder.getInt32(Elt);
  737. Value *V = Builder.CreateExtractElement(FromVal, Idx);
  738. if (V->getType() != ToType)
  739. V = Builder.CreateBitCast(V, ToType);
  740. return V;
  741. }
  742. // If ToType is a first class aggregate, extract out each of the pieces and
  743. // use insertvalue's to form the FCA.
  744. if (StructType *ST = dyn_cast<StructType>(ToType)) {
  745. assert(!NonConstantIdx &&
  746. "Dynamic indexing into struct types not supported");
  747. const StructLayout &Layout = *DL.getStructLayout(ST);
  748. Value *Res = UndefValue::get(ST);
  749. for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
  750. Value *Elt = ConvertScalar_ExtractValue(
  751. FromVal, ST->getElementType(i),
  752. Offset + Layout.getElementOffsetInBits(i), nullptr, Builder);
  753. Res = Builder.CreateInsertValue(Res, Elt, i);
  754. }
  755. return Res;
  756. }
  757. if (ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
  758. assert(!NonConstantIdx &&
  759. "Dynamic indexing into array types not supported");
  760. uint64_t EltSize = DL.getTypeAllocSizeInBits(AT->getElementType());
  761. Value *Res = UndefValue::get(AT);
  762. for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
  763. Value *Elt =
  764. ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
  765. Offset + i * EltSize, nullptr, Builder);
  766. Res = Builder.CreateInsertValue(Res, Elt, i);
  767. }
  768. return Res;
  769. }
  770. // Otherwise, this must be a union that was converted to an integer value.
  771. IntegerType *NTy = cast<IntegerType>(FromVal->getType());
  772. // If this is a big-endian system and the load is narrower than the
  773. // full alloca type, we need to do a shift to get the right bits.
  774. int ShAmt = 0;
  775. if (DL.isBigEndian()) {
  776. // On big-endian machines, the lowest bit is stored at the bit offset
  777. // from the pointer given by getTypeStoreSizeInBits. This matters for
  778. // integers with a bitwidth that is not a multiple of 8.
  779. ShAmt = DL.getTypeStoreSizeInBits(NTy) - DL.getTypeStoreSizeInBits(ToType) -
  780. Offset;
  781. } else {
  782. ShAmt = Offset;
  783. }
784. // Note: we support negative shift amounts (by shifting left instead), which
785. // are otherwise not defined. We do this to support (e.g.) loads off the end
786. // of a structure where only some bits are used.
  787. if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
  788. FromVal = Builder.CreateLShr(FromVal,
  789. ConstantInt::get(FromVal->getType(), ShAmt));
  790. else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
  791. FromVal = Builder.CreateShl(FromVal,
  792. ConstantInt::get(FromVal->getType(), -ShAmt));
  793. // Finally, unconditionally truncate the integer to the right width.
  794. unsigned LIBitWidth = DL.getTypeSizeInBits(ToType);
  795. if (LIBitWidth < NTy->getBitWidth())
  796. FromVal = Builder.CreateTrunc(
  797. FromVal, IntegerType::get(FromVal->getContext(), LIBitWidth));
  798. else if (LIBitWidth > NTy->getBitWidth())
  799. FromVal = Builder.CreateZExt(
  800. FromVal, IntegerType::get(FromVal->getContext(), LIBitWidth));
  801. // If the result is an integer, this is a trunc or bitcast.
  802. if (ToType->isIntegerTy()) {
803. // Already the right type; nothing more to do.
  804. } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) {
  805. // Just do a bitcast, we know the sizes match up.
  806. FromVal = Builder.CreateBitCast(FromVal, ToType);
  807. } else {
  808. // Otherwise must be a pointer.
  809. FromVal = Builder.CreateIntToPtr(FromVal, ToType);
  810. }
  811. assert(FromVal->getType() == ToType && "Didn't convert right?");
  812. return FromVal;
  813. }
  814. /// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer
  815. /// or vector value "Old" at the offset specified by Offset.
  816. ///
  817. /// This happens when we are converting an "integer union" to a
  818. /// single integer scalar, or when we are converting a "vector union" to a
  819. /// vector with insert/extractelement instructions.
  820. ///
  821. /// Offset is an offset from the original alloca, in bits that need to be
  822. /// shifted to the right.
  823. ///
  824. /// NonConstantIdx is an index value if there was a GEP with a non-constant
825. /// index value. If this is null then all GEPs used to find this insert address
  826. /// are constant.
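//
// Illustrative sketch (assuming a little-endian target, an i32 "Old" value
// and an i8 "SV" stored at bit offset 8): the code below emits roughly
//   %z  = zext i8 %SV to i32
//   %sh = shl i32 %z, 8
//   %m  = and i32 %Old, -65281        ; clear bits 8..15 (mask 0xFFFF00FF)
//   %r  = or i32 %m, %sh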
  827. Value *ConvertToScalarInfo::ConvertScalar_InsertValue(Value *SV, Value *Old,
  828. uint64_t Offset,
  829. Value *NonConstantIdx,
  830. IRBuilder<> &Builder) {
  831. // Convert the stored type to the actual type, shift it left to insert
  832. // then 'or' into place.
  833. Type *AllocaType = Old->getType();
  834. LLVMContext &Context = Old->getContext();
  835. if (VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
  836. uint64_t VecSize = DL.getTypeAllocSizeInBits(VTy);
  837. uint64_t ValSize = DL.getTypeAllocSizeInBits(SV->getType());
  838. // Changing the whole vector with memset or with an access of a different
  839. // vector type?
  840. if (ValSize == VecSize)
  841. return Builder.CreateBitCast(SV, AllocaType);
  842. // Must be an element insertion.
  843. Type *EltTy = VTy->getElementType();
  844. if (SV->getType() != EltTy)
  845. SV = Builder.CreateBitCast(SV, EltTy);
  846. uint64_t EltSize = DL.getTypeAllocSizeInBits(EltTy);
  847. unsigned Elt = Offset / EltSize;
  848. Value *Idx;
  849. if (NonConstantIdx) {
  850. if (Elt)
  851. Idx = Builder.CreateAdd(NonConstantIdx, Builder.getInt32(Elt),
  852. "dyn.offset");
  853. else
  854. Idx = NonConstantIdx;
  855. } else
  856. Idx = Builder.getInt32(Elt);
  857. return Builder.CreateInsertElement(Old, SV, Idx);
  858. }
  859. // If SV is a first-class aggregate value, insert each value recursively.
  860. if (StructType *ST = dyn_cast<StructType>(SV->getType())) {
  861. assert(!NonConstantIdx &&
  862. "Dynamic indexing into struct types not supported");
  863. const StructLayout &Layout = *DL.getStructLayout(ST);
  864. for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
  865. Value *Elt = Builder.CreateExtractValue(SV, i);
  866. Old = ConvertScalar_InsertValue(Elt, Old,
  867. Offset + Layout.getElementOffsetInBits(i),
  868. nullptr, Builder);
  869. }
  870. return Old;
  871. }
  872. if (ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
  873. assert(!NonConstantIdx &&
  874. "Dynamic indexing into array types not supported");
  875. uint64_t EltSize = DL.getTypeAllocSizeInBits(AT->getElementType());
  876. for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
  877. Value *Elt = Builder.CreateExtractValue(SV, i);
  878. Old = ConvertScalar_InsertValue(Elt, Old, Offset + i * EltSize, nullptr,
  879. Builder);
  880. }
  881. return Old;
  882. }
  883. // If SV is a float, convert it to the appropriate integer type.
  884. // If it is a pointer, do the same.
  885. unsigned SrcWidth = DL.getTypeSizeInBits(SV->getType());
  886. unsigned DestWidth = DL.getTypeSizeInBits(AllocaType);
  887. unsigned SrcStoreWidth = DL.getTypeStoreSizeInBits(SV->getType());
  888. unsigned DestStoreWidth = DL.getTypeStoreSizeInBits(AllocaType);
  889. if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy())
  890. SV =
  891. Builder.CreateBitCast(SV, IntegerType::get(SV->getContext(), SrcWidth));
  892. else if (SV->getType()->isPointerTy())
  893. SV = Builder.CreatePtrToInt(SV, DL.getIntPtrType(SV->getType()));
  894. // Zero extend or truncate the value if needed.
  895. if (SV->getType() != AllocaType) {
  896. if (SV->getType()->getPrimitiveSizeInBits() <
  897. AllocaType->getPrimitiveSizeInBits())
  898. SV = Builder.CreateZExt(SV, AllocaType);
  899. else {
  900. // Truncation may be needed if storing more than the alloca can hold
  901. // (undefined behavior).
  902. SV = Builder.CreateTrunc(SV, AllocaType);
  903. SrcWidth = DestWidth;
  904. SrcStoreWidth = DestStoreWidth;
  905. }
  906. }
  907. // If this is a big-endian system and the store is narrower than the
  908. // full alloca type, we need to do a shift to get the right bits.
  909. int ShAmt = 0;
  910. if (DL.isBigEndian()) {
  911. // On big-endian machines, the lowest bit is stored at the bit offset
  912. // from the pointer given by getTypeStoreSizeInBits. This matters for
  913. // integers with a bitwidth that is not a multiple of 8.
  914. ShAmt = DestStoreWidth - SrcStoreWidth - Offset;
  915. } else {
  916. ShAmt = Offset;
  917. }
918. // Note: we support negative shift amounts (by shifting right instead), which
919. // are otherwise not defined. We do this to support (e.g.) stores off the end
920. // of a structure where only some bits in the structure are set.
  921. APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
  922. if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
  923. SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(), ShAmt));
  924. Mask <<= ShAmt;
  925. } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {
  926. SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(), -ShAmt));
  927. Mask = Mask.lshr(-ShAmt);
  928. }
  929. // Mask out the bits we are about to insert from the old value, and or
  930. // in the new bits.
  931. if (SrcWidth != DestWidth) {
  932. assert(DestWidth > SrcWidth);
  933. Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask), "mask");
  934. SV = Builder.CreateOr(Old, SV, "ins");
  935. }
  936. return SV;
  937. }
  938. //===----------------------------------------------------------------------===//
  939. // SRoA Driver
  940. //===----------------------------------------------------------------------===//
  941. bool SROA_HLSL::runOnFunction(Function &F) {
  942. Module *M = F.getParent();
  943. HLModule &HLM = M->GetOrCreateHLModule();
  944. DxilTypeSystem &typeSys = HLM.GetTypeSystem();
  945. bool Changed = performScalarRepl(F, typeSys);
946. // Change any remaining memcpys into load/store pairs.
  947. MemcpySplitter splitter(F.getContext(), typeSys);
  948. splitter.Split(F);
  949. Changed |= markPrecise(F);
  950. return Changed;
  951. }
  952. namespace {
  953. class AllocaPromoter : public LoadAndStorePromoter {
  954. AllocaInst *AI;
  955. DIBuilder *DIB;
  956. SmallVector<DbgDeclareInst *, 4> DDIs;
  957. SmallVector<DbgValueInst *, 4> DVIs;
  958. public:
  959. AllocaPromoter(ArrayRef<Instruction *> Insts, SSAUpdater &S, DIBuilder *DB)
  960. : LoadAndStorePromoter(Insts, S), AI(nullptr), DIB(DB) {}
  961. void run(AllocaInst *AI, const SmallVectorImpl<Instruction *> &Insts) {
  962. // Remember which alloca we're promoting (for isInstInList).
  963. this->AI = AI;
  964. if (auto *L = LocalAsMetadata::getIfExists(AI)) {
  965. if (auto *DINode = MetadataAsValue::getIfExists(AI->getContext(), L)) {
  966. for (User *U : DINode->users())
  967. if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
  968. DDIs.push_back(DDI);
  969. else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(U))
  970. DVIs.push_back(DVI);
  971. }
  972. }
  973. LoadAndStorePromoter::run(Insts);
  974. AI->eraseFromParent();
  975. for (SmallVectorImpl<DbgDeclareInst *>::iterator I = DDIs.begin(),
  976. E = DDIs.end();
  977. I != E; ++I) {
  978. DbgDeclareInst *DDI = *I;
  979. DDI->eraseFromParent();
  980. }
  981. for (SmallVectorImpl<DbgValueInst *>::iterator I = DVIs.begin(),
  982. E = DVIs.end();
  983. I != E; ++I) {
  984. DbgValueInst *DVI = *I;
  985. DVI->eraseFromParent();
  986. }
  987. }
  988. bool
  989. isInstInList(Instruction *I,
  990. const SmallVectorImpl<Instruction *> &Insts) const override {
  991. if (LoadInst *LI = dyn_cast<LoadInst>(I))
  992. return LI->getOperand(0) == AI;
  993. return cast<StoreInst>(I)->getPointerOperand() == AI;
  994. }
  995. void updateDebugInfo(Instruction *Inst) const override {
  996. for (SmallVectorImpl<DbgDeclareInst *>::const_iterator I = DDIs.begin(),
  997. E = DDIs.end();
  998. I != E; ++I) {
  999. DbgDeclareInst *DDI = *I;
  1000. if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
  1001. ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
  1002. else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
  1003. ConvertDebugDeclareToDebugValue(DDI, LI, *DIB);
  1004. }
  1005. for (SmallVectorImpl<DbgValueInst *>::const_iterator I = DVIs.begin(),
  1006. E = DVIs.end();
  1007. I != E; ++I) {
  1008. DbgValueInst *DVI = *I;
  1009. Value *Arg = nullptr;
  1010. if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
  1011. // If an argument is zero extended then use argument directly. The ZExt
  1012. // may be zapped by an optimization pass in future.
  1013. if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
  1014. Arg = dyn_cast<Argument>(ZExt->getOperand(0));
  1015. if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
  1016. Arg = dyn_cast<Argument>(SExt->getOperand(0));
  1017. if (!Arg)
  1018. Arg = SI->getOperand(0);
  1019. } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
  1020. Arg = LI->getOperand(0);
  1021. } else {
  1022. continue;
  1023. }
  1024. DIB->insertDbgValueIntrinsic(Arg, 0, DVI->getVariable(),
  1025. DVI->getExpression(), DVI->getDebugLoc(),
  1026. Inst);
  1027. }
  1028. }
  1029. };
  1030. } // end anon namespace
  1031. /// isSafeSelectToSpeculate - Select instructions that use an alloca and are
  1032. /// subsequently loaded can be rewritten to load both input pointers and then
  1033. /// select between the result, allowing the load of the alloca to be promoted.
  1034. /// From this:
  1035. /// %P2 = select i1 %cond, i32* %Alloca, i32* %Other
  1036. /// %V = load i32* %P2
  1037. /// to:
  1038. /// %V1 = load i32* %Alloca -> will be mem2reg'd
  1039. /// %V2 = load i32* %Other
  1040. /// %V = select i1 %cond, i32 %V1, i32 %V2
  1041. ///
  1042. /// We can do this to a select if its only uses are loads and if the operand to
  1043. /// the select can be loaded unconditionally.
  1044. static bool isSafeSelectToSpeculate(SelectInst *SI) {
  1045. const DataLayout &DL = SI->getModule()->getDataLayout();
  1046. bool TDerefable = isDereferenceablePointer(SI->getTrueValue(), DL);
  1047. bool FDerefable = isDereferenceablePointer(SI->getFalseValue(), DL);
  1048. for (User *U : SI->users()) {
  1049. LoadInst *LI = dyn_cast<LoadInst>(U);
  1050. if (!LI || !LI->isSimple())
  1051. return false;
1052. // Both operands to the select need to be dereferenceable, either absolutely
  1053. // (e.g. allocas) or at this point because we can see other accesses to it.
  1054. if (!TDerefable &&
  1055. !isSafeToLoadUnconditionally(SI->getTrueValue(), LI,
  1056. LI->getAlignment()))
  1057. return false;
  1058. if (!FDerefable &&
  1059. !isSafeToLoadUnconditionally(SI->getFalseValue(), LI,
  1060. LI->getAlignment()))
  1061. return false;
  1062. }
  1063. return true;
  1064. }
  1065. /// isSafePHIToSpeculate - PHI instructions that use an alloca and are
  1066. /// subsequently loaded can be rewritten to load both input pointers in the pred
  1067. /// blocks and then PHI the results, allowing the load of the alloca to be
  1068. /// promoted.
  1069. /// From this:
  1070. /// %P2 = phi [i32* %Alloca, i32* %Other]
  1071. /// %V = load i32* %P2
  1072. /// to:
  1073. /// %V1 = load i32* %Alloca -> will be mem2reg'd
  1074. /// ...
  1075. /// %V2 = load i32* %Other
  1076. /// ...
  1077. /// %V = phi [i32 %V1, i32 %V2]
  1078. ///
1079. /// We can do this to a PHI if its only uses are loads and if the operands to
1080. /// the PHI can be loaded unconditionally.
  1081. static bool isSafePHIToSpeculate(PHINode *PN) {
  1082. // For now, we can only do this promotion if the load is in the same block as
  1083. // the PHI, and if there are no stores between the phi and load.
  1084. // TODO: Allow recursive phi users.
  1085. // TODO: Allow stores.
  1086. BasicBlock *BB = PN->getParent();
  1087. unsigned MaxAlign = 0;
  1088. for (User *U : PN->users()) {
  1089. LoadInst *LI = dyn_cast<LoadInst>(U);
  1090. if (!LI || !LI->isSimple())
  1091. return false;
  1092. // For now we only allow loads in the same block as the PHI. This is a
  1093. // common case that happens when instcombine merges two loads through a PHI.
  1094. if (LI->getParent() != BB)
  1095. return false;
  1096. // Ensure that there are no instructions between the PHI and the load that
  1097. // could store.
  1098. for (BasicBlock::iterator BBI = PN; &*BBI != LI; ++BBI)
  1099. if (BBI->mayWriteToMemory())
  1100. return false;
  1101. MaxAlign = std::max(MaxAlign, LI->getAlignment());
  1102. }
  1103. const DataLayout &DL = PN->getModule()->getDataLayout();
  1104. // Okay, we know that we have one or more loads in the same block as the PHI.
  1105. // We can transform this if it is safe to push the loads into the predecessor
  1106. // blocks. The only thing to watch out for is that we can't put a possibly
  1107. // trapping load in the predecessor if it is a critical edge.
  1108. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
  1109. BasicBlock *Pred = PN->getIncomingBlock(i);
  1110. Value *InVal = PN->getIncomingValue(i);
  1111. // If the terminator of the predecessor has side-effects (an invoke),
  1112. // there is no safe place to put a load in the predecessor.
  1113. if (Pred->getTerminator()->mayHaveSideEffects())
  1114. return false;
  1115. // If the value is produced by the terminator of the predecessor
  1116. // (an invoke), there is no valid place to put a load in the predecessor.
  1117. if (Pred->getTerminator() == InVal)
  1118. return false;
  1119. // If the predecessor has a single successor, then the edge isn't critical.
  1120. if (Pred->getTerminator()->getNumSuccessors() == 1)
  1121. continue;
  1122. // If this pointer is always safe to load, or if we can prove that there is
  1123. // already a load in the block, then we can move the load to the pred block.
  1124. if (isDereferenceablePointer(InVal, DL) ||
  1125. isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign))
  1126. continue;
  1127. return false;
  1128. }
  1129. return true;
  1130. }
  1131. /// tryToMakeAllocaBePromotable - This returns true if the alloca only has
  1132. /// direct (non-volatile) loads and stores to it. If the alloca is close but
  1133. /// not quite there, this will transform the code to allow promotion. As such,
  1134. /// it is a non-pure predicate.
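//
// Illustrative sketch of the select rewrite performed below:
//   %p = select i1 %c, i32* %AI, i32* %other
//   %v = load i32* %p
// becomes
//   %v.t = load i32* %AI
//   %v.f = load i32* %other
//   %v   = select i1 %c, i32 %v.t, i32 %v.f
// so the load of the alloca can later be promoted.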
  1135. static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout &DL) {
  1136. SetVector<Instruction *, SmallVector<Instruction *, 4>,
  1137. SmallPtrSet<Instruction *, 4>>
  1138. InstsToRewrite;
  1139. for (User *U : AI->users()) {
  1140. if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
  1141. if (!LI->isSimple())
  1142. return false;
  1143. continue;
  1144. }
  1145. if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
  1146. if (SI->getOperand(0) == AI || !SI->isSimple())
  1147. return false; // Don't allow a store OF the AI, only INTO the AI.
  1148. continue;
  1149. }
  1150. if (SelectInst *SI = dyn_cast<SelectInst>(U)) {
  1151. // If the condition being selected on is a constant, fold the select, yes
  1152. // this does (rarely) happen early on.
  1153. if (ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition())) {
  1154. Value *Result = SI->getOperand(1 + CI->isZero());
  1155. SI->replaceAllUsesWith(Result);
  1156. SI->eraseFromParent();
  1157. // This is very rare and we just scrambled the use list of AI, start
  1158. // over completely.
  1159. return tryToMakeAllocaBePromotable(AI, DL);
  1160. }
  1161. // If it is safe to turn "load (select c, AI, ptr)" into a select of two
  1162. // loads, then we can transform this by rewriting the select.
  1163. if (!isSafeSelectToSpeculate(SI))
  1164. return false;
  1165. InstsToRewrite.insert(SI);
  1166. continue;
  1167. }
  1168. if (PHINode *PN = dyn_cast<PHINode>(U)) {
  1169. if (PN->use_empty()) { // Dead PHIs can be stripped.
  1170. InstsToRewrite.insert(PN);
  1171. continue;
  1172. }
  1173. // If it is safe to turn "load (phi [AI, ptr, ...])" into a PHI of loads
  1174. // in the pred blocks, then we can transform this by rewriting the PHI.
  1175. if (!isSafePHIToSpeculate(PN))
  1176. return false;
  1177. InstsToRewrite.insert(PN);
  1178. continue;
  1179. }
  1180. if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
  1181. if (onlyUsedByLifetimeMarkers(BCI)) {
  1182. InstsToRewrite.insert(BCI);
  1183. continue;
  1184. }
  1185. }
  1186. return false;
  1187. }
  1188. // If there are no instructions to rewrite, then all uses are load/stores and
  1189. // we're done!
  1190. if (InstsToRewrite.empty())
  1191. return true;
  1192. // If we have instructions that need to be rewritten for this to be promotable
  1193. // take care of it now.
  1194. for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) {
  1195. if (BitCastInst *BCI = dyn_cast<BitCastInst>(InstsToRewrite[i])) {
  1196. // This could only be a bitcast used by nothing but lifetime intrinsics.
  1197. for (BitCastInst::user_iterator I = BCI->user_begin(),
  1198. E = BCI->user_end();
  1199. I != E;)
  1200. cast<Instruction>(*I++)->eraseFromParent();
  1201. BCI->eraseFromParent();
  1202. continue;
  1203. }
  1204. if (SelectInst *SI = dyn_cast<SelectInst>(InstsToRewrite[i])) {
  1205. // Selects in InstsToRewrite only have load uses. Rewrite each as two
  1206. // loads with a new select.
  1207. while (!SI->use_empty()) {
  1208. LoadInst *LI = cast<LoadInst>(SI->user_back());
  1209. IRBuilder<> Builder(LI);
  1210. LoadInst *TrueLoad =
  1211. Builder.CreateLoad(SI->getTrueValue(), LI->getName() + ".t");
  1212. LoadInst *FalseLoad =
  1213. Builder.CreateLoad(SI->getFalseValue(), LI->getName() + ".f");
  1214. // Transfer alignment and AA info if present.
  1215. TrueLoad->setAlignment(LI->getAlignment());
  1216. FalseLoad->setAlignment(LI->getAlignment());
  1217. AAMDNodes Tags;
  1218. LI->getAAMetadata(Tags);
  1219. if (Tags) {
  1220. TrueLoad->setAAMetadata(Tags);
  1221. FalseLoad->setAAMetadata(Tags);
  1222. }
  1223. Value *V =
  1224. Builder.CreateSelect(SI->getCondition(), TrueLoad, FalseLoad);
  1225. V->takeName(LI);
  1226. LI->replaceAllUsesWith(V);
  1227. LI->eraseFromParent();
  1228. }
  1229. // Now that all the loads are gone, the select is gone too.
  1230. SI->eraseFromParent();
  1231. continue;
  1232. }
  1233. // Otherwise, we have a PHI node which allows us to push the loads into the
  1234. // predecessors.
  1235. PHINode *PN = cast<PHINode>(InstsToRewrite[i]);
  1236. if (PN->use_empty()) {
  1237. PN->eraseFromParent();
  1238. continue;
  1239. }
  1240. Type *LoadTy = cast<PointerType>(PN->getType())->getElementType();
  1241. PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(),
  1242. PN->getName() + ".ld", PN);
  1243. // Get the AA tags and alignment to use from one of the loads. It doesn't
  1244. // matter which one we get and if any differ, it doesn't matter.
  1245. LoadInst *SomeLoad = cast<LoadInst>(PN->user_back());
  1246. AAMDNodes AATags;
  1247. SomeLoad->getAAMetadata(AATags);
  1248. unsigned Align = SomeLoad->getAlignment();
  1249. // Rewrite all loads of the PN to use the new PHI.
  1250. while (!PN->use_empty()) {
  1251. LoadInst *LI = cast<LoadInst>(PN->user_back());
  1252. LI->replaceAllUsesWith(NewPN);
  1253. LI->eraseFromParent();
  1254. }
  1255. // Inject loads into all of the pred blocks. Keep track of which blocks we
  1256. // insert them into in case we have multiple edges from the same block.
  1257. DenseMap<BasicBlock *, LoadInst *> InsertedLoads;
  1258. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
  1259. BasicBlock *Pred = PN->getIncomingBlock(i);
  1260. LoadInst *&Load = InsertedLoads[Pred];
  1261. if (!Load) {
  1262. Load = new LoadInst(PN->getIncomingValue(i),
  1263. PN->getName() + "." + Pred->getName(),
  1264. Pred->getTerminator());
  1265. Load->setAlignment(Align);
  1266. if (AATags)
  1267. Load->setAAMetadata(AATags);
  1268. }
  1269. NewPN->addIncoming(Load, Pred);
  1270. }
  1271. PN->eraseFromParent();
  1272. }
  1273. ++NumAdjusted;
  1274. return true;
  1275. }
  1276. bool SROA_HLSL::performPromotion(Function &F) {
  1277. std::vector<AllocaInst *> Allocas;
  1278. const DataLayout &DL = F.getParent()->getDataLayout();
  1279. DominatorTree *DT = nullptr;
  1280. if (HasDomTree)
  1281. DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  1282. AssumptionCache &AC =
  1283. getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  1284. BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
  1285. DIBuilder DIB(*F.getParent(), /*AllowUnresolved*/ false);
  1286. bool Changed = false;
  1287. SmallVector<Instruction *, 64> Insts;
  1288. while (1) {
  1289. Allocas.clear();
  1290. // Find allocas that are safe to promote, by looking at all instructions in
  1291. // the entry node
  1292. for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
  1293. if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) { // Is it an alloca?
  1294. DbgDeclareInst *DDI = llvm::FindAllocaDbgDeclare(AI);
1295. // Skip allocas that have debug info when we are not running promotion.
  1296. if (DDI && !RunPromotion) {
  1297. continue;
  1298. }
  1299. if (tryToMakeAllocaBePromotable(AI, DL))
  1300. Allocas.push_back(AI);
  1301. }
  1302. if (Allocas.empty())
  1303. break;
  1304. if (HasDomTree)
  1305. PromoteMemToReg(Allocas, *DT, nullptr, &AC);
  1306. else {
  1307. SSAUpdater SSA;
  1308. for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
  1309. AllocaInst *AI = Allocas[i];
  1310. // Build list of instructions to promote.
  1311. for (User *U : AI->users())
  1312. Insts.push_back(cast<Instruction>(U));
  1313. AllocaPromoter(Insts, SSA, &DIB).run(AI, Insts);
  1314. Insts.clear();
  1315. }
  1316. }
  1317. NumPromoted += Allocas.size();
  1318. Changed = true;
  1319. }
  1320. return Changed;
  1321. }
1322. /// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for
1323. /// SROA. For HLSL, any struct or array type qualifies.
  1324. bool SROA_HLSL::ShouldAttemptScalarRepl(AllocaInst *AI) {
  1325. Type *T = AI->getAllocatedType();
  1326. // promote every struct.
  1327. if (dyn_cast<StructType>(T))
  1328. return true;
  1329. // promote every array.
  1330. if (dyn_cast<ArrayType>(T))
  1331. return true;
  1332. return false;
  1333. }
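// getNestedLevelInStruct - Count how many levels of single-element structs
// wrap the given type; e.g. struct { struct { float; } } yields 2, while
// struct { float; float; } yields 0. Used by the alloca ordering below.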
  1334. static unsigned getNestedLevelInStruct(const Type *ty) {
  1335. unsigned lvl = 0;
  1336. while (ty->isStructTy()) {
  1337. if (ty->getStructNumElements() != 1)
  1338. break;
  1339. ty = ty->getStructElementType(0);
  1340. lvl++;
  1341. }
  1342. return lvl;
  1343. }
1344. // performScalarRepl - A simple worklist-driven algorithm that runs on all of
1345. // the alloca instructions in the entry block, removing them if they are only
1346. // used by getelementptr instructions.
  1347. //
  1348. bool SROA_HLSL::performScalarRepl(Function &F, DxilTypeSystem &typeSys) {
  1349. std::vector<AllocaInst *> AllocaList;
  1350. const DataLayout &DL = F.getParent()->getDataLayout();
1351. // Make sure big allocas are split first.
1352. // This simplifies the memcpy check between part of a big alloca and a small
1353. // alloca: because the big alloca is split into smaller pieces first, by the
1354. // time the small alloca is processed the other operand is an alloca flattened
1355. // from the big alloca rather than a GEP into the big alloca.
  1356. auto size_cmp = [&DL](const AllocaInst *a0, const AllocaInst *a1) -> bool {
  1357. Type* a0ty = a0->getAllocatedType();
  1358. Type* a1ty = a1->getAllocatedType();
  1359. bool isUnitSzStruct0 = a0ty->isStructTy() && a0ty->getStructNumElements() == 1;
  1360. bool isUnitSzStruct1 = a1ty->isStructTy() && a1ty->getStructNumElements() == 1;
  1361. auto sz0 = DL.getTypeAllocSize(a0ty);
  1362. auto sz1 = DL.getTypeAllocSize(a1ty);
  1363. if (sz0 == sz1 && (isUnitSzStruct0 || isUnitSzStruct1))
  1364. return getNestedLevelInStruct(a0ty) < getNestedLevelInStruct(a1ty);
  1365. return sz0 < sz1;
  1366. };
  1367. std::priority_queue<AllocaInst *, std::vector<AllocaInst *>,
  1368. std::function<bool(AllocaInst *, AllocaInst *)>>
  1369. WorkList(size_cmp);
  1370. std::unordered_map<AllocaInst*, DbgDeclareInst*> DDIMap;
  1371. // Scan the entry basic block, adding allocas to the worklist.
  1372. BasicBlock &BB = F.getEntryBlock();
  1373. for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
  1374. if (AllocaInst *A = dyn_cast<AllocaInst>(I)) {
  1375. if (!A->user_empty()) {
  1376. WorkList.push(A);
1377. // Merge GEP uses of the alloca.
  1378. HLModule::MergeGepUse(A);
  1379. if (DbgDeclareInst *DDI = llvm::FindAllocaDbgDeclare(A)) {
  1380. DDIMap[A] = DDI;
  1381. }
  1382. }
  1383. }
  1384. DIBuilder DIB(*F.getParent(), /*AllowUnresolved*/ false);
  1385. // Process the worklist
  1386. bool Changed = false;
  1387. while (!WorkList.empty()) {
  1388. AllocaInst *AI = WorkList.top();
  1389. WorkList.pop();
  1390. // Handle dead allocas trivially. These can be formed by SROA'ing arrays
  1391. // with unused elements.
  1392. if (AI->use_empty()) {
  1393. AI->eraseFromParent();
  1394. Changed = true;
  1395. continue;
  1396. }
  1397. const bool bAllowReplace = true;
  1398. if (SROA_Helper::LowerMemcpy(AI, /*annotation*/ nullptr, typeSys, DL,
  1399. bAllowReplace)) {
  1400. Changed = true;
  1401. continue;
  1402. }
  1403. // If this alloca is impossible for us to promote, reject it early.
  1404. if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized())
  1405. continue;
  1406. // Check to see if we can perform the core SROA transformation. We cannot
  1407. // transform the allocation instruction if it is an array allocation
  1408. // (allocations OF arrays are ok though), and an allocation of a scalar
  1409. // value cannot be decomposed at all.
  1410. uint64_t AllocaSize = DL.getTypeAllocSize(AI->getAllocatedType());
  1411. // Do not promote [0 x %struct].
  1412. if (AllocaSize == 0)
  1413. continue;
  1414. Type *Ty = AI->getAllocatedType();
  1415. // Skip empty struct type.
  1416. if (SROA_Helper::IsEmptyStructType(Ty, typeSys)) {
  1417. SROA_Helper::MarkEmptyStructUsers(AI, DeadInsts);
  1418. DeleteDeadInstructions();
  1419. continue;
  1420. }
1421. // If the alloca looks like a good candidate for scalar replacement, and if
1422. // all its users can be transformed, then split up the aggregate into its
1423. // separate elements.
  1425. if (ShouldAttemptScalarRepl(AI) && isSafeAllocaToScalarRepl(AI)) {
  1426. std::vector<Value *> Elts;
  1427. IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(AI));
  1428. bool hasPrecise = HLModule::HasPreciseAttributeWithMetadata(AI);
  1429. bool SROAed = SROA_Helper::DoScalarReplacement(
  1430. AI, Elts, Builder, /*bFlatVector*/ true, hasPrecise, typeSys, DL,
  1431. DeadInsts);
  1432. if (SROAed) {
  1433. Type *Ty = AI->getAllocatedType();
1434. // Remove allocas of structs whose annotation marks them as empty.
  1435. if (StructType *ST = dyn_cast<StructType>(Ty)) {
  1436. if (!dxilutil::IsHLSLMatrixType(Ty)) {
  1437. DxilStructAnnotation *SA = typeSys.GetStructAnnotation(ST);
  1438. if (SA && SA->IsEmptyStruct()) {
  1439. for (User *U : AI->users()) {
  1440. if (StoreInst *SI = dyn_cast<StoreInst>(U))
  1441. DeadInsts.emplace_back(SI);
  1442. }
  1443. DeleteDeadInstructions();
  1444. AI->replaceAllUsesWith(UndefValue::get(AI->getType()));
  1445. AI->eraseFromParent();
  1446. continue;
  1447. }
  1448. }
  1449. }
  1450. DbgDeclareInst *DDI = nullptr;
  1451. unsigned debugOffset = 0;
  1452. auto iter = DDIMap.find(AI);
  1453. if (iter != DDIMap.end()) {
  1454. DDI = iter->second;
  1455. }
  1456. // Push Elts into workList.
  1457. for (auto iter = Elts.begin(); iter != Elts.end(); iter++) {
  1458. AllocaInst *Elt = cast<AllocaInst>(*iter);
  1459. WorkList.push(Elt);
  1460. if (DDI) {
  1461. Type *Ty = Elt->getAllocatedType();
  1462. unsigned size = DL.getTypeAllocSize(Ty);
  1463. DIExpression *DDIExp =
  1464. DIB.createBitPieceExpression(debugOffset, size);
  1465. debugOffset += size;
  1466. DbgDeclareInst *EltDDI = cast<DbgDeclareInst>(DIB.insertDeclare(
  1467. Elt, DDI->getVariable(), DDIExp, DDI->getDebugLoc(), DDI));
  1468. DDIMap[Elt] = EltDDI;
  1469. }
  1470. }
  1471. // Now erase any instructions that were made dead while rewriting the
  1472. // alloca.
  1473. DeleteDeadInstructions();
  1474. ++NumReplaced;
  1475. DXASSERT(AI->getNumUses() == 0, "must have zero users.");
  1476. AI->eraseFromParent();
  1477. Changed = true;
  1478. continue;
  1479. }
  1480. }
  1481. }
  1482. return Changed;
  1483. }
1484. // markPrecise - To preserve the precise attribute on allocas that promotion
1485. // might remove, mark the attribute with a function call on the alloca's stores.
  1486. bool SROA_HLSL::markPrecise(Function &F) {
  1487. bool Changed = false;
  1488. BasicBlock &BB = F.getEntryBlock();
  1489. for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
  1490. if (AllocaInst *A = dyn_cast<AllocaInst>(I)) {
  1491. // TODO: Only do this on basic types.
  1492. if (HLModule::HasPreciseAttributeWithMetadata(A)) {
  1493. HLModule::MarkPreciseAttributeOnPtrWithFunctionCall(A,
  1494. *(F.getParent()));
  1495. Changed = true;
  1496. }
  1497. }
  1498. return Changed;
  1499. }
  1500. /// DeleteDeadInstructions - Erase instructions on the DeadInstrs list,
  1501. /// recursively including all their operands that become trivially dead.
  1502. void SROA_HLSL::DeleteDeadInstructions() {
  1503. while (!DeadInsts.empty()) {
  1504. Instruction *I = cast<Instruction>(DeadInsts.pop_back_val());
  1505. for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
  1506. if (Instruction *U = dyn_cast<Instruction>(*OI)) {
  1507. // Zero out the operand and see if it becomes trivially dead.
  1508. // (But, don't add allocas to the dead instruction list -- they are
  1509. // already on the worklist and will be deleted separately.)
  1510. *OI = nullptr;
  1511. if (isInstructionTriviallyDead(U) && !isa<AllocaInst>(U))
  1512. DeadInsts.push_back(U);
  1513. }
  1514. I->eraseFromParent();
  1515. }
  1516. }
  1517. /// isSafeForScalarRepl - Check if instruction I is a safe use with regard to
  1518. /// performing scalar replacement of alloca AI. The results are flagged in
  1519. /// the Info parameter. Offset indicates the position within AI that is
  1520. /// referenced by this instruction.
  1521. void SROA_HLSL::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
  1522. AllocaInfo &Info) {
  1523. if (I->getType()->isPointerTy()) {
  1524. // Don't check object pointers.
  1525. if (dxilutil::IsHLSLObjectType(I->getType()->getPointerElementType()))
  1526. return;
  1527. }
  1528. const DataLayout &DL = I->getModule()->getDataLayout();
  1529. for (Use &U : I->uses()) {
  1530. Instruction *User = cast<Instruction>(U.getUser());
  1531. if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
  1532. isSafeForScalarRepl(BC, Offset, Info);
  1533. } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
  1534. uint64_t GEPOffset = Offset;
  1535. isSafeGEP(GEPI, GEPOffset, Info);
  1536. if (!Info.isUnsafe)
  1537. isSafeForScalarRepl(GEPI, GEPOffset, Info);
  1538. } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
  1539. ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
  1540. if (!Length || Length->isNegative())
  1541. return MarkUnsafe(Info, User);
  1542. isSafeMemAccess(Offset, Length->getZExtValue(), nullptr,
  1543. U.getOperandNo() == 0, Info, MI,
  1544. true /*AllowWholeAccess*/);
  1545. } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
  1546. if (!LI->isSimple())
  1547. return MarkUnsafe(Info, User);
  1548. Type *LIType = LI->getType();
  1549. isSafeMemAccess(Offset, DL.getTypeAllocSize(LIType), LIType, false, Info,
  1550. LI, true /*AllowWholeAccess*/);
  1551. Info.hasALoadOrStore = true;
  1552. } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
  1553. // Store is ok if storing INTO the pointer, not storing the pointer
  1554. if (!SI->isSimple() || SI->getOperand(0) == I)
  1555. return MarkUnsafe(Info, User);
  1556. Type *SIType = SI->getOperand(0)->getType();
  1557. isSafeMemAccess(Offset, DL.getTypeAllocSize(SIType), SIType, true, Info,
  1558. SI, true /*AllowWholeAccess*/);
  1559. Info.hasALoadOrStore = true;
  1560. } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
  1561. if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
  1562. II->getIntrinsicID() != Intrinsic::lifetime_end)
  1563. return MarkUnsafe(Info, User);
  1564. } else if (isa<PHINode>(User) || isa<SelectInst>(User)) {
  1565. isSafePHISelectUseForScalarRepl(User, Offset, Info);
  1566. } else if (CallInst *CI = dyn_cast<CallInst>(User)) {
  1567. HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
  1568. // Most HL functions are safe for scalar repl.
  1569. if (HLOpcodeGroup::NotHL == group)
  1570. return MarkUnsafe(Info, User);
  1571. else if (HLOpcodeGroup::HLIntrinsic == group) {
  1572. // TODO: should we check HL parameter type for UDT overload instead of basing on IOP?
  1573. IntrinsicOp opcode = static_cast<IntrinsicOp>(GetHLOpcode(CI));
  1574. if (IntrinsicOp::IOP_TraceRay == opcode ||
  1575. IntrinsicOp::IOP_ReportHit == opcode ||
  1576. IntrinsicOp::IOP_CallShader == opcode) {
  1577. return MarkUnsafe(Info, User);
  1578. }
  1579. }
  1580. } else {
  1581. return MarkUnsafe(Info, User);
  1582. }
  1583. if (Info.isUnsafe)
  1584. return;
  1585. }
  1586. }
1587. /// isSafePHISelectUseForScalarRepl - If we see a PHI node or select using a pointer
  1588. /// derived from the alloca, we can often still split the alloca into elements.
  1589. /// This is useful if we have a large alloca where one element is phi'd
  1590. /// together somewhere: we can SRoA and promote all the other elements even if
  1591. /// we end up not being able to promote this one.
  1592. ///
  1593. /// All we require is that the uses of the PHI do not index into other parts of
1594. /// the alloca. The most important use case for this is single loads and stores
  1595. /// that are PHI'd together, which can happen due to code sinking.
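//
// Illustrative pattern this accepts: a PHI of element pointers whose only
// uses are simple loads/stores of that element, e.g.
//   %p = phi float* [ %elt, %bb1 ], [ %elt, %bb2 ]
//   %v = load float* %p
// GEP uses of such a PHI/select are only allowed when all indices are zero.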
  1596. void SROA_HLSL::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
  1597. AllocaInfo &Info) {
  1598. // If we've already checked this PHI, don't do it again.
  1599. if (PHINode *PN = dyn_cast<PHINode>(I))
  1600. if (!Info.CheckedPHIs.insert(PN).second)
  1601. return;
  1602. const DataLayout &DL = I->getModule()->getDataLayout();
  1603. for (User *U : I->users()) {
  1604. Instruction *UI = cast<Instruction>(U);
  1605. if (BitCastInst *BC = dyn_cast<BitCastInst>(UI)) {
  1606. isSafePHISelectUseForScalarRepl(BC, Offset, Info);
  1607. } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(UI)) {
  1608. // Only allow "bitcast" GEPs for simplicity. We could generalize this,
  1609. // but would have to prove that we're staying inside of an element being
  1610. // promoted.
  1611. if (!GEPI->hasAllZeroIndices())
  1612. return MarkUnsafe(Info, UI);
  1613. isSafePHISelectUseForScalarRepl(GEPI, Offset, Info);
  1614. } else if (LoadInst *LI = dyn_cast<LoadInst>(UI)) {
  1615. if (!LI->isSimple())
  1616. return MarkUnsafe(Info, UI);
  1617. Type *LIType = LI->getType();
  1618. isSafeMemAccess(Offset, DL.getTypeAllocSize(LIType), LIType, false, Info,
  1619. LI, false /*AllowWholeAccess*/);
  1620. Info.hasALoadOrStore = true;
  1621. } else if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
  1622. // Store is ok if storing INTO the pointer, not storing the pointer
  1623. if (!SI->isSimple() || SI->getOperand(0) == I)
  1624. return MarkUnsafe(Info, UI);
  1625. Type *SIType = SI->getOperand(0)->getType();
  1626. isSafeMemAccess(Offset, DL.getTypeAllocSize(SIType), SIType, true, Info,
  1627. SI, false /*AllowWholeAccess*/);
  1628. Info.hasALoadOrStore = true;
  1629. } else if (isa<PHINode>(UI) || isa<SelectInst>(UI)) {
  1630. isSafePHISelectUseForScalarRepl(UI, Offset, Info);
  1631. } else {
  1632. return MarkUnsafe(Info, UI);
  1633. }
  1634. if (Info.isUnsafe)
  1635. return;
  1636. }
  1637. }
  1638. /// isSafeGEP - Check if a GEP instruction can be handled for scalar
  1639. /// replacement. It is safe when all the indices are constant, in-bounds
  1640. /// references, and when the resulting offset corresponds to an element within
  1641. /// the alloca type. The results are flagged in the Info parameter. Upon
  1642. /// return, Offset is adjusted as specified by the GEP indices.
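//
// Illustrative example: a dynamic index such as
//   getelementptr [8 x float]* %A, i32 0, i32 %i
// is checked as if %i were the largest in-bounds value (7 here), and the
// access is recorded via Info.hasArrayIndexing / Info.hasVectorIndexing.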
  1643. void SROA_HLSL::isSafeGEP(GetElementPtrInst *GEPI, uint64_t &Offset,
  1644. AllocaInfo &Info) {
  1645. gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI);
  1646. if (GEPIt == E)
  1647. return;
  1648. bool NonConstant = false;
  1649. unsigned NonConstantIdxSize = 0;
  1650. // Compute the offset due to this GEP and check if the alloca has a
  1651. // component element at that offset.
  1652. SmallVector<Value *, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
  1653. auto indicesIt = Indices.begin();
  1654. // Walk through the GEP type indices, checking the types that this indexes
  1655. // into.
  1656. uint32_t arraySize = 0;
  1657. bool isArrayIndexing = false;
  1658. for (;GEPIt != E; ++GEPIt) {
  1659. Type *Ty = *GEPIt;
  1660. if (Ty->isStructTy() && !dxilutil::IsHLSLMatrixType(Ty)) {
1661. // Don't descend into a struct when marking hasArrayIndexing and
1662. // hasVectorIndexing; the levels below won't affect scalar repl of the struct.
  1663. break;
  1664. }
  1665. if (GEPIt->isArrayTy()) {
  1666. arraySize = GEPIt->getArrayNumElements();
  1667. isArrayIndexing = true;
  1668. }
  1669. if (GEPIt->isVectorTy()) {
  1670. arraySize = GEPIt->getVectorNumElements();
  1671. isArrayIndexing = false;
  1672. }
  1673. // Allow dynamic indexing
  1674. ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
  1675. if (!IdxVal) {
  1676. // for dynamic index, use array size - 1 to check the offset
  1677. *indicesIt = Constant::getIntegerValue(
  1678. Type::getInt32Ty(GEPI->getContext()), APInt(32, arraySize - 1));
  1679. if (isArrayIndexing)
  1680. Info.hasArrayIndexing = true;
  1681. else
  1682. Info.hasVectorIndexing = true;
  1683. NonConstant = true;
  1684. }
  1685. indicesIt++;
  1686. }
1687. // Keep iterating only to replace any remaining non-constant indices.
  1688. for (;GEPIt != E; ++GEPIt) {
  1689. Type *Ty = *GEPIt;
  1690. if (Ty->isArrayTy()) {
  1691. arraySize = GEPIt->getArrayNumElements();
  1692. }
  1693. if (Ty->isVectorTy()) {
  1694. arraySize = GEPIt->getVectorNumElements();
  1695. }
  1696. // Allow dynamic indexing
  1697. ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
  1698. if (!IdxVal) {
  1699. // for dynamic index, use array size - 1 to check the offset
  1700. *indicesIt = Constant::getIntegerValue(
  1701. Type::getInt32Ty(GEPI->getContext()), APInt(32, arraySize - 1));
  1702. NonConstant = true;
  1703. }
  1704. indicesIt++;
  1705. }
  1706. // If this GEP is non-constant then the last operand must have been a
  1707. // dynamic index into a vector. Pop this now as it has no impact on the
  1708. // constant part of the offset.
  1709. if (NonConstant)
  1710. Indices.pop_back();
  1711. const DataLayout &DL = GEPI->getModule()->getDataLayout();
  1712. Offset += DL.getIndexedOffset(GEPI->getPointerOperandType(), Indices);
  1713. if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, NonConstantIdxSize,
  1714. DL))
  1715. MarkUnsafe(Info, GEPI);
  1716. }
  1717. /// isHomogeneousAggregate - Check if type T is a struct or array containing
  1718. /// elements of the same type (which is always true for arrays). If so,
  1719. /// return true with NumElts and EltTy set to the number of elements and the
  1720. /// element type, respectively.
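//
// For example, [4 x float] and { float, float, float, float } both report
// NumElts == 4 with EltTy == float, while { float, i32 } returns false.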
  1721. static bool isHomogeneousAggregate(Type *T, unsigned &NumElts, Type *&EltTy) {
  1722. if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
  1723. NumElts = AT->getNumElements();
  1724. EltTy = (NumElts == 0 ? nullptr : AT->getElementType());
  1725. return true;
  1726. }
  1727. if (StructType *ST = dyn_cast<StructType>(T)) {
  1728. NumElts = ST->getNumContainedTypes();
  1729. EltTy = (NumElts == 0 ? nullptr : ST->getContainedType(0));
  1730. for (unsigned n = 1; n < NumElts; ++n) {
  1731. if (ST->getContainedType(n) != EltTy)
  1732. return false;
  1733. }
  1734. return true;
  1735. }
  1736. return false;
  1737. }
  1738. /// isCompatibleAggregate - Check if T1 and T2 are either the same type or are
  1739. /// "homogeneous" aggregates with the same element type and number of elements.
  1740. static bool isCompatibleAggregate(Type *T1, Type *T2) {
  1741. if (T1 == T2)
  1742. return true;
  1743. unsigned NumElts1, NumElts2;
  1744. Type *EltTy1, *EltTy2;
  1745. if (isHomogeneousAggregate(T1, NumElts1, EltTy1) &&
  1746. isHomogeneousAggregate(T2, NumElts2, EltTy2) && NumElts1 == NumElts2 &&
  1747. EltTy1 == EltTy2)
  1748. return true;
  1749. return false;
  1750. }
  1751. /// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI
  1752. /// alloca or has an offset and size that corresponds to a component element
  1753. /// within it. The offset checked here may have been formed from a GEP with a
  1754. /// pointer bitcasted to a different type.
  1755. ///
  1756. /// If AllowWholeAccess is true, then this allows uses of the entire alloca as a
  1757. /// unit. If false, it only allows accesses known to be in a single element.
  1758. void SROA_HLSL::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
  1759. Type *MemOpType, bool isStore, AllocaInfo &Info,
  1760. Instruction *TheAccess, bool AllowWholeAccess) {
1761. // All HLSL cares about here is Info.hasVectorIndexing, so there is
1762. // nothing to do.
  1763. }
  1764. /// TypeHasComponent - Return true if T has a component type with the
  1765. /// specified offset and size. If Size is zero, do not check the size.
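//
// Illustrative example (assuming a default data layout): for
// { i32, [4 x float] }, Offset == 8 with Size == 4 lands exactly on the
// second float of the inner array and returns true, while Offset == 6 would
// straddle element boundaries and return false.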
  1766. bool SROA_HLSL::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size,
  1767. const DataLayout &DL) {
  1768. Type *EltTy;
  1769. uint64_t EltSize;
  1770. if (StructType *ST = dyn_cast<StructType>(T)) {
  1771. const StructLayout *Layout = DL.getStructLayout(ST);
  1772. unsigned EltIdx = Layout->getElementContainingOffset(Offset);
  1773. EltTy = ST->getContainedType(EltIdx);
  1774. EltSize = DL.getTypeAllocSize(EltTy);
  1775. Offset -= Layout->getElementOffset(EltIdx);
  1776. } else if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
  1777. EltTy = AT->getElementType();
  1778. EltSize = DL.getTypeAllocSize(EltTy);
  1779. if (Offset >= AT->getNumElements() * EltSize)
  1780. return false;
  1781. Offset %= EltSize;
  1782. } else if (VectorType *VT = dyn_cast<VectorType>(T)) {
  1783. EltTy = VT->getElementType();
  1784. EltSize = DL.getTypeAllocSize(EltTy);
  1785. if (Offset >= VT->getNumElements() * EltSize)
  1786. return false;
  1787. Offset %= EltSize;
  1788. } else {
  1789. return false;
  1790. }
  1791. if (Offset == 0 && (Size == 0 || EltSize == Size))
  1792. return true;
  1793. // Check if the component spans multiple elements.
  1794. if (Offset + Size > EltSize)
  1795. return false;
  1796. return TypeHasComponent(EltTy, Offset, Size, DL);
  1797. }
1798. /// LoadVectorOrStructArray - Load a vector array like [2 x <4 x float>] from
1799. /// 4 arrays like [2 x float], or a struct array like
1800. /// [2 x { <4 x float>, <4 x uint> }]
1801. /// from arrays like [2 x <4 x float>], [2 x <4 x uint>].
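//
// Illustrative sketch: with NewElts holding four [2 x float] allocas (one per
// component), element i of a [2 x <4 x float>] result is assembled by loading
// NewElts[c][i] for each component c and inserting it at position c.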
  1802. static Value *LoadVectorOrStructArray(ArrayType *AT, ArrayRef<Value *> NewElts,
  1803. SmallVector<Value *, 8> &idxList,
  1804. IRBuilder<> &Builder) {
  1805. Type *EltTy = AT->getElementType();
  1806. Value *retVal = llvm::UndefValue::get(AT);
  1807. Type *i32Ty = Type::getInt32Ty(EltTy->getContext());
  1808. uint32_t arraySize = AT->getNumElements();
  1809. for (uint32_t i = 0; i < arraySize; i++) {
  1810. Constant *idx = ConstantInt::get(i32Ty, i);
  1811. idxList.emplace_back(idx);
  1812. if (ArrayType *EltAT = dyn_cast<ArrayType>(EltTy)) {
  1813. Value *EltVal = LoadVectorOrStructArray(EltAT, NewElts, idxList, Builder);
  1814. retVal = Builder.CreateInsertValue(retVal, EltVal, i);
  1815. } else {
  1816. assert((EltTy->isVectorTy() ||
  1817. EltTy->isStructTy()) && "must be a vector or struct type");
  1818. bool isVectorTy = EltTy->isVectorTy();
  1819. Value *retVec = llvm::UndefValue::get(EltTy);
  1820. if (isVectorTy) {
  1821. for (uint32_t c = 0; c < EltTy->getVectorNumElements(); c++) {
  1822. Value *GEP = Builder.CreateInBoundsGEP(NewElts[c], idxList);
  1823. Value *elt = Builder.CreateLoad(GEP);
  1824. retVec = Builder.CreateInsertElement(retVec, elt, c);
  1825. }
  1826. } else {
  1827. for (uint32_t c = 0; c < EltTy->getStructNumElements(); c++) {
  1828. Value *GEP = Builder.CreateInBoundsGEP(NewElts[c], idxList);
  1829. Value *elt = Builder.CreateLoad(GEP);
  1830. retVec = Builder.CreateInsertValue(retVec, elt, c);
  1831. }
  1832. }
  1833. retVal = Builder.CreateInsertValue(retVal, retVec, i);
  1834. }
  1835. idxList.pop_back();
  1836. }
  1837. return retVal;
  1838. }
1839. /// StoreVectorOrStructArray - Store a vector array like [2 x <4 x float>] to
1840. /// 4 arrays like [2 x float], or a struct array like
1841. /// [2 x { <4 x float>, <4 x uint> }]
1842. /// to arrays like [2 x <4 x float>], [2 x <4 x uint>].
static void StoreVectorOrStructArray(ArrayType *AT, Value *val,
                                     ArrayRef<Value *> NewElts,
                                     SmallVector<Value *, 8> &idxList,
                                     IRBuilder<> &Builder) {
  Type *EltTy = AT->getElementType();
  Type *i32Ty = Type::getInt32Ty(EltTy->getContext());
  uint32_t arraySize = AT->getNumElements();
  for (uint32_t i = 0; i < arraySize; i++) {
    Value *elt = Builder.CreateExtractValue(val, i);

    Constant *idx = ConstantInt::get(i32Ty, i);
    idxList.emplace_back(idx);

    if (ArrayType *EltAT = dyn_cast<ArrayType>(EltTy)) {
      StoreVectorOrStructArray(EltAT, elt, NewElts, idxList, Builder);
    } else {
      assert((EltTy->isVectorTy() || EltTy->isStructTy()) &&
             "must be a vector or struct type");
      bool isVectorTy = EltTy->isVectorTy();

      if (isVectorTy) {
        for (uint32_t c = 0; c < EltTy->getVectorNumElements(); c++) {
          Value *component = Builder.CreateExtractElement(elt, c);
          Value *GEP = Builder.CreateInBoundsGEP(NewElts[c], idxList);
          Builder.CreateStore(component, GEP);
        }
      } else {
        for (uint32_t c = 0; c < EltTy->getStructNumElements(); c++) {
          Value *field = Builder.CreateExtractValue(elt, c);
          Value *GEP = Builder.CreateInBoundsGEP(NewElts[c], idxList);
          Builder.CreateStore(field, GEP);
        }
      }
    }

    idxList.pop_back();
  }
}
/// HasPadding - Return true if the specified type has any structure or
/// alignment padding in between the elements that would be split apart
/// by SROA; return false otherwise.
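/// For example (illustrative, assuming a typical layout where double is
/// 8-byte aligned): struct { float a; double b; } has 4 bytes of padding
/// between its fields, so HasPadding returns true for it.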
static bool HasPadding(Type *Ty, const DataLayout &DL) {
  if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    Ty = ATy->getElementType();
    return DL.getTypeSizeInBits(Ty) != DL.getTypeAllocSizeInBits(Ty);
  }

  // SROA currently handles only Arrays and Structs.
  StructType *STy = cast<StructType>(Ty);
  const StructLayout *SL = DL.getStructLayout(STy);
  unsigned PrevFieldBitOffset = 0;
  for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
    unsigned FieldBitOffset = SL->getElementOffsetInBits(i);

    // Check to see if there is any padding between this element and the
    // previous one.
    if (i) {
      unsigned PrevFieldEnd =
          PrevFieldBitOffset + DL.getTypeSizeInBits(STy->getElementType(i - 1));
      if (PrevFieldEnd < FieldBitOffset)
        return true;
    }
    PrevFieldBitOffset = FieldBitOffset;
  }

  // Check for tail padding.
  if (unsigned EltCount = STy->getNumElements()) {
    unsigned PrevFieldEnd =
        PrevFieldBitOffset +
        DL.getTypeSizeInBits(STy->getElementType(EltCount - 1));
    if (PrevFieldEnd < SL->getSizeInBits())
      return true;
  }
  return false;
}
/// isSafeAllocaToScalarRepl - Check to see if the specified allocation of
/// an aggregate can be broken down into elements. Return true if it is safe
/// to do so, false otherwise.
bool SROA_HLSL::isSafeAllocaToScalarRepl(AllocaInst *AI) {
  // Loop over the use list of the alloca. We can only transform it if all of
  // the users are safe to transform.
  AllocaInfo Info(AI);

  isSafeForScalarRepl(AI, 0, Info);
  if (Info.isUnsafe) {
    DEBUG(dbgs() << "Cannot transform: " << *AI << '\n');
    return false;
  }

  // Vector indexing requires translating the vector into an array first.
  if (Info.hasVectorIndexing)
    return false;

  const DataLayout &DL = AI->getModule()->getDataLayout();

  // Okay, we know all the users are promotable. If the aggregate is a memcpy
  // source and destination, we have to be careful. In particular, the memcpy
  // could be moving around elements that live in structure padding of the LLVM
  // types, but may actually be used. In these cases, we refuse to promote the
  // struct.
  if (Info.isMemCpySrc && Info.isMemCpyDst &&
      HasPadding(AI->getAllocatedType(), DL))
    return false;

  return true;
}
// Copy data from SrcPtr to DestPtr.
static void SimplePtrCopy(Value *DestPtr, Value *SrcPtr,
                          llvm::SmallVector<llvm::Value *, 16> &idxList,
                          IRBuilder<> &Builder) {
  if (idxList.size() > 1) {
    DestPtr = Builder.CreateInBoundsGEP(DestPtr, idxList);
    SrcPtr = Builder.CreateInBoundsGEP(SrcPtr, idxList);
  }
  llvm::LoadInst *ld = Builder.CreateLoad(SrcPtr);
  Builder.CreateStore(ld, DestPtr);
}
// Copy SrcVal to DestPtr.
static void SimpleValCopy(Value *DestPtr, Value *SrcVal,
                          llvm::SmallVector<llvm::Value *, 16> &idxList,
                          IRBuilder<> &Builder) {
  Value *DestGEP = Builder.CreateInBoundsGEP(DestPtr, idxList);
  Value *Val = SrcVal;
  // Skip the leading pointer index.
  for (unsigned i = 1; i < idxList.size(); i++) {
    ConstantInt *idx = cast<ConstantInt>(idxList[i]);
    Type *Ty = Val->getType();
    if (Ty->isAggregateType()) {
      Val = Builder.CreateExtractValue(Val, idx->getLimitedValue());
    }
  }

  Builder.CreateStore(Val, DestGEP);
}
static void SimpleCopy(Value *Dest, Value *Src,
                       llvm::SmallVector<llvm::Value *, 16> &idxList,
                       IRBuilder<> &Builder) {
  if (Src->getType()->isPointerTy())
    SimplePtrCopy(Dest, Src, idxList, Builder);
  else
    SimpleValCopy(Dest, Src, idxList, Builder);
}
static Value *CreateMergedGEP(Value *Ptr, SmallVector<Value *, 16> &idxList,
                              IRBuilder<> &Builder) {
  if (GEPOperator *GEPPtr = dyn_cast<GEPOperator>(Ptr)) {
    SmallVector<Value *, 2> IdxList(GEPPtr->idx_begin(), GEPPtr->idx_end());
    // Skip idxList.begin() because it is already covered by GEPPtr's indices.
    IdxList.append(idxList.begin() + 1, idxList.end());
    return Builder.CreateInBoundsGEP(GEPPtr->getPointerOperand(), IdxList);
  } else {
    return Builder.CreateInBoundsGEP(Ptr, idxList);
  }
}
static void EltMemCpy(Type *Ty, Value *Dest, Value *Src,
                      SmallVector<Value *, 16> &idxList, IRBuilder<> &Builder,
                      const DataLayout &DL) {
  Value *DestGEP = CreateMergedGEP(Dest, idxList, Builder);
  Value *SrcGEP = CreateMergedGEP(Src, idxList, Builder);
  unsigned size = DL.getTypeAllocSize(Ty);
  Builder.CreateMemCpy(DestGEP, SrcGEP, size, size);
}
static bool IsMemCpyTy(Type *Ty, DxilTypeSystem &typeSys) {
  if (!Ty->isAggregateType())
    return false;
  if (dxilutil::IsHLSLMatrixType(Ty))
    return false;
  if (dxilutil::IsHLSLObjectType(Ty))
    return false;
  if (StructType *ST = dyn_cast<StructType>(Ty)) {
    DxilStructAnnotation *STA = typeSys.GetStructAnnotation(ST);
    DXASSERT(STA, "require annotation here");
    if (STA->IsEmptyStruct())
      return false;
    // Skip a single-element struct whose element is a basic type, because
    // creating a memcpy would create a GEP on the struct; memcpy the basic
    // type only.
    if (ST->getNumElements() == 1)
      return IsMemCpyTy(ST->getElementType(0), typeSys);
  }
  return true;
}
// Split copy into ld/st.
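// For example (illustrative): copying a struct with a float2 field and a
// float field element by element descends into the struct, GEPs to field i of
// Dest/Src with the accumulated index list, and emits a load from the source
// field followed by a store to the destination field (or an HL matrix
// load/store pair for matrix fields).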
static void SplitCpy(Type *Ty, Value *Dest, Value *Src,
                     SmallVector<Value *, 16> &idxList, IRBuilder<> &Builder,
                     const DataLayout &DL, DxilTypeSystem &typeSys,
                     const DxilFieldAnnotation *fieldAnnotation,
                     const bool bEltMemCpy = true) {
  if (PointerType *PT = dyn_cast<PointerType>(Ty)) {
    Constant *idx = Constant::getIntegerValue(
        IntegerType::get(Ty->getContext(), 32), APInt(32, 0));
    idxList.emplace_back(idx);

    SplitCpy(PT->getElementType(), Dest, Src, idxList, Builder, DL, typeSys,
             fieldAnnotation, bEltMemCpy);

    idxList.pop_back();
  } else if (dxilutil::IsHLSLMatrixType(Ty)) {
    // If there is no fieldAnnotation, default to row major.
    // For a load followed immediately by a store this is fine.
    bool bRowMajor = true;
    if (fieldAnnotation) {
      DXASSERT(fieldAnnotation->HasMatrixAnnotation(),
               "must have matrix annotation");
      bRowMajor = fieldAnnotation->GetMatrixAnnotation().Orientation ==
                  MatrixOrientation::RowMajor;
    }

    Module *M = Builder.GetInsertPoint()->getModule();
    Value *DestMatPtr;
    Value *SrcMatPtr;
    if (idxList.size() == 1 &&
        idxList[0] == ConstantInt::get(IntegerType::get(Ty->getContext(), 32),
                                       APInt(32, 0))) {
      // Avoid creating GEP(0).
      DestMatPtr = Dest;
      SrcMatPtr = Src;
    } else {
      DestMatPtr = Builder.CreateInBoundsGEP(Dest, idxList);
      SrcMatPtr = Builder.CreateInBoundsGEP(Src, idxList);
    }

    HLMatLoadStoreOpcode loadOp = bRowMajor ? HLMatLoadStoreOpcode::RowMatLoad
                                            : HLMatLoadStoreOpcode::ColMatLoad;
    HLMatLoadStoreOpcode storeOp = bRowMajor
                                       ? HLMatLoadStoreOpcode::RowMatStore
                                       : HLMatLoadStoreOpcode::ColMatStore;

    Value *Load = HLModule::EmitHLOperationCall(
        Builder, HLOpcodeGroup::HLMatLoadStore, static_cast<unsigned>(loadOp),
        Ty, {SrcMatPtr}, *M);
    HLModule::EmitHLOperationCall(Builder, HLOpcodeGroup::HLMatLoadStore,
                                  static_cast<unsigned>(storeOp), Ty,
                                  {DestMatPtr, Load}, *M);
  } else if (StructType *ST = dyn_cast<StructType>(Ty)) {
    if (dxilutil::IsHLSLObjectType(ST)) {
      // Avoid splitting HLSL objects.
      SimpleCopy(Dest, Src, idxList, Builder);
      return;
    }
    DxilStructAnnotation *STA = typeSys.GetStructAnnotation(ST);
    DXASSERT(STA, "require annotation here");
    if (STA->IsEmptyStruct())
      return;

    for (uint32_t i = 0; i < ST->getNumElements(); i++) {
      llvm::Type *ET = ST->getElementType(i);
      Constant *idx = llvm::Constant::getIntegerValue(
          IntegerType::get(Ty->getContext(), 32), APInt(32, i));
      idxList.emplace_back(idx);

      if (bEltMemCpy && IsMemCpyTy(ET, typeSys)) {
        EltMemCpy(ET, Dest, Src, idxList, Builder, DL);
      } else {
        DxilFieldAnnotation &EltAnnotation = STA->GetFieldAnnotation(i);
        SplitCpy(ET, Dest, Src, idxList, Builder, DL, typeSys, &EltAnnotation,
                 bEltMemCpy);
      }

      idxList.pop_back();
    }
  } else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
    Type *ET = AT->getElementType();

    for (uint32_t i = 0; i < AT->getNumElements(); i++) {
      Constant *idx = Constant::getIntegerValue(
          IntegerType::get(Ty->getContext(), 32), APInt(32, i));
      idxList.emplace_back(idx);

      if (bEltMemCpy && IsMemCpyTy(ET, typeSys)) {
        EltMemCpy(ET, Dest, Src, idxList, Builder, DL);
      } else {
        SplitCpy(ET, Dest, Src, idxList, Builder, DL, typeSys, fieldAnnotation,
                 bEltMemCpy);
      }

      idxList.pop_back();
    }
  } else {
    SimpleCopy(Dest, Src, idxList, Builder);
  }
}
// Given a pointer to a value, produces a list of pointers to all scalar
// elements of that value and their field annotations, at any nesting level.
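// For example (illustrative): for a pointer to a struct with a <2 x float>
// field and a [2 x float] field, SplitPtr emits one GEP per field and returns
// the two element pointers in EltPtrList, paired with the matching
// DxilFieldAnnotation entries in EltAnnotationList.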
static void SplitPtr(
    Value *Ptr,                        // The root value pointer
    SmallVectorImpl<Value *> &IdxList, // GEP indices stack during recursion
    Type *Ty,                          // Type at the current GEP indirection level
    const DxilFieldAnnotation &Annotation, // Annotation at the current GEP indirection level
    SmallVectorImpl<Value *> &EltPtrList,  // Accumulates pointers to each element found
    SmallVectorImpl<const DxilFieldAnnotation *> &EltAnnotationList, // Accumulates field annotations for each element found
    DxilTypeSystem &TypeSys, IRBuilder<> &Builder) {
  if (PointerType *PT = dyn_cast<PointerType>(Ty)) {
    Constant *idx = Constant::getIntegerValue(
        IntegerType::get(Ty->getContext(), 32), APInt(32, 0));
    IdxList.emplace_back(idx);

    SplitPtr(Ptr, IdxList, PT->getElementType(), Annotation, EltPtrList,
             EltAnnotationList, TypeSys, Builder);

    IdxList.pop_back();
    return;
  }

  if (StructType *ST = dyn_cast<StructType>(Ty)) {
    if (!dxilutil::IsHLSLMatrixType(Ty) && !dxilutil::IsHLSLObjectType(ST)) {
      const DxilStructAnnotation *SA = TypeSys.GetStructAnnotation(ST);
      for (uint32_t i = 0; i < ST->getNumElements(); i++) {
        llvm::Type *EltTy = ST->getElementType(i);

        Constant *idx = llvm::Constant::getIntegerValue(
            IntegerType::get(Ty->getContext(), 32), APInt(32, i));
        IdxList.emplace_back(idx);

        SplitPtr(Ptr, IdxList, EltTy, SA->GetFieldAnnotation(i), EltPtrList,
                 EltAnnotationList, TypeSys, Builder);

        IdxList.pop_back();
      }
      return;
    }
  }

  if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
    if (AT->getArrayNumElements() == 0) {
      // Skip cases like [0 x %struct]; there is nothing to copy.
      return;
    }

    Type *ElTy = AT->getElementType();
    SmallVector<ArrayType *, 4> nestArrayTys;
    nestArrayTys.emplace_back(AT);

    // Support multiple levels of array.
    while (ElTy->isArrayTy()) {
      ArrayType *ElAT = cast<ArrayType>(ElTy);
      nestArrayTys.emplace_back(ElAT);
      ElTy = ElAT->getElementType();
    }

    if (ElTy->isStructTy() && !dxilutil::IsHLSLMatrixType(ElTy)) {
      DXASSERT(0, "arrays of structs are not supported when splitting pointers");
      return;
    }
  }

  // Return a pointer to the current element and its annotation.
  Value *GEP = Builder.CreateInBoundsGEP(Ptr, IdxList);
  EltPtrList.emplace_back(GEP);
  EltAnnotationList.emplace_back(&Annotation);
}
// Support the case where bitcast (gep ptr, 0, 0) has been transformed into
// bitcast ptr.
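// MatchSizeByCheckElementType returns the number of leading zero indices at
// which the element type's alloc size matches `size` exactly.
// For example (illustrative): for a [4 x float]* and a 16-byte memcpy the
// size already matches at level 0 (no patching needed); for a single-element
// struct wrapping [4 x float] it matches one level down, so the caller
// rebuilds the GEP with that many zero indices.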
static unsigned MatchSizeByCheckElementType(Type *Ty, const DataLayout &DL,
                                            unsigned size, unsigned level) {
  unsigned ptrSize = DL.getTypeAllocSize(Ty);
  // Size matches; return the current level.
  if (ptrSize == size) {
    // Do not go deeper for matrix or object.
    if (dxilutil::IsHLSLMatrixType(Ty) || dxilutil::IsHLSLObjectType(Ty))
      return level;
    // For a struct, go deeper if the size does not change.
    // This leaves the memcpy at a deeper level when flattening.
    if (StructType *ST = dyn_cast<StructType>(Ty)) {
      if (ST->getNumElements() == 1) {
        return MatchSizeByCheckElementType(ST->getElementType(0), DL, size,
                                           level + 1);
      }
    }
    // Don't do this for arrays; an array will be flattened as a struct of
    // arrays.
    return level;
  }
  // Adding zero indices cannot make ptrSize bigger.
  if (ptrSize < size)
    return 0;
  // ptrSize > size.
  // Try to use the element type to make the size match.
  if (StructType *ST = dyn_cast<StructType>(Ty)) {
    return MatchSizeByCheckElementType(ST->getElementType(0), DL, size,
                                       level + 1);
  } else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
    return MatchSizeByCheckElementType(AT->getElementType(), DL, size,
                                       level + 1);
  } else {
    return 0;
  }
}
static void PatchZeroIdxGEP(Value *Ptr, Value *RawPtr, MemCpyInst *MI,
                            unsigned level, IRBuilder<> &Builder) {
  Value *zeroIdx = Builder.getInt32(0);
  Value *GEP = nullptr;
  if (GEPOperator *GEPPtr = dyn_cast<GEPOperator>(Ptr)) {
    SmallVector<Value *, 2> IdxList(GEPPtr->idx_begin(), GEPPtr->idx_end());
    // Do not use level + 1 here because the leading index is already included
    // in GEPPtr's indices.
    IdxList.append(level, zeroIdx);
    GEP = Builder.CreateInBoundsGEP(GEPPtr->getPointerOperand(), IdxList);
  } else {
    SmallVector<Value *, 2> IdxList(level + 1, zeroIdx);
    GEP = Builder.CreateInBoundsGEP(Ptr, IdxList);
  }
  // Use BitCastInst::Create to prevent the index list from being optimized.
  CastInst *Cast =
      BitCastInst::Create(Instruction::BitCast, GEP, RawPtr->getType());
  Builder.Insert(Cast);
  MI->replaceUsesOfWith(RawPtr, Cast);
  // Remove RawPtr if possible.
  if (RawPtr->user_empty()) {
    if (Instruction *I = dyn_cast<Instruction>(RawPtr)) {
      I->eraseFromParent();
    }
  }
}
void MemcpySplitter::PatchMemCpyWithZeroIdxGEP(MemCpyInst *MI,
                                               const DataLayout &DL) {
  Value *Dest = MI->getRawDest();
  Value *Src = MI->getRawSource();
  // Only remove one level of bitcast generated from inlining.
  if (BitCastOperator *BC = dyn_cast<BitCastOperator>(Dest))
    Dest = BC->getOperand(0);
  if (BitCastOperator *BC = dyn_cast<BitCastOperator>(Src))
    Src = BC->getOperand(0);

  IRBuilder<> Builder(MI);
  ConstantInt *zero = Builder.getInt32(0);
  Type *DestTy = Dest->getType()->getPointerElementType();
  Type *SrcTy = Src->getType()->getPointerElementType();

  // Support the case where bitcast (gep ptr, 0, 0) has been transformed into
  // bitcast ptr.
  // Also replace (gep ptr, 0) with ptr.
  ConstantInt *Length = cast<ConstantInt>(MI->getLength());
  unsigned size = Length->getLimitedValue();
  if (unsigned level = MatchSizeByCheckElementType(DestTy, DL, size, 0)) {
    PatchZeroIdxGEP(Dest, MI->getRawDest(), MI, level, Builder);
  } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(Dest)) {
    if (GEP->getNumIndices() == 1) {
      Value *idx = *GEP->idx_begin();
      if (idx == zero) {
        GEP->replaceAllUsesWith(GEP->getPointerOperand());
      }
    }
  }
  if (unsigned level = MatchSizeByCheckElementType(SrcTy, DL, size, 0)) {
    PatchZeroIdxGEP(Src, MI->getRawSource(), MI, level, Builder);
  } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(Src)) {
    if (GEP->getNumIndices() == 1) {
      Value *idx = *GEP->idx_begin();
      if (idx == zero) {
        GEP->replaceAllUsesWith(GEP->getPointerOperand());
      }
    }
  }
}
void MemcpySplitter::PatchMemCpyWithZeroIdxGEP(Module &M) {
  const DataLayout &DL = M.getDataLayout();
  for (Function &F : M.functions()) {
    for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
      for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
        // Avoid invalidating the iterator.
        Instruction *I = BI++;
        if (MemCpyInst *MI = dyn_cast<MemCpyInst>(I)) {
          PatchMemCpyWithZeroIdxGEP(MI, DL);
        }
      }
    }
  }
}
static void DeleteMemcpy(MemCpyInst *MI) {
  Value *Op0 = MI->getOperand(0);
  Value *Op1 = MI->getOperand(1);
  // Delete the memcpy.
  MI->eraseFromParent();
  if (Instruction *op0 = dyn_cast<Instruction>(Op0)) {
    if (op0->user_empty())
      op0->eraseFromParent();
  }
  if (Instruction *op1 = dyn_cast<Instruction>(Op1)) {
    if (op1->user_empty())
      op1->eraseFromParent();
  }
}
// If a user is a function call, return the parameter annotation so the matrix
// orientation (row/column major) can be determined.
static DxilFieldAnnotation *FindAnnotationFromMatUser(Value *Mat,
                                                      DxilTypeSystem &typeSys) {
  for (User *U : Mat->users()) {
    if (CallInst *CI = dyn_cast<CallInst>(U)) {
      Function *F = CI->getCalledFunction();
      if (DxilFunctionAnnotation *Anno = typeSys.GetFunctionAnnotation(F)) {
        for (unsigned i = 0; i < CI->getNumArgOperands(); i++) {
          if (CI->getArgOperand(i) == Mat) {
            return &Anno->GetParameterAnnotation(i);
          }
        }
      }
    }
  }
  return nullptr;
}
void MemcpySplitter::SplitMemCpy(MemCpyInst *MI, const DataLayout &DL,
                                 DxilFieldAnnotation *fieldAnnotation,
                                 DxilTypeSystem &typeSys,
                                 const bool bEltMemCpy) {
  Value *Dest = MI->getRawDest();
  Value *Src = MI->getRawSource();
  // Only remove one level of bitcast generated from inlining.
  if (BitCastOperator *BC = dyn_cast<BitCastOperator>(Dest))
    Dest = BC->getOperand(0);
  if (BitCastOperator *BC = dyn_cast<BitCastOperator>(Src))
    Src = BC->getOperand(0);

  if (Dest == Src) {
    // Delete the self copy.
    DeleteMemcpy(MI);
    return;
  }

  IRBuilder<> Builder(MI);
  Type *DestTy = Dest->getType()->getPointerElementType();
  Type *SrcTy = Src->getType()->getPointerElementType();

  // Allow copies between different address spaces.
  if (DestTy != SrcTy) {
    return;
  }

  // Try to find a fieldAnnotation from the users of Dest/Src.
  if (!fieldAnnotation) {
    Type *EltTy = dxilutil::GetArrayEltTy(DestTy);
    if (dxilutil::IsHLSLMatrixType(EltTy)) {
      fieldAnnotation = FindAnnotationFromMatUser(Dest, typeSys);
    }
  }

  llvm::SmallVector<llvm::Value *, 16> idxList;
  // Split the copy. If fieldAnnotation is still null, SplitCpy falls back to
  // the row-major default for matrices, which is fine for a load followed
  // immediately by a store.
  SplitCpy(Dest->getType(), Dest, Src, idxList, Builder, DL, typeSys,
           fieldAnnotation, bEltMemCpy);
  // Delete the memcpy.
  DeleteMemcpy(MI);
}
void MemcpySplitter::Split(llvm::Function &F) {
  const DataLayout &DL = F.getParent()->getDataLayout();

  Function *memcpy = nullptr;
  for (Function &Fn : F.getParent()->functions()) {
    if (Fn.getIntrinsicID() == Intrinsic::memcpy) {
      memcpy = &Fn;
      break;
    }
  }
  if (memcpy) {
    for (auto U = memcpy->user_begin(); U != memcpy->user_end();) {
      MemCpyInst *MI = cast<MemCpyInst>(*(U++));
      if (MI->getParent()->getParent() != &F)
        continue;
      // Matrices are treated as scalar types and will not use memcpy here,
      // so passing nullptr for fieldAnnotation is safe.
      SplitMemCpy(MI, DL, /*fieldAnnotation*/ nullptr, m_typeSys,
                  /*bEltMemCpy*/ false);
    }
  }
}
//===----------------------------------------------------------------------===//
// SROA Helper
//===----------------------------------------------------------------------===//
/// RewriteForGEP - Rewrite the GEP to be relative to a new element when one
/// can be found that is a struct field. If none can be found, create GEPs
/// into the new elements and try to rewrite the GEP's users with them.
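/// For example (illustrative): with %a : { float, [2 x float] }* split into
/// %a.0 and %a.1, a GEP like (gep %a, 0, 1, %i) is rewritten to
/// (gep %a.1, 0, %i); a GEP that never reaches a struct field is instead
/// re-created on every new element and its users are rewritten recursively.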
void SROA_Helper::RewriteForGEP(GEPOperator *GEP, IRBuilder<> &Builder) {
  assert(OldVal == GEP->getPointerOperand() && "");
  Value *NewPointer = nullptr;
  SmallVector<Value *, 8> NewArgs;

  gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP);
  for (; GEPIt != E; ++GEPIt) {
    if (GEPIt->isStructTy()) {
      // Must be a constant index.
      ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
      assert(IdxVal->getLimitedValue() < NewElts.size() && "");
      NewPointer = NewElts[IdxVal->getLimitedValue()];
      // The index selects NewPointer; it is not part of the new GEP indices.
      GEPIt++;
      break;
    } else if (GEPIt->isArrayTy()) {
      // Add array index.
      NewArgs.push_back(GEPIt.getOperand());
    } else if (GEPIt->isPointerTy()) {
      // Add pointer index.
      NewArgs.push_back(GEPIt.getOperand());
    } else if (GEPIt->isVectorTy()) {
      // Add vector index.
      NewArgs.push_back(GEPIt.getOperand());
    } else {
      llvm_unreachable("should break from structTy");
    }
  }

  if (NewPointer) {
    // Struct split.
    // Add the rest of the indices.
    for (; GEPIt != E; ++GEPIt) {
      NewArgs.push_back(GEPIt.getOperand());
    }
    // If there is only one level of struct, just use the new pointer.
    Value *NewGEP = NewPointer;
    if (NewArgs.size() > 1) {
      NewGEP = Builder.CreateInBoundsGEP(NewPointer, NewArgs);
      NewGEP->takeName(GEP);
    }

    assert(NewGEP->getType() == GEP->getType() && "type mismatch");

    GEP->replaceAllUsesWith(NewGEP);
  } else {
    // The GEP ends at an array of a basic type.
    Type *Ty = GEP->getType()->getPointerElementType();
    if (Ty->isVectorTy() ||
        (Ty->isStructTy() && !dxilutil::IsHLSLObjectType(Ty)) ||
        Ty->isArrayTy()) {
      SmallVector<Value *, 8> NewArgs;
      NewArgs.append(GEP->idx_begin(), GEP->idx_end());

      SmallVector<Value *, 8> NewGEPs;
      // Create new GEPs.
      for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
        Value *NewGEP = Builder.CreateGEP(nullptr, NewElts[i], NewArgs);
        NewGEPs.emplace_back(NewGEP);
      }
      const bool bAllowReplace = isa<AllocaInst>(OldVal);
      if (!SROA_Helper::LowerMemcpy(GEP, /*annotation*/ nullptr, typeSys, DL,
                                    bAllowReplace)) {
        SROA_Helper helper(GEP, NewGEPs, DeadInsts, typeSys, DL);
        helper.RewriteForScalarRepl(GEP, Builder);
        for (Value *NewGEP : NewGEPs) {
          if (NewGEP->user_empty() && isa<Instruction>(NewGEP)) {
            // Delete unused new GEPs.
            cast<Instruction>(NewGEP)->eraseFromParent();
          }
        }
      }
    } else {
      Value *vecIdx = NewArgs.back();
      if (ConstantInt *immVecIdx = dyn_cast<ConstantInt>(vecIdx)) {
        // Replace vecArray[arrayIdx][immVecIdx]
        // with scalarArray_immVecIdx[arrayIdx].

        // Pop the vecIdx.
        NewArgs.pop_back();
        Value *NewGEP = NewElts[immVecIdx->getLimitedValue()];
        if (NewArgs.size() > 1) {
          NewGEP = Builder.CreateInBoundsGEP(NewGEP, NewArgs);
          NewGEP->takeName(GEP);
        }

        assert(NewGEP->getType() == GEP->getType() && "type mismatch");

        GEP->replaceAllUsesWith(NewGEP);
      } else {
        // Dynamic vector indexing.
        assert(0 && "should not reach here");
      }
    }
  }

  // Remove the use so that the caller can keep iterating over its other users.
  DXASSERT(GEP->user_empty(),
           "All uses of the GEP should have been eliminated");
  if (isa<Instruction>(GEP)) {
    GEP->setOperand(GEP->getPointerOperandIndex(),
                    UndefValue::get(GEP->getPointerOperand()->getType()));
    DeadInsts.push_back(GEP);
  } else {
    cast<Constant>(GEP)->destroyConstant();
  }
}
/// isVectorOrStructArray - Check if T is an array of vectors or structs.
static bool isVectorOrStructArray(Type *T) {
  if (!T->isArrayTy())
    return false;

  T = dxilutil::GetArrayEltTy(T);

  return T->isStructTy() || T->isVectorTy();
}
static void SimplifyStructValUsage(Value *StructVal, std::vector<Value *> Elts,
                                   SmallVectorImpl<Value *> &DeadInsts) {
  for (User *user : StructVal->users()) {
    if (ExtractValueInst *Extract = dyn_cast<ExtractValueInst>(user)) {
      DXASSERT(Extract->getNumIndices() == 1, "only support 1 index case");
      unsigned index = Extract->getIndices()[0];
      Value *Elt = Elts[index];
      Extract->replaceAllUsesWith(Elt);
      DeadInsts.emplace_back(Extract);
    } else if (InsertValueInst *Insert = dyn_cast<InsertValueInst>(user)) {
      DXASSERT(Insert->getNumIndices() == 1, "only support 1 index case");
      unsigned index = Insert->getIndices()[0];
      if (Insert->getAggregateOperand() == StructVal) {
        // Update the field.
        std::vector<Value *> NewElts = Elts;
        NewElts[index] = Insert->getInsertedValueOperand();
        SimplifyStructValUsage(Insert, NewElts, DeadInsts);
      } else {
        // Insert into another, bigger struct.
        IRBuilder<> Builder(Insert);
        Value *TmpStructVal = UndefValue::get(StructVal->getType());
        for (unsigned i = 0; i < Elts.size(); i++) {
          TmpStructVal =
              Builder.CreateInsertValue(TmpStructVal, Elts[i], {i});
        }
        Insert->replaceUsesOfWith(StructVal, TmpStructVal);
      }
    }
  }
}
/// RewriteForLoad - Replace OldVal with the flattened NewElts in a LoadInst.
void SROA_Helper::RewriteForLoad(LoadInst *LI) {
  Type *LIType = LI->getType();
  Type *ValTy = OldVal->getType()->getPointerElementType();
  IRBuilder<> Builder(LI);
  if (LIType->isVectorTy()) {
    // Replace:
    //   %res = load <2 x i32>* %alloc
    // with:
    //   %load.0 = load i32* %alloc.0
    //   %insert.0 = insertelement <2 x i32> undef, i32 %load.0, 0
    //   %load.1 = load i32* %alloc.1
    //   %insert = insertelement <2 x i32> %insert.0, i32 %load.1, 1
    Value *Insert = UndefValue::get(LIType);
    for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
      Value *Load = Builder.CreateLoad(NewElts[i], "load");
      Insert = Builder.CreateInsertElement(Insert, Load, i, "insert");
    }
    LI->replaceAllUsesWith(Insert);
  } else if (isCompatibleAggregate(LIType, ValTy)) {
    if (isVectorOrStructArray(LIType)) {
      // Replace:
      //   %res = load [2 x <2 x float>]* %alloc
      // with loads of each scalar element from the split arrays
      // (%alloc.0, %alloc.1 of type [2 x float]*), reassembled with
      // insertelement/insertvalue into a [2 x <2 x float>] value.
      Type *i32Ty = Type::getInt32Ty(LIType->getContext());
      Value *zero = ConstantInt::get(i32Ty, 0);
      SmallVector<Value *, 8> idxList;
      idxList.emplace_back(zero);
      Value *newLd = LoadVectorOrStructArray(cast<ArrayType>(LIType), NewElts,
                                             idxList, Builder);
      LI->replaceAllUsesWith(newLd);
    } else {
      // Replace:
      //   %res = load { i32, i32 }* %alloc
      // with:
      //   %load.0 = load i32* %alloc.0
      //   %insert.0 = insertvalue { i32, i32 } undef, i32 %load.0, 0
      //   %load.1 = load i32* %alloc.1
      //   %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1
      // (Also works for arrays instead of structs.)
      Module *M = LI->getModule();
      Value *Insert = UndefValue::get(LIType);
      std::vector<Value *> LdElts(NewElts.size());
      for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
        Value *Ptr = NewElts[i];
        Type *Ty = Ptr->getType()->getPointerElementType();
        Value *Load = nullptr;
        if (!dxilutil::IsHLSLMatrixType(Ty))
          Load = Builder.CreateLoad(Ptr, "load");
        else {
          // Generate a matrix load.
          Load = HLModule::EmitHLOperationCall(
              Builder, HLOpcodeGroup::HLMatLoadStore,
              static_cast<unsigned>(HLMatLoadStoreOpcode::RowMatLoad), Ty,
              {Ptr}, *M);
        }
        LdElts[i] = Load;
        Insert = Builder.CreateInsertValue(Insert, Load, i, "insert");
      }
      LI->replaceAllUsesWith(Insert);
      if (LIType->isStructTy()) {
        SimplifyStructValUsage(Insert, LdElts, DeadInsts);
      }
    }
  } else {
    llvm_unreachable("other type don't need rewrite");
  }

  // Remove the use so that the caller can keep iterating over its other users.
  LI->setOperand(LI->getPointerOperandIndex(),
                 UndefValue::get(LI->getPointerOperand()->getType()));
  DeadInsts.push_back(LI);
}
/// RewriteForStore - Replace OldVal with the flattened NewElts in a StoreInst.
void SROA_Helper::RewriteForStore(StoreInst *SI) {
  Value *Val = SI->getOperand(0);
  Type *SIType = Val->getType();
  IRBuilder<> Builder(SI);
  Type *ValTy = OldVal->getType()->getPointerElementType();
  if (SIType->isVectorTy()) {
    // Replace:
    //   store <2 x float> %val, <2 x float>* %alloc
    // with:
    //   %val.0 = extractelement <2 x float> %val, 0
    //   store float %val.0, float* %alloc.0
    //   %val.1 = extractelement <2 x float> %val, 1
    //   store float %val.1, float* %alloc.1
    for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
      Value *Extract = Builder.CreateExtractElement(Val, i, Val->getName());
      Builder.CreateStore(Extract, NewElts[i]);
    }
  } else if (isCompatibleAggregate(SIType, ValTy)) {
    if (isVectorOrStructArray(SIType)) {
      // Replace:
      //   store [2 x <2 x i32>] %val, [2 x <2 x i32>]* %alloc, align 16
      // with:
      //   %val.0 = extractvalue [2 x <2 x i32>] %val, 0
      //   %alloc.0.0 = getelementptr inbounds [2 x i32], [2 x i32]* %alloc.0,
      //                i32 0, i32 0
      //   %val.0.0 = extractelement <2 x i32> %val.0, i64 0
      //   store i32 %val.0.0, i32* %alloc.0.0
      //   %alloc.1.0 = getelementptr inbounds [2 x i32], [2 x i32]* %alloc.1,
      //                i32 0, i32 0
      //   %val.0.1 = extractelement <2 x i32> %val.0, i64 1
      //   store i32 %val.0.1, i32* %alloc.1.0
      //   %val.1 = extractvalue [2 x <2 x i32>] %val, 1
      //   %alloc.0.1 = getelementptr inbounds [2 x i32], [2 x i32]* %alloc.0,
      //                i32 0, i32 1
      //   %val.1.0 = extractelement <2 x i32> %val.1, i64 0
      //   store i32 %val.1.0, i32* %alloc.0.1
      //   %alloc.1.1 = getelementptr inbounds [2 x i32], [2 x i32]* %alloc.1,
      //                i32 0, i32 1
      //   %val.1.1 = extractelement <2 x i32> %val.1, i64 1
      //   store i32 %val.1.1, i32* %alloc.1.1
      ArrayType *AT = cast<ArrayType>(SIType);
      Type *i32Ty = Type::getInt32Ty(SIType->getContext());
      Value *zero = ConstantInt::get(i32Ty, 0);
      SmallVector<Value *, 8> idxList;
      idxList.emplace_back(zero);
      StoreVectorOrStructArray(AT, Val, NewElts, idxList, Builder);
    } else {
      // Replace:
      //   store { i32, i32 } %val, { i32, i32 }* %alloc
      // with:
      //   %val.0 = extractvalue { i32, i32 } %val, 0
      //   store i32 %val.0, i32* %alloc.0
      //   %val.1 = extractvalue { i32, i32 } %val, 1
      //   store i32 %val.1, i32* %alloc.1
      // (Also works for arrays instead of structs.)
      Module *M = SI->getModule();
      for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
        Value *Extract = Builder.CreateExtractValue(Val, i, Val->getName());
        if (!dxilutil::IsHLSLMatrixType(Extract->getType())) {
          Builder.CreateStore(Extract, NewElts[i]);
        } else {
          // Generate a matrix store.
          HLModule::EmitHLOperationCall(
              Builder, HLOpcodeGroup::HLMatLoadStore,
              static_cast<unsigned>(HLMatLoadStoreOpcode::RowMatStore),
              Extract->getType(), {NewElts[i], Extract}, *M);
        }
      }
    }
  } else {
    llvm_unreachable("other type don't need rewrite");
  }

  // Remove the use so that the caller can keep iterating over its other users.
  SI->setOperand(SI->getPointerOperandIndex(),
                 UndefValue::get(SI->getPointerOperand()->getType()));
  DeadInsts.push_back(SI);
}
/// RewriteMemIntrin - MI is a memcpy/memset/memmove from or to AI.
/// Rewrite it to copy or set the elements of the scalarized memory.
void SROA_Helper::RewriteMemIntrin(MemIntrinsic *MI, Value *OldV) {
  // If this is a memcpy/memmove, construct the other pointer as the
  // appropriate type. The "Other" pointer is the pointer that goes to memory
  // that doesn't have anything to do with the alloca that we are promoting.
  // For memset, this Value* stays null.
  Value *OtherPtr = nullptr;
  unsigned MemAlignment = MI->getAlignment();
  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcpy
    if (OldV == MTI->getRawDest())
      OtherPtr = MTI->getRawSource();
    else {
      assert(OldV == MTI->getRawSource());
      OtherPtr = MTI->getRawDest();
    }
  }

  // If there is an other pointer, we want to convert it to the same pointer
  // type as AI has, so we can GEP through it safely.
  if (OtherPtr) {
    unsigned AddrSpace =
        cast<PointerType>(OtherPtr->getType())->getAddressSpace();

    // Remove bitcasts and all-zero GEPs from OtherPtr. This is an
    // optimization, but it's also required to detect the corner case where
    // both pointer operands are referencing the same memory, and where
    // OtherPtr may be a bitcast or GEP that is currently being rewritten.
    // (This function is only called for mem intrinsics that access the whole
    // aggregate, so non-zero GEPs are not an issue here.)
    OtherPtr = OtherPtr->stripPointerCasts();

    // Copying the alloca to itself is a no-op: just delete it.
    if (OtherPtr == OldVal || OtherPtr == NewElts[0]) {
      // This code will run twice for a no-op memcpy -- once for each operand.
      // Put only one reference to MI on the DeadInsts list.
      for (SmallVectorImpl<Value *>::const_iterator I = DeadInsts.begin(),
                                                    E = DeadInsts.end();
           I != E; ++I)
        if (*I == MI)
          return;
      // Remove the uses so that the caller can keep iterating over its other
      // users.
      MI->setOperand(0, UndefValue::get(MI->getOperand(0)->getType()));
      MI->setOperand(1, UndefValue::get(MI->getOperand(1)->getType()));
      DeadInsts.push_back(MI);
      return;
    }

    // If the pointer is not the right type, insert a bitcast to the right
    // type.
    Type *NewTy =
        PointerType::get(OldVal->getType()->getPointerElementType(), AddrSpace);

    if (OtherPtr->getType() != NewTy)
      OtherPtr = new BitCastInst(OtherPtr, NewTy, OtherPtr->getName(), MI);
  }

  // Process each element of the aggregate.
  bool SROADest = MI->getRawDest() == OldV;

  Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext()));
  const DataLayout &DL = MI->getModule()->getDataLayout();

  for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
    // If this is a memcpy/memmove, emit a GEP of the other element address.
    Value *OtherElt = nullptr;
    unsigned OtherEltAlign = MemAlignment;

    if (OtherPtr) {
      Value *Idx[2] = {Zero,
                       ConstantInt::get(Type::getInt32Ty(MI->getContext()), i)};
      OtherElt = GetElementPtrInst::CreateInBounds(
          OtherPtr, Idx, OtherPtr->getName() + "." + Twine(i), MI);
      uint64_t EltOffset;
      PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
      Type *OtherTy = OtherPtrTy->getElementType();
      if (StructType *ST = dyn_cast<StructType>(OtherTy)) {
        EltOffset = DL.getStructLayout(ST)->getElementOffset(i);
      } else {
        Type *EltTy = cast<SequentialType>(OtherTy)->getElementType();
        EltOffset = DL.getTypeAllocSize(EltTy) * i;
      }

      // The alignment of the other pointer is the guaranteed alignment of the
      // element, which is affected by both the known alignment of the whole
      // mem intrinsic and the alignment of the element. If the alignment of
      // the memcpy (f.e.) is 32 but the element is at a 4-byte offset, then
      // the known alignment is just 4 bytes.
      OtherEltAlign = (unsigned)MinAlign(OtherEltAlign, EltOffset);
    }

    Value *EltPtr = NewElts[i];
    Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType();

    // If we got down to a scalar, insert a load or store as appropriate.
    if (EltTy->isSingleValueType()) {
      if (isa<MemTransferInst>(MI)) {
        if (SROADest) {
          // From Other to Alloca.
          Value *Elt = new LoadInst(OtherElt, "tmp", false, OtherEltAlign, MI);
          new StoreInst(Elt, EltPtr, MI);
        } else {
          // From Alloca to Other.
          Value *Elt = new LoadInst(EltPtr, "tmp", MI);
          new StoreInst(Elt, OtherElt, false, OtherEltAlign, MI);
        }
        continue;
      }
      assert(isa<MemSetInst>(MI));

      // If the stored element is zero (common case), just store a null
      // constant.
      Constant *StoreVal;
      if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getArgOperand(1))) {
        if (CI->isZero()) {
          StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0>
        } else {
          // If EltTy is a vector type, get the element type.
          Type *ValTy = EltTy->getScalarType();

          // Construct an integer with the right value.
          unsigned EltSize = DL.getTypeSizeInBits(ValTy);
          APInt OneVal(EltSize, CI->getZExtValue());
          APInt TotalVal(OneVal);
          // Set each byte.
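          // For example (illustrative): a memset value of 0xAB replicated
          // into a 32-bit element yields TotalVal == 0xABABABAB.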
          for (unsigned i = 0; 8 * i < EltSize; ++i) {
            TotalVal = TotalVal.shl(8);
            TotalVal |= OneVal;
          }

          // Convert the integer value to the appropriate type.
          StoreVal = ConstantInt::get(CI->getContext(), TotalVal);
          if (ValTy->isPointerTy())
            StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy);
          else if (ValTy->isFloatingPointTy())
            StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy);
          assert(StoreVal->getType() == ValTy && "Type mismatch!");

          // If the requested value was a vector constant, create it.
          if (EltTy->isVectorTy()) {
            unsigned NumElts = cast<VectorType>(EltTy)->getNumElements();
            StoreVal = ConstantVector::getSplat(NumElts, StoreVal);
          }
        }
        new StoreInst(StoreVal, EltPtr, MI);
        continue;
      }
      // Otherwise, if we're storing a byte variable, use a memset call for
      // this element.
    }

    unsigned EltSize = DL.getTypeAllocSize(EltTy);
    if (!EltSize)
      continue;

    IRBuilder<> Builder(MI);

    // Finally, insert the meminst for this element.
    if (isa<MemSetInst>(MI)) {
      Builder.CreateMemSet(EltPtr, MI->getArgOperand(1), EltSize,
                           MI->isVolatile());
    } else {
      assert(isa<MemTransferInst>(MI));
      Value *Dst = SROADest ? EltPtr : OtherElt; // Dest ptr
      Value *Src = SROADest ? OtherElt : EltPtr; // Src ptr

      if (isa<MemCpyInst>(MI))
        Builder.CreateMemCpy(Dst, Src, EltSize, OtherEltAlign,
                             MI->isVolatile());
      else
        Builder.CreateMemMove(Dst, Src, EltSize, OtherEltAlign,
                              MI->isVolatile());
    }
  }

  // Remove the use so that the caller can keep iterating over its other users.
  MI->setOperand(0, UndefValue::get(MI->getOperand(0)->getType()));
  if (isa<MemTransferInst>(MI))
    MI->setOperand(1, UndefValue::get(MI->getOperand(1)->getType()));
  DeadInsts.push_back(MI);
}
void SROA_Helper::RewriteBitCast(BitCastInst *BCI) {
  Type *DstTy = BCI->getType();
  Value *Val = BCI->getOperand(0);
  Type *SrcTy = Val->getType();
  if (!DstTy->isPointerTy()) {
    assert(0 && "Type mismatch.");
    return;
  }
  if (!SrcTy->isPointerTy()) {
    assert(0 && "Type mismatch.");
    return;
  }

  DstTy = DstTy->getPointerElementType();
  SrcTy = SrcTy->getPointerElementType();
  if (!DstTy->isStructTy()) {
    assert(0 && "Type mismatch.");
    return;
  }
  if (!SrcTy->isStructTy()) {
    assert(0 && "Type mismatch.");
    return;
  }

  // Only support bitcast to a parent struct type.
  StructType *DstST = cast<StructType>(DstTy);
  StructType *SrcST = cast<StructType>(SrcTy);

  bool bTypeMatch = false;
  unsigned level = 0;
  while (SrcST) {
    level++;
    Type *EltTy = SrcST->getElementType(0);
    if (EltTy == DstST) {
      bTypeMatch = true;
      break;
    }
    SrcST = dyn_cast<StructType>(EltTy);
  }

  if (!bTypeMatch) {
    assert(0 && "Type mismatch.");
    return;
  }

  std::vector<Value *> idxList(level + 1);
  ConstantInt *zeroIdx =
      ConstantInt::get(Type::getInt32Ty(Val->getContext()), 0);
  for (unsigned i = 0; i < (level + 1); i++)
    idxList[i] = zeroIdx;

  IRBuilder<> Builder(BCI);
  Instruction *GEP =
      cast<Instruction>(Builder.CreateInBoundsGEP(Val, idxList));
  BCI->replaceAllUsesWith(GEP);
  BCI->eraseFromParent();

  IRBuilder<> GEPBuilder(GEP);
  RewriteForGEP(cast<GEPOperator>(GEP), GEPBuilder);
}
/// RewriteCallArg - For callees that are not flattened, replace OldVal with
/// an alloca and copy data in and/or out between the alloca and the flattened
/// NewElts around the CallInst.
void SROA_Helper::RewriteCallArg(CallInst *CI, unsigned ArgIdx, bool bIn,
                                 bool bOut) {
  Function *F = CI->getParent()->getParent();
  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(F));
  const DataLayout &DL = F->getParent()->getDataLayout();

  Value *userTyV = CI->getArgOperand(ArgIdx);
  PointerType *userTy = cast<PointerType>(userTyV->getType());
  Type *userTyElt = userTy->getElementType();
  Value *Alloca = AllocaBuilder.CreateAlloca(userTyElt);
  IRBuilder<> Builder(CI);
  if (bIn) {
    MemCpyInst *cpy = cast<MemCpyInst>(Builder.CreateMemCpy(
        Alloca, userTyV, DL.getTypeAllocSize(userTyElt), false));
    RewriteMemIntrin(cpy, cpy->getRawSource());
  }
  CI->setArgOperand(ArgIdx, Alloca);
  if (bOut) {
    Builder.SetInsertPoint(CI->getNextNode());
    MemCpyInst *cpy = cast<MemCpyInst>(Builder.CreateMemCpy(
        userTyV, Alloca, DL.getTypeAllocSize(userTyElt), false));
    RewriteMemIntrin(cpy, cpy->getRawSource());
  }
}
/// RewriteCall - Replace OldVal with flattened NewElts in a CallInst.
void SROA_Helper::RewriteCall(CallInst *CI) {
  HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
  Function *F = CI->getCalledFunction();
  if (group != HLOpcodeGroup::NotHL) {
    unsigned opcode = GetHLOpcode(CI);
    if (group == HLOpcodeGroup::HLIntrinsic) {
      IntrinsicOp IOP = static_cast<IntrinsicOp>(opcode);
      switch (IOP) {
      case IntrinsicOp::MOP_Append: {
        // Buffer Append is already expanded in codegen,
        // so this must be an OutputStream Append.
        SmallVector<Value *, 4> flatArgs;
        for (Value *arg : CI->arg_operands()) {
          if (arg == OldVal) {
            // Flatten the argument.
            // Every Elt has a pointer type.
            // For Append, that is not a problem.
            for (Value *Elt : NewElts)
              flatArgs.emplace_back(Elt);
          } else
            flatArgs.emplace_back(arg);
        }

        SmallVector<Type *, 4> flatParamTys;
        for (Value *arg : flatArgs)
          flatParamTys.emplace_back(arg->getType());
        // No flattened return type is needed for Append.
        FunctionType *flatFuncTy =
            FunctionType::get(CI->getType(), flatParamTys, false);
        Function *flatF =
            GetOrCreateHLFunction(*F->getParent(), flatFuncTy, group, opcode);
        IRBuilder<> Builder(CI);
        Builder.CreateCall(flatF, flatArgs);

        // Append returns void, so it's not used by other instructions
        // and we don't need to replace it with flatCI.
        // However, we don't want to visit the same Append again
        // when SROA'ing other arguments, as that would be O(n^2)
        // and we would attempt double-deleting the original call.
        for (auto &opit : CI->operands())
          opit.set(UndefValue::get(opit->getType()));
        DeadInsts.push_back(CI);
      } break;
      case IntrinsicOp::IOP_TraceRay: {
        if (OldVal ==
            CI->getArgOperand(HLOperandIndex::kTraceRayRayDescOpIdx)) {
          RewriteCallArg(CI, HLOperandIndex::kTraceRayRayDescOpIdx,
                         /*bIn*/ true, /*bOut*/ false);
        } else {
          DXASSERT(OldVal ==
                       CI->getArgOperand(HLOperandIndex::kTraceRayPayLoadOpIdx),
                   "else invalid TraceRay");
          RewriteCallArg(CI, HLOperandIndex::kTraceRayPayLoadOpIdx,
                         /*bIn*/ true, /*bOut*/ true);
        }
      } break;
      case IntrinsicOp::IOP_ReportHit: {
        RewriteCallArg(CI, HLOperandIndex::kReportIntersectionAttributeOpIdx,
                       /*bIn*/ true, /*bOut*/ false);
      } break;
      case IntrinsicOp::IOP_CallShader: {
        RewriteCallArg(CI, HLOperandIndex::kBinaryOpSrc1Idx,
                       /*bIn*/ true, /*bOut*/ true);
      } break;
      default:
        DXASSERT(0, "cannot flatten hlsl intrinsic.");
      }
    }
    // TODO: check other high level dx operations if need to.
  } else {
    DXASSERT(0, "should have been handled at inlining");
  }
}
/// RewriteForAddrSpaceCast - Rewrite an AddrSpaceCast, either a ConstantExpr
/// or an Instruction.
void SROA_Helper::RewriteForAddrSpaceCast(Value *CE, IRBuilder<> &Builder) {
  SmallVector<Value *, 8> NewCasts;
  // Create new AddrSpaceCasts.
  for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
    Value *NewCast = Builder.CreateAddrSpaceCast(
        NewElts[i],
        PointerType::get(NewElts[i]->getType()->getPointerElementType(),
                         CE->getType()->getPointerAddressSpace()));
    NewCasts.emplace_back(NewCast);
  }
  SROA_Helper helper(CE, NewCasts, DeadInsts, typeSys, DL);
  helper.RewriteForScalarRepl(CE, Builder);

  // Remove the use so that the caller can keep iterating over its other users.
  DXASSERT(CE->user_empty(),
           "All uses of the addrspacecast should have been eliminated");
  if (Instruction *I = dyn_cast<Instruction>(CE))
    I->eraseFromParent();
  else
    cast<Constant>(CE)->destroyConstant();
}
/// RewriteForConstExpr - Rewrite a GEP which is a ConstantExpr.
void SROA_Helper::RewriteForConstExpr(ConstantExpr *CE, IRBuilder<> &Builder) {
  if (GEPOperator *GEP = dyn_cast<GEPOperator>(CE)) {
    if (OldVal == GEP->getPointerOperand()) {
      // Flatten the GEP.
      RewriteForGEP(GEP, Builder);
      return;
    }
  }
  if (CE->getOpcode() == Instruction::AddrSpaceCast) {
    if (OldVal == CE->getOperand(0)) {
      // Flatten the AddrSpaceCast.
      RewriteForAddrSpaceCast(CE, Builder);
      return;
    }
  }
  for (Value::use_iterator UI = CE->use_begin(), E = CE->use_end(); UI != E;) {
    Use &TheUse = *UI++;
    if (Instruction *I = dyn_cast<Instruction>(TheUse.getUser())) {
      IRBuilder<> tmpBuilder(I);
      // Replace the ConstantExpr use with an equivalent instruction.
      Instruction *tmpInst = CE->getAsInstruction();
      tmpBuilder.Insert(tmpInst);
      TheUse.set(tmpInst);
    } else {
      RewriteForConstExpr(cast<ConstantExpr>(TheUse.getUser()), Builder);
    }
  }
  // Remove the use so that the caller can keep iterating over its other users.
  DXASSERT(CE->user_empty(),
           "All uses of the constantexpr should have been eliminated");
  CE->destroyConstant();
}
/// RewriteForScalarRepl - OldVal is being split into NewElts, so rewrite
/// the users of V, which references it, to use the separate elements.
void SROA_Helper::RewriteForScalarRepl(Value *V, IRBuilder<> &Builder) {
  // Don't iterate over the uses explicitly because we'll be removing them,
  // and potentially adding new ones (if expanding memcpys) during the
  // iteration.
  Use *PrevUse = nullptr;
  while (!V->use_empty()) {
    Use &TheUse = *V->use_begin();

    DXASSERT_LOCALVAR(
        PrevUse, &TheUse != PrevUse,
        "Infinite loop while SROA'ing value, use isn't getting eliminated.");
    PrevUse = &TheUse;

    // Each of these must either call ->eraseFromParent()
    // or null out the use of V so that we make progress.
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(TheUse.getUser())) {
      RewriteForConstExpr(CE, Builder);
    } else {
      Instruction *User = cast<Instruction>(TheUse.getUser());
      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
        IRBuilder<> Builder(GEP);
        RewriteForGEP(cast<GEPOperator>(GEP), Builder);
      } else if (LoadInst *ldInst = dyn_cast<LoadInst>(User))
        RewriteForLoad(ldInst);
      else if (StoreInst *stInst = dyn_cast<StoreInst>(User))
        RewriteForStore(stInst);
      else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User))
        RewriteMemIntrin(MI, V);
      else if (CallInst *CI = dyn_cast<CallInst>(User))
        RewriteCall(CI);
      else if (BitCastInst *BCI = dyn_cast<BitCastInst>(User))
        RewriteBitCast(BCI);
      else if (AddrSpaceCastInst *CI = dyn_cast<AddrSpaceCastInst>(User)) {
        RewriteForAddrSpaceCast(CI, Builder);
      } else {
        assert(0 && "not support.");
      }
    }
  }
}
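// CreateNestArrayTy rebuilds a (possibly nested) array type around a new
// element type. For example (illustrative): with FinalEltTy = float and
// nestArrayTys = { [2 x [3 x <4 x float>]], [3 x <4 x float>] }, the result
// is [2 x [3 x float]].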
static ArrayType *CreateNestArrayTy(Type *FinalEltTy,
                                    ArrayRef<ArrayType *> nestArrayTys) {
  Type *newAT = FinalEltTy;
  for (auto ArrayTy = nestArrayTys.rbegin(), E = nestArrayTys.rend();
       ArrayTy != E; ++ArrayTy)
    newAT = ArrayType::get(newAT, (*ArrayTy)->getNumElements());
  return cast<ArrayType>(newAT);
}
/// DoScalarReplacement - Split V into AllocaInsts with Builder and save the
/// new AllocaInsts into Elts. Then do SROA on V.
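/// For example (illustrative): an alloca of a struct with a <2 x float> field
/// and a float field is split into two allocas %v.0 and %v.1, and every use
/// of the original alloca is then rewritten against the new ones.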
  3062. bool SROA_Helper::DoScalarReplacement(Value *V, std::vector<Value *> &Elts,
  3063. IRBuilder<> &Builder, bool bFlatVector,
  3064. bool hasPrecise, DxilTypeSystem &typeSys,
  3065. const DataLayout &DL,
  3066. SmallVector<Value *, 32> &DeadInsts) {
  3067. DEBUG(dbgs() << "Found inst to SROA: " << *V << '\n');
  3068. Type *Ty = V->getType();
  3069. // Skip none pointer types.
  3070. if (!Ty->isPointerTy())
  3071. return false;
  3072. Ty = Ty->getPointerElementType();
  3073. // Skip none aggregate types.
  3074. if (!Ty->isAggregateType())
  3075. return false;
  3076. // Skip matrix types.
  3077. if (dxilutil::IsHLSLMatrixType(Ty))
  3078. return false;
  3079. IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Builder.GetInsertPoint()));
  3080. if (StructType *ST = dyn_cast<StructType>(Ty)) {
  3081. // Skip HLSL object types.
  3082. if (dxilutil::IsHLSLObjectType(ST)) {
  3083. return false;
  3084. }
  3085. unsigned numTypes = ST->getNumContainedTypes();
  3086. Elts.reserve(numTypes);
  3087. DxilStructAnnotation *SA = typeSys.GetStructAnnotation(ST);
  3088. // Skip empty struct.
  3089. if (SA && SA->IsEmptyStruct())
  3090. return true;
  3091. for (int i = 0, e = numTypes; i != e; ++i) {
  3092. AllocaInst *NA = AllocaBuilder.CreateAlloca(ST->getContainedType(i), nullptr, V->getName() + "." + Twine(i));
  3093. bool markPrecise = hasPrecise;
  3094. if (SA) {
  3095. DxilFieldAnnotation &FA = SA->GetFieldAnnotation(i);
  3096. markPrecise |= FA.IsPrecise();
  3097. }
  3098. if (markPrecise)
  3099. HLModule::MarkPreciseAttributeWithMetadata(NA);
  3100. Elts.push_back(NA);
  3101. }
  3102. } else {
  3103. ArrayType *AT = cast<ArrayType>(Ty);
  3104. if (AT->getNumContainedTypes() == 0) {
  3105. // Skip case like [0 x %struct].
  3106. return false;
  3107. }
  3108. Type *ElTy = AT->getElementType();
  3109. SmallVector<ArrayType *, 4> nestArrayTys;
  3110. nestArrayTys.emplace_back(AT);
  3111. // support multi level of array
  3112. while (ElTy->isArrayTy()) {
  3113. ArrayType *ElAT = cast<ArrayType>(ElTy);
  3114. nestArrayTys.emplace_back(ElAT);
  3115. ElTy = ElAT->getElementType();
  3116. }
  3117. if (ElTy->isStructTy() &&
  3118. // Skip Matrix type.
  3119. !dxilutil::IsHLSLMatrixType(ElTy)) {
  3120. if (!dxilutil::IsHLSLObjectType(ElTy)) {
  3121. // for array of struct
  3122. // split into arrays of struct elements
  3123. StructType *ElST = cast<StructType>(ElTy);
  3124. unsigned numTypes = ElST->getNumContainedTypes();
  3125. Elts.reserve(numTypes);
  3126. DxilStructAnnotation *SA = typeSys.GetStructAnnotation(ElST);
  3127. // Skip empty struct.
  3128. if (SA && SA->IsEmptyStruct())
  3129. return true;
  3130. for (int i = 0, e = numTypes; i != e; ++i) {
  3131. AllocaInst *NA = AllocaBuilder.CreateAlloca(
  3132. CreateNestArrayTy(ElST->getContainedType(i), nestArrayTys),
  3133. nullptr, V->getName() + "." + Twine(i));
  3134. bool markPrecise = hasPrecise;
  3135. if (SA) {
  3136. DxilFieldAnnotation &FA = SA->GetFieldAnnotation(i);
  3137. markPrecise |= FA.IsPrecise();
  3138. }
  3139. if (markPrecise)
  3140. HLModule::MarkPreciseAttributeWithMetadata(NA);
  3141. Elts.push_back(NA);
  3142. }
  3143. } else {
  3144. // For local resource array which not dynamic indexing,
  3145. // split it.
  3146. if (dxilutil::HasDynamicIndexing(V) ||
  3147. // Only support 1 dim split.
  3148. nestArrayTys.size() > 1)
  3149. return false;
  3150. for (int i = 0, e = AT->getNumElements(); i != e; ++i) {
  3151. AllocaInst *NA = AllocaBuilder.CreateAlloca(ElTy, nullptr,
  3152. V->getName() + "." + Twine(i));
  3153. Elts.push_back(NA);
  3154. }
  3155. }
  3156. } else if (ElTy->isVectorTy()) {
  3157. // Skip vector if required.
  3158. if (!bFlatVector)
  3159. return false;
  3160. // for array of vector
  3161. // split into arrays of scalar
  3162. VectorType *ElVT = cast<VectorType>(ElTy);
  3163. Elts.reserve(ElVT->getNumElements());
  3164. ArrayType *scalarArrayTy = CreateNestArrayTy(ElVT->getElementType(), nestArrayTys);
  3165. for (int i = 0, e = ElVT->getNumElements(); i != e; ++i) {
  3166. AllocaInst *NA = AllocaBuilder.CreateAlloca(scalarArrayTy, nullptr,
  3167. V->getName() + "." + Twine(i));
  3168. if (hasPrecise)
  3169. HLModule::MarkPreciseAttributeWithMetadata(NA);
  3170. Elts.push_back(NA);
  3171. }
  3172. } else
  3173. // Skip array of basic types.
  3174. return false;
  3175. }
  3176. // Now that we have created the new alloca instructions, rewrite all the
  3177. // uses of the old alloca.
  3178. SROA_Helper helper(V, Elts, DeadInsts, typeSys, DL);
  3179. helper.RewriteForScalarRepl(V, Builder);
  3180. return true;
  3181. }
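// Rough illustration of the transform above (hypothetical HLSL/IR, not taken
// from a specific test case): for
//   struct S { float a; float2 b; };
//   S s;                        //  %s = alloca %struct.S
// the struct path creates one alloca per field,
//   %s.0 = alloca float
//   %s.1 = alloca <2 x float>
// and RewriteForScalarRepl then rewrites every GEP/load/store of %s to use the
// element allocas, marking an element precise when the field annotation says so.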
  3182. static Constant *GetEltInit(Type *Ty, Constant *Init, unsigned idx,
  3183. Type *EltTy) {
  3184. if (isa<UndefValue>(Init))
  3185. return UndefValue::get(EltTy);
3186. if (isa<StructType>(Ty)) {
3187. return Init->getAggregateElement(idx);
3188. } else if (isa<VectorType>(Ty)) {
3189. return Init->getAggregateElement(idx);
  3190. } else {
  3191. ArrayType *AT = cast<ArrayType>(Ty);
  3192. ArrayType *EltArrayTy = cast<ArrayType>(EltTy);
  3193. std::vector<Constant *> Elts;
  3194. if (!AT->getElementType()->isArrayTy()) {
  3195. for (unsigned i = 0; i < AT->getNumElements(); i++) {
  3196. // Get Array[i]
  3197. Constant *InitArrayElt = Init->getAggregateElement(i);
  3198. // Get Array[i].idx
  3199. InitArrayElt = InitArrayElt->getAggregateElement(idx);
  3200. Elts.emplace_back(InitArrayElt);
  3201. }
  3202. return ConstantArray::get(EltArrayTy, Elts);
  3203. } else {
  3204. Type *EltTy = AT->getElementType();
  3205. ArrayType *NestEltArrayTy = cast<ArrayType>(EltArrayTy->getElementType());
  3206. // Nested array.
  3207. for (unsigned i = 0; i < AT->getNumElements(); i++) {
  3208. // Get Array[i]
  3209. Constant *InitArrayElt = Init->getAggregateElement(i);
  3210. // Get Array[i].idx
  3211. InitArrayElt = GetEltInit(EltTy, InitArrayElt, idx, NestEltArrayTy);
  3212. Elts.emplace_back(InitArrayElt);
  3213. }
  3214. return ConstantArray::get(EltArrayTy, Elts);
  3215. }
  3216. }
  3217. }
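// A sketch of GetEltInit on the array path (hypothetical constants): with
//   Ty   = [2 x { i32, float }]
//   Init = [ { i32 0, float 1.0 }, { i32 2, float 3.0 } ]
//   idx  = 1, EltTy = [2 x float]
// the loop picks field idx out of each array entry and rebuilds an array,
// returning [ float 1.0, float 3.0 ]. Nested arrays recurse one level per
// dimension.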
3218. /// DoScalarReplacement - Split GV into element GlobalVariables with Builder and save the new GlobalVariables into Elts.
3219. /// Then do SROA on GV.
  3220. bool SROA_Helper::DoScalarReplacement(GlobalVariable *GV,
  3221. std::vector<Value *> &Elts,
  3222. IRBuilder<> &Builder, bool bFlatVector,
  3223. bool hasPrecise, DxilTypeSystem &typeSys,
  3224. const DataLayout &DL,
  3225. SmallVector<Value *, 32> &DeadInsts) {
  3226. DEBUG(dbgs() << "Found inst to SROA: " << *GV << '\n');
  3227. Type *Ty = GV->getType();
3228. // Skip non-pointer types.
  3229. if (!Ty->isPointerTy())
  3230. return false;
  3231. Ty = Ty->getPointerElementType();
3232. // Skip non-aggregate types.
  3233. if (!Ty->isAggregateType() && !bFlatVector)
  3234. return false;
  3235. // Skip basic types.
  3236. if (Ty->isSingleValueType() && !Ty->isVectorTy())
  3237. return false;
  3238. // Skip matrix types.
  3239. if (dxilutil::IsHLSLMatrixType(Ty))
  3240. return false;
  3241. Module *M = GV->getParent();
  3242. Constant *Init = GV->hasInitializer() ? GV->getInitializer() : UndefValue::get(Ty);
  3243. bool isConst = GV->isConstant();
  3244. GlobalVariable::ThreadLocalMode TLMode = GV->getThreadLocalMode();
  3245. unsigned AddressSpace = GV->getType()->getAddressSpace();
  3246. GlobalValue::LinkageTypes linkage = GV->getLinkage();
  3247. if (StructType *ST = dyn_cast<StructType>(Ty)) {
  3248. // Skip HLSL object types.
  3249. if (dxilutil::IsHLSLObjectType(ST))
  3250. return false;
  3251. unsigned numTypes = ST->getNumContainedTypes();
  3252. Elts.reserve(numTypes);
  3253. //DxilStructAnnotation *SA = typeSys.GetStructAnnotation(ST);
  3254. for (int i = 0, e = numTypes; i != e; ++i) {
  3255. Constant *EltInit = GetEltInit(Ty, Init, i, ST->getElementType(i));
  3256. GlobalVariable *EltGV = new llvm::GlobalVariable(
  3257. *M, ST->getContainedType(i), /*IsConstant*/ isConst, linkage,
  3258. /*InitVal*/ EltInit, GV->getName() + "." + Twine(i),
  3259. /*InsertBefore*/ nullptr, TLMode, AddressSpace);
  3260. //DxilFieldAnnotation &FA = SA->GetFieldAnnotation(i);
  3261. // TODO: set precise.
  3262. // if (hasPrecise || FA.IsPrecise())
  3263. // HLModule::MarkPreciseAttributeWithMetadata(NA);
  3264. Elts.push_back(EltGV);
  3265. }
  3266. } else if (VectorType *VT = dyn_cast<VectorType>(Ty)) {
  3267. // TODO: support dynamic indexing on vector by change it to array.
  3268. unsigned numElts = VT->getNumElements();
  3269. Elts.reserve(numElts);
  3270. Type *EltTy = VT->getElementType();
  3271. //DxilStructAnnotation *SA = typeSys.GetStructAnnotation(ST);
  3272. for (int i = 0, e = numElts; i != e; ++i) {
  3273. Constant *EltInit = GetEltInit(Ty, Init, i, EltTy);
  3274. GlobalVariable *EltGV = new llvm::GlobalVariable(
  3275. *M, EltTy, /*IsConstant*/ isConst, linkage,
  3276. /*InitVal*/ EltInit, GV->getName() + "." + Twine(i),
  3277. /*InsertBefore*/ nullptr, TLMode, AddressSpace);
  3278. //DxilFieldAnnotation &FA = SA->GetFieldAnnotation(i);
  3279. // TODO: set precise.
  3280. // if (hasPrecise || FA.IsPrecise())
  3281. // HLModule::MarkPreciseAttributeWithMetadata(NA);
  3282. Elts.push_back(EltGV);
  3283. }
  3284. } else {
  3285. ArrayType *AT = cast<ArrayType>(Ty);
  3286. if (AT->getNumContainedTypes() == 0) {
  3287. // Skip case like [0 x %struct].
  3288. return false;
  3289. }
  3290. Type *ElTy = AT->getElementType();
  3291. SmallVector<ArrayType *, 4> nestArrayTys;
  3292. nestArrayTys.emplace_back(AT);
3293. // Support multiple levels of arrays.
  3294. while (ElTy->isArrayTy()) {
  3295. ArrayType *ElAT = cast<ArrayType>(ElTy);
  3296. nestArrayTys.emplace_back(ElAT);
  3297. ElTy = ElAT->getElementType();
  3298. }
  3299. if (ElTy->isStructTy() &&
  3300. // Skip Matrix type.
  3301. !dxilutil::IsHLSLMatrixType(ElTy)) {
3302. // For an array of structs,
3303. // split into arrays of the struct's elements.
  3304. StructType *ElST = cast<StructType>(ElTy);
  3305. unsigned numTypes = ElST->getNumContainedTypes();
  3306. Elts.reserve(numTypes);
  3307. //DxilStructAnnotation *SA = typeSys.GetStructAnnotation(ElST);
  3308. for (int i = 0, e = numTypes; i != e; ++i) {
  3309. Type *EltTy =
  3310. CreateNestArrayTy(ElST->getContainedType(i), nestArrayTys);
  3311. Constant *EltInit = GetEltInit(Ty, Init, i, EltTy);
  3312. GlobalVariable *EltGV = new llvm::GlobalVariable(
  3313. *M, EltTy, /*IsConstant*/ isConst, linkage,
  3314. /*InitVal*/ EltInit, GV->getName() + "." + Twine(i),
  3315. /*InsertBefore*/ nullptr, TLMode, AddressSpace);
  3316. //DxilFieldAnnotation &FA = SA->GetFieldAnnotation(i);
  3317. // TODO: set precise.
  3318. // if (hasPrecise || FA.IsPrecise())
  3319. // HLModule::MarkPreciseAttributeWithMetadata(NA);
  3320. Elts.push_back(EltGV);
  3321. }
  3322. } else if (ElTy->isVectorTy()) {
  3323. // Skip vector if required.
  3324. if (!bFlatVector)
  3325. return false;
3326. // For an array of vectors,
3327. // split into arrays of scalars.
  3328. VectorType *ElVT = cast<VectorType>(ElTy);
  3329. Elts.reserve(ElVT->getNumElements());
  3330. ArrayType *scalarArrayTy =
  3331. CreateNestArrayTy(ElVT->getElementType(), nestArrayTys);
  3332. for (int i = 0, e = ElVT->getNumElements(); i != e; ++i) {
  3333. Constant *EltInit = GetEltInit(Ty, Init, i, scalarArrayTy);
  3334. GlobalVariable *EltGV = new llvm::GlobalVariable(
  3335. *M, scalarArrayTy, /*IsConstant*/ isConst, linkage,
  3336. /*InitVal*/ EltInit, GV->getName() + "." + Twine(i),
  3337. /*InsertBefore*/ nullptr, TLMode, AddressSpace);
  3338. // TODO: set precise.
  3339. // if (hasPrecise)
  3340. // HLModule::MarkPreciseAttributeWithMetadata(NA);
  3341. Elts.push_back(EltGV);
  3342. }
  3343. } else
  3344. // Skip array of basic types.
  3345. return false;
  3346. }
3347. // Now that we have created the new global variables, rewrite all the
3348. // uses of the old global.
  3349. SROA_Helper helper(GV, Elts, DeadInsts, typeSys, DL);
  3350. helper.RewriteForScalarRepl(GV, Builder);
  3351. return true;
  3352. }
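// Rough illustration for the global case (hypothetical HLSL): a
//   static struct { float a; float b; } g[4];
// takes the array-of-struct path, so nestArrayTys = { [4 x %struct] } and each
// field becomes its own global of nested-array type:
//   @g.0 : [4 x float]   ; all of the a's
//   @g.1 : [4 x float]   ; all of the b's
// with each initializer slice produced by GetEltInit.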
  3353. struct PointerStatus {
  3354. /// Keep track of what stores to the pointer look like.
  3355. enum StoredType {
  3356. /// There is no store to this pointer. It can thus be marked constant.
  3357. NotStored,
3358. /// This ptr is a global, and is stored to, but the only thing stored is the
3359. /// constant it was initialized with. This is only tracked for scalar
3360. /// globals.
  3361. InitializerStored,
3362. /// This ptr is stored to, but only its initializer and one other value
3363. /// are ever stored to it. If this global isStoredOnce, we track the value
  3364. /// stored to it in StoredOnceValue below. This is only tracked for scalar
  3365. /// globals.
  3366. StoredOnce,
  3367. /// This ptr is only assigned by a memcpy.
  3368. MemcopyDestOnce,
  3369. /// This ptr is stored to by multiple values or something else that we
  3370. /// cannot track.
  3371. Stored
  3372. } storedType;
3373. /// Keep track of what loads from the pointer look like.
  3374. enum LoadedType {
3375. /// There is no load from this pointer.
  3376. NotLoaded,
3377. /// This ptr is only read as the source of a memcpy.
  3378. MemcopySrcOnce,
3379. /// This ptr is loaded from by multiple instructions or something else that we
  3380. /// cannot track.
  3381. Loaded
  3382. } loadedType;
  3383. /// If only one value (besides the initializer constant) is ever stored to
  3384. /// this global, keep track of what value it is.
  3385. Value *StoredOnceValue;
3386. /// Memcpys in which this ptr is used.
  3387. std::unordered_set<MemCpyInst *> memcpySet;
3388. /// Memcpy which uses this ptr as its dest.
  3389. MemCpyInst *StoringMemcpy;
3390. /// Memcpy which uses this ptr as its src.
  3391. MemCpyInst *LoadingMemcpy;
  3392. /// These start out null/false. When the first accessing function is noticed,
  3393. /// it is recorded. When a second different accessing function is noticed,
  3394. /// HasMultipleAccessingFunctions is set to true.
  3395. const Function *AccessingFunction;
  3396. bool HasMultipleAccessingFunctions;
  3397. /// Size of the ptr.
  3398. unsigned Size;
3399. /// Look at all uses of the pointer and fill in the PointerStatus structure.
3400. /// bStructElt indicates V is a GEP to a struct element, whose memcpys are
3401. /// flattened at the next level rather than collected here.
  3402. static void analyzePointer(const Value *V, PointerStatus &PS,
  3403. DxilTypeSystem &typeSys, bool bStructElt);
  3404. PointerStatus(unsigned size)
  3405. : storedType(StoredType::NotStored), loadedType(LoadedType::NotLoaded), StoredOnceValue(nullptr),
  3406. StoringMemcpy(nullptr), LoadingMemcpy(nullptr),
  3407. AccessingFunction(nullptr), HasMultipleAccessingFunctions(false),
  3408. Size(size) {}
  3409. void MarkAsStored() {
  3410. storedType = StoredType::Stored;
  3411. StoredOnceValue = nullptr;
  3412. }
  3413. void MarkAsLoaded() { loadedType = LoadedType::Loaded; }
  3414. };
  3415. void PointerStatus::analyzePointer(const Value *V, PointerStatus &PS,
  3416. DxilTypeSystem &typeSys, bool bStructElt) {
  3417. for (const User *U : V->users()) {
  3418. if (const Instruction *I = dyn_cast<Instruction>(U)) {
  3419. const Function *F = I->getParent()->getParent();
  3420. if (!PS.AccessingFunction) {
  3421. PS.AccessingFunction = F;
  3422. } else {
  3423. if (F != PS.AccessingFunction)
  3424. PS.HasMultipleAccessingFunctions = true;
  3425. }
  3426. }
  3427. if (const BitCastOperator *BC = dyn_cast<BitCastOperator>(U)) {
  3428. analyzePointer(BC, PS, typeSys, bStructElt);
  3429. } else if (const MemCpyInst *MC = dyn_cast<MemCpyInst>(U)) {
3430. // Do not collect memcpys on struct GEP uses.
3431. // These memcpys will be flattened at the next level.
  3432. if (!bStructElt) {
  3433. MemCpyInst *MI = const_cast<MemCpyInst *>(MC);
  3434. PS.memcpySet.insert(MI);
  3435. bool bFullCopy = false;
  3436. if (ConstantInt *Length = dyn_cast<ConstantInt>(MC->getLength())) {
  3437. bFullCopy = PS.Size == Length->getLimitedValue()
  3438. || PS.Size == 0 || Length->getLimitedValue() == 0; // handle unbounded arrays
  3439. }
  3440. if (MC->getRawDest() == V) {
  3441. if (bFullCopy &&
  3442. PS.storedType == StoredType::NotStored) {
  3443. PS.storedType = StoredType::MemcopyDestOnce;
  3444. PS.StoringMemcpy = MI;
  3445. } else {
  3446. PS.MarkAsStored();
  3447. PS.StoringMemcpy = nullptr;
  3448. }
  3449. } else if (MC->getRawSource() == V) {
  3450. if (bFullCopy &&
  3451. PS.loadedType == LoadedType::NotLoaded) {
  3452. PS.loadedType = LoadedType::MemcopySrcOnce;
  3453. PS.LoadingMemcpy = MI;
  3454. } else {
  3455. PS.MarkAsLoaded();
  3456. PS.LoadingMemcpy = nullptr;
  3457. }
  3458. }
  3459. } else {
  3460. if (MC->getRawDest() == V) {
  3461. PS.MarkAsStored();
  3462. } else {
  3463. DXASSERT(MC->getRawSource() == V, "must be source here");
  3464. PS.MarkAsLoaded();
  3465. }
  3466. }
  3467. } else if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
  3468. gep_type_iterator GEPIt = gep_type_begin(GEP);
  3469. gep_type_iterator GEPEnd = gep_type_end(GEP);
  3470. // Skip pointer idx.
  3471. GEPIt++;
3472. // Struct elements will be flattened at the next level.
  3473. bool bStructElt = (GEPIt != GEPEnd) && GEPIt->isStructTy();
  3474. analyzePointer(GEP, PS, typeSys, bStructElt);
  3475. } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
  3476. Value *V = SI->getOperand(0);
  3477. if (PS.storedType == StoredType::NotStored) {
  3478. PS.storedType = StoredType::StoredOnce;
  3479. PS.StoredOnceValue = V;
  3480. } else {
  3481. PS.MarkAsStored();
  3482. }
  3483. } else if (dyn_cast<LoadInst>(U)) {
  3484. PS.MarkAsLoaded();
  3485. } else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
  3486. Function *F = CI->getCalledFunction();
  3487. DxilFunctionAnnotation *annotation = typeSys.GetFunctionAnnotation(F);
  3488. if (!annotation) {
  3489. HLOpcodeGroup group = hlsl::GetHLOpcodeGroupByName(F);
  3490. switch (group) {
  3491. case HLOpcodeGroup::HLMatLoadStore: {
  3492. HLMatLoadStoreOpcode opcode =
  3493. static_cast<HLMatLoadStoreOpcode>(hlsl::GetHLOpcode(CI));
  3494. switch (opcode) {
  3495. case HLMatLoadStoreOpcode::ColMatLoad:
  3496. case HLMatLoadStoreOpcode::RowMatLoad:
  3497. PS.MarkAsLoaded();
  3498. break;
  3499. case HLMatLoadStoreOpcode::ColMatStore:
  3500. case HLMatLoadStoreOpcode::RowMatStore:
  3501. PS.MarkAsStored();
  3502. break;
  3503. default:
  3504. DXASSERT(0, "invalid opcode");
  3505. PS.MarkAsStored();
  3506. PS.MarkAsLoaded();
  3507. }
  3508. } break;
  3509. case HLOpcodeGroup::HLSubscript: {
  3510. HLSubscriptOpcode opcode =
  3511. static_cast<HLSubscriptOpcode>(hlsl::GetHLOpcode(CI));
  3512. switch (opcode) {
  3513. case HLSubscriptOpcode::VectorSubscript:
  3514. case HLSubscriptOpcode::ColMatElement:
  3515. case HLSubscriptOpcode::ColMatSubscript:
  3516. case HLSubscriptOpcode::RowMatElement:
  3517. case HLSubscriptOpcode::RowMatSubscript:
  3518. analyzePointer(CI, PS, typeSys, bStructElt);
  3519. break;
  3520. default:
3521. // The rest are resource ptrs like buf[i];
3522. // only the resource handle is read.
  3523. PS.MarkAsLoaded();
  3524. break;
  3525. }
  3526. } break;
  3527. default: {
3528. // If we are not sure whether it is an out param, treat it as one.
  3529. PS.MarkAsStored();
  3530. PS.MarkAsLoaded();
  3531. }
  3532. }
  3533. continue;
  3534. }
  3535. unsigned argSize = F->arg_size();
  3536. for (unsigned i = 0; i < argSize; i++) {
  3537. Value *arg = CI->getArgOperand(i);
  3538. if (V == arg) {
3539. // Do not replace struct args.
3540. // Mark as stored and loaded to disable the replacement.
  3541. PS.MarkAsStored();
  3542. PS.MarkAsLoaded();
  3543. }
  3544. }
  3545. }
  3546. }
  3547. }
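// Example classification (hypothetical): for a static global %dst whose only
// write is a full-size memcpy(%dst, %src, sizeof(%dst)) and whose remaining
// uses are loads, analyzePointer records
//   storedType = MemcopyDestOnce, StoringMemcpy = that memcpy,
//   loadedType = Loaded,
// which is exactly the shape LowerMemcpy below looks for when deciding whether
// %dst can simply be replaced by %src.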
  3548. static void ReplaceConstantWithInst(Constant *C, Value *V, IRBuilder<> &Builder) {
  3549. for (auto it = C->user_begin(); it != C->user_end(); ) {
  3550. User *U = *(it++);
  3551. if (Instruction *I = dyn_cast<Instruction>(U)) {
  3552. I->replaceUsesOfWith(C, V);
  3553. } else {
  3554. // Skip unused ConstantExpr.
  3555. if (U->user_empty())
  3556. continue;
  3557. ConstantExpr *CE = cast<ConstantExpr>(U);
  3558. Instruction *Inst = CE->getAsInstruction();
  3559. Builder.Insert(Inst);
  3560. Inst->replaceUsesOfWith(C, V);
  3561. ReplaceConstantWithInst(CE, Inst, Builder);
  3562. }
  3563. }
  3564. C->removeDeadConstantUsers();
  3565. }
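// Sketch of ReplaceConstantWithInst (hypothetical use): if C is a global @g
// with a ConstantExpr user such as getelementptr(@g, 0, 1), and V is a plain
// instruction (e.g. an alloca), the ConstantExpr is materialized with
// getAsInstruction(), inserted at the builder's point, its operand @g is
// swapped for V, and the same rewrite is applied recursively to the users of
// the old ConstantExpr before its dead constant users are dropped.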
  3566. static void ReplaceUnboundedArrayUses(Value *V, Value *Src, IRBuilder<> &Builder) {
  3567. for (auto it = V->user_begin(); it != V->user_end(); ) {
  3568. User *U = *(it++);
  3569. if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
  3570. SmallVector<Value *, 4> idxList(GEP->idx_begin(), GEP->idx_end());
  3571. Value *NewGEP = Builder.CreateGEP(Src, idxList);
  3572. GEP->replaceAllUsesWith(NewGEP);
  3573. } else if (BitCastInst *BC = dyn_cast<BitCastInst>(U)) {
  3574. BC->setOperand(0, Src);
  3575. } else {
  3576. DXASSERT(false, "otherwise unbounded array used in unexpected instruction");
  3577. }
  3578. }
  3579. }
  3580. static void ReplaceMemcpy(Value *V, Value *Src, MemCpyInst *MC) {
  3581. Type *TyV = V->getType()->getPointerElementType();
  3582. Type *TySrc = Src->getType()->getPointerElementType();
  3583. if (Constant *C = dyn_cast<Constant>(V)) {
  3584. if (TyV == TySrc) {
  3585. if (isa<Constant>(Src)) {
  3586. V->replaceAllUsesWith(Src);
  3587. } else {
  3588. // Replace Constant with a non-Constant.
  3589. IRBuilder<> Builder(MC);
  3590. ReplaceConstantWithInst(C, Src, Builder);
  3591. }
  3592. } else {
  3593. IRBuilder<> Builder(MC);
  3594. Src = Builder.CreateBitCast(Src, V->getType());
  3595. ReplaceConstantWithInst(C, Src, Builder);
  3596. }
  3597. } else {
  3598. if (TyV == TySrc) {
  3599. if (V != Src)
  3600. V->replaceAllUsesWith(Src);
  3601. } else {
  3602. DXASSERT((TyV->isArrayTy() && TySrc->isArrayTy()) &&
  3603. (TyV->getArrayNumElements() == 0 ||
  3604. TySrc->getArrayNumElements() == 0),
  3605. "otherwise mismatched types in memcpy are not unbounded array");
  3606. IRBuilder<> Builder(MC);
  3607. ReplaceUnboundedArrayUses(V, Src, Builder);
  3608. }
  3609. }
  3610. Value *RawDest = MC->getOperand(0);
  3611. Value *RawSrc = MC->getOperand(1);
  3612. MC->eraseFromParent();
  3613. if (Instruction *I = dyn_cast<Instruction>(RawDest)) {
  3614. if (I->user_empty())
  3615. I->eraseFromParent();
  3616. }
  3617. if (Instruction *I = dyn_cast<Instruction>(RawSrc)) {
  3618. if (I->user_empty())
  3619. I->eraseFromParent();
  3620. }
  3621. }
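// Rough example (hypothetical IR): given
//   call void @llvm.memcpy(i8* %dst.i8, i8* %src.i8, i64 N, ...)
// where %dst and %src have the same pointee type, ReplaceMemcpy forwards all
// uses of %dst to %src, erases the memcpy, and then erases the raw i8* bitcast
// operands if they became unused. The unbounded-array branch instead rewrites
// each GEP/bitcast of the [0 x T]* side onto the sized side.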
  3622. static bool ReplaceUseOfZeroInitEntry(Instruction *I, Value *V) {
  3623. BasicBlock *BB = I->getParent();
  3624. Function *F = I->getParent()->getParent();
  3625. for (auto U = V->user_begin(); U != V->user_end(); ) {
  3626. Instruction *UI = dyn_cast<Instruction>(*(U++));
  3627. if (!UI)
  3628. continue;
  3629. if (UI->getParent()->getParent() != F)
  3630. continue;
  3631. if (isa<GetElementPtrInst>(UI) || isa<BitCastInst>(UI)) {
  3632. if (!ReplaceUseOfZeroInitEntry(I, UI))
  3633. return false;
  3634. else
  3635. continue;
  3636. }
  3637. if (BB != UI->getParent() || UI == I)
  3638. continue;
  3639. // I is the last inst in the block after split.
  3640. // Any inst in current block is before I.
  3641. if (LoadInst *LI = dyn_cast<LoadInst>(UI)) {
  3642. LI->replaceAllUsesWith(ConstantAggregateZero::get(LI->getType()));
  3643. LI->eraseFromParent();
  3644. continue;
  3645. }
  3646. return false;
  3647. }
  3648. return true;
  3649. }
  3650. static bool ReplaceUseOfZeroInitPostDom(Instruction *I, Value *V,
  3651. PostDominatorTree &PDT) {
  3652. BasicBlock *BB = I->getParent();
  3653. Function *F = I->getParent()->getParent();
  3654. for (auto U = V->user_begin(); U != V->user_end(); ) {
  3655. Instruction *UI = dyn_cast<Instruction>(*(U++));
  3656. if (!UI)
  3657. continue;
  3658. if (UI->getParent()->getParent() != F)
  3659. continue;
  3660. if (!PDT.dominates(BB, UI->getParent()))
  3661. return false;
  3662. if (isa<GetElementPtrInst>(UI) || isa<BitCastInst>(UI)) {
  3663. if (!ReplaceUseOfZeroInitPostDom(I, UI, PDT))
  3664. return false;
  3665. else
  3666. continue;
  3667. }
  3668. if (BB != UI->getParent() || UI == I)
  3669. continue;
  3670. // I is the last inst in the block after split.
  3671. // Any inst in current block is before I.
  3672. if (LoadInst *LI = dyn_cast<LoadInst>(UI)) {
  3673. LI->replaceAllUsesWith(ConstantAggregateZero::get(LI->getType()));
  3674. LI->eraseFromParent();
  3675. continue;
  3676. }
  3677. return false;
  3678. }
  3679. return true;
  3680. }
3681. // When a zero-initialized GV has only one def, all uses before the def should
3682. // use zero.
  3683. static bool ReplaceUseOfZeroInitBeforeDef(Instruction *I, GlobalVariable *GV) {
  3684. BasicBlock *BB = I->getParent();
  3685. Function *F = I->getParent()->getParent();
3686. // Make sure I is the last inst in BB.
  3687. if (I != BB->getTerminator())
  3688. BB->splitBasicBlock(I->getNextNode());
  3689. if (&F->getEntryBlock() == I->getParent()) {
  3690. return ReplaceUseOfZeroInitEntry(I, GV);
  3691. } else {
  3692. // Post dominator tree.
  3693. PostDominatorTree PDT;
  3694. PDT.runOnFunction(*F);
  3695. return ReplaceUseOfZeroInitPostDom(I, GV, PDT);
  3696. }
  3697. }
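// Rough sketch of the intent (hypothetical HLSL): for a zero-initialized
//   static float a[4];
// whose single definition is a memcpy, loads of a that sit in the defining
// block before the memcpy are folded to zeroinitializer; roughly, uses in
// other blocks are tolerated in the entry-block case, or when the defining
// block post-dominates them. Otherwise the helpers return false and
// LowerMemcpy falls back to treating the global as Stored.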
  3698. bool SROA_Helper::LowerMemcpy(Value *V, DxilFieldAnnotation *annotation,
  3699. DxilTypeSystem &typeSys, const DataLayout &DL,
  3700. bool bAllowReplace) {
  3701. Type *Ty = V->getType();
  3702. if (!Ty->isPointerTy()) {
  3703. return false;
  3704. }
3705. // Get access status and collect memcpy uses.
3706. // If MemcpyDestOnce, replace dest with src when dest is not an out param;
3707. // otherwise flatten the memcpy.
  3708. unsigned size = DL.getTypeAllocSize(Ty->getPointerElementType());
  3709. PointerStatus PS(size);
  3710. const bool bStructElt = false;
  3711. PointerStatus::analyzePointer(V, PS, typeSys, bStructElt);
  3712. if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
  3713. if (GV->hasInitializer() && !isa<UndefValue>(GV->getInitializer())) {
  3714. if (PS.storedType == PointerStatus::StoredType::NotStored) {
  3715. PS.storedType = PointerStatus::StoredType::InitializerStored;
  3716. } else if (PS.storedType == PointerStatus::StoredType::MemcopyDestOnce) {
3717. // For a single memcpy store, if the store does not dominate all users,
3718. // mark it as Stored.
  3719. // Case like:
  3720. // struct A { float4 x[25]; };
  3721. // A a;
  3722. // static A a2;
  3723. // void set(A aa) { aa = a; }
3724. // Call set inside the entry function, then use a2.
  3725. if (isa<ConstantAggregateZero>(GV->getInitializer())) {
  3726. Instruction * Memcpy = PS.StoringMemcpy;
  3727. if (!ReplaceUseOfZeroInitBeforeDef(Memcpy, GV)) {
  3728. PS.storedType = PointerStatus::StoredType::Stored;
  3729. }
  3730. }
  3731. } else {
  3732. PS.storedType = PointerStatus::StoredType::Stored;
  3733. }
  3734. }
  3735. }
  3736. if (bAllowReplace && !PS.HasMultipleAccessingFunctions) {
  3737. if (PS.storedType == PointerStatus::StoredType::MemcopyDestOnce &&
3738. // Skip arguments: an input argument already has an input value, so it is not dest-once anymore.
  3739. !isa<Argument>(V)) {
  3740. // Replace with src of memcpy.
  3741. MemCpyInst *MC = PS.StoringMemcpy;
  3742. if (MC->getSourceAddressSpace() == MC->getDestAddressSpace()) {
  3743. Value *Src = MC->getOperand(1);
3744. // Only remove one level of bitcast generated from inlining.
  3745. if (BitCastOperator *BC = dyn_cast<BitCastOperator>(Src))
  3746. Src = BC->getOperand(0);
  3747. if (GEPOperator *GEP = dyn_cast<GEPOperator>(Src)) {
3748. // For a GEP, the ptr could have other GEP reads/writes,
3749. // so scanning only this one GEP is not enough.
  3750. Value *Ptr = GEP->getPointerOperand();
  3751. while (GEPOperator *NestedGEP = dyn_cast<GEPOperator>(Ptr))
  3752. Ptr = NestedGEP->getPointerOperand();
  3753. if (CallInst *PtrCI = dyn_cast<CallInst>(Ptr)) {
  3754. hlsl::HLOpcodeGroup group =
  3755. hlsl::GetHLOpcodeGroup(PtrCI->getCalledFunction());
  3756. if (group == HLOpcodeGroup::HLSubscript) {
  3757. HLSubscriptOpcode opcode =
  3758. static_cast<HLSubscriptOpcode>(hlsl::GetHLOpcode(PtrCI));
  3759. if (opcode == HLSubscriptOpcode::CBufferSubscript) {
  3760. // Ptr from CBuffer is safe.
  3761. ReplaceMemcpy(V, Src, MC);
  3762. return true;
  3763. }
  3764. }
  3765. }
  3766. } else if (!isa<CallInst>(Src)) {
3767. // Resource ptrs should not be replaced.
3768. // Need to make sure Src is not updated after the current memcpy.
3769. // Check that Src is not stored to more than once.
  3770. PointerStatus SrcPS(size);
  3771. PointerStatus::analyzePointer(Src, SrcPS, typeSys, bStructElt);
  3772. if (SrcPS.storedType != PointerStatus::StoredType::Stored) {
  3773. ReplaceMemcpy(V, Src, MC);
  3774. return true;
  3775. }
  3776. }
  3777. }
  3778. } else if (PS.loadedType == PointerStatus::LoadedType::MemcopySrcOnce) {
  3779. // Replace dst of memcpy.
  3780. MemCpyInst *MC = PS.LoadingMemcpy;
  3781. if (MC->getSourceAddressSpace() == MC->getDestAddressSpace()) {
  3782. Value *Dest = MC->getOperand(0);
3783. // Only remove one level of bitcast generated from inlining.
  3784. if (BitCastOperator *BC = dyn_cast<BitCastOperator>(Dest))
  3785. Dest = BC->getOperand(0);
3786. // For a GEP, the ptr could have other GEP reads/writes,
3787. // so scanning only one GEP is not enough.
3788. // Resource ptrs should not be replaced either.
  3789. if (!isa<GEPOperator>(Dest) && !isa<CallInst>(Dest) &&
  3790. !isa<BitCastOperator>(Dest)) {
3791. // Need to make sure Dest is not updated after the current memcpy.
3792. // Check that Dest is not stored to more than once.
  3793. PointerStatus DestPS(size);
  3794. PointerStatus::analyzePointer(Dest, DestPS, typeSys, bStructElt);
  3795. if (DestPS.storedType != PointerStatus::StoredType::Stored) {
  3796. ReplaceMemcpy(Dest, V, MC);
3797. // V still needs to be flattened.
3798. // Lower the memcpys coming from Dest.
  3799. return LowerMemcpy(V, annotation, typeSys, DL, bAllowReplace);
  3800. }
  3801. }
  3802. }
  3803. }
  3804. }
  3805. for (MemCpyInst *MC : PS.memcpySet) {
  3806. MemcpySplitter::SplitMemCpy(MC, DL, annotation, typeSys);
  3807. }
  3808. return false;
  3809. }
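// Hypothetical HLSL shape for the CBufferSubscript fast path above:
//   struct A { float4 m; };
//   cbuffer CB { A g; };
//   A local = g;   // the front end emits memcpy(local, <gep of cbuffer subscript>, sizeof(A))
// Because the memcpy source chain bottoms out in an HLSubscript call with the
// CBufferSubscript opcode, local is replaced by the cbuffer pointer and the
// memcpy disappears instead of being split element by element.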
3810. /// MarkEmptyStructUsers - Add instructions related to an empty struct to DeadInsts.
  3811. void SROA_Helper::MarkEmptyStructUsers(Value *V, SmallVector<Value *, 32> &DeadInsts) {
  3812. UndefValue *undef = UndefValue::get(V->getType());
  3813. for (auto itU = V->user_begin(), E = V->user_end(); itU != E;) {
  3814. Value *U = *(itU++);
  3815. // Kill memcpy, set operands to undef for call and ret, and recurse
  3816. if (MemCpyInst *MC = dyn_cast<MemCpyInst>(U)) {
  3817. DeadInsts.emplace_back(MC);
  3818. } else if (CallInst *CI = dyn_cast<CallInst>(U)) {
  3819. for (auto &operand : CI->operands()) {
  3820. if (operand == V)
  3821. operand.set(undef);
  3822. }
  3823. } else if (ReturnInst *Ret = dyn_cast<ReturnInst>(U)) {
  3824. Ret->setOperand(0, undef);
  3825. } else if (isa<Constant>(U) || isa<GetElementPtrInst>(U) ||
  3826. isa<BitCastInst>(U) || isa<LoadInst>(U) || isa<StoreInst>(U)) {
  3827. // Recurse users
  3828. MarkEmptyStructUsers(U, DeadInsts);
  3829. } else {
  3830. DXASSERT(false, "otherwise, recursing unexpected empty struct user");
  3831. }
  3832. }
  3833. if (Instruction *I = dyn_cast<Instruction>(V)) {
3834. // Only need to add insts with no uses here.
3835. // DeleteDeadInstructions will delete everything else.
  3836. if (I->user_empty())
  3837. DeadInsts.emplace_back(I);
  3838. }
  3839. }
  3840. bool SROA_Helper::IsEmptyStructType(Type *Ty, DxilTypeSystem &typeSys) {
  3841. if (isa<ArrayType>(Ty))
  3842. Ty = Ty->getArrayElementType();
  3843. if (StructType *ST = dyn_cast<StructType>(Ty)) {
  3844. if (!dxilutil::IsHLSLMatrixType(Ty)) {
  3845. DxilStructAnnotation *SA = typeSys.GetStructAnnotation(ST);
  3846. if (SA && SA->IsEmptyStruct())
  3847. return true;
  3848. }
  3849. }
  3850. return false;
  3851. }
  3852. //===----------------------------------------------------------------------===//
  3853. // SROA on function parameters.
  3854. //===----------------------------------------------------------------------===//
  3855. static void LegalizeDxilInputOutputs(Function *F,
  3856. DxilFunctionAnnotation *EntryAnnotation,
  3857. const DataLayout &DL,
  3858. DxilTypeSystem &typeSys);
  3859. static void InjectReturnAfterNoReturnPreserveOutput(HLModule &HLM);
  3860. namespace {
  3861. class SROA_Parameter_HLSL : public ModulePass {
  3862. HLModule *m_pHLModule;
  3863. public:
  3864. static char ID; // Pass identification, replacement for typeid
  3865. explicit SROA_Parameter_HLSL() : ModulePass(ID) {}
  3866. const char *getPassName() const override { return "SROA Parameter HLSL"; }
  3867. static void CopyElementsOfStructsWithIdenticalLayout(IRBuilder<>& builder, Value* destPtr, Value* srcPtr, Type *ty, std::vector<unsigned>& idxlist);
  3868. static void RewriteBitcastWithIdenticalStructs(Function *F);
  3869. bool runOnModule(Module &M) override {
3870. // Patch memcpy to cover the case where bitcast (gep ptr, 0, 0) is transformed
3871. // into bitcast ptr.
  3872. MemcpySplitter::PatchMemCpyWithZeroIdxGEP(M);
  3873. m_pHLModule = &M.GetOrCreateHLModule();
  3874. const DataLayout &DL = M.getDataLayout();
  3875. // Load up debug information, to cross-reference values and the instructions
  3876. // used to load them.
  3877. m_HasDbgInfo = getDebugMetadataVersionFromModule(M) != 0;
  3878. InjectReturnAfterNoReturnPreserveOutput(*m_pHLModule);
  3879. std::deque<Function *> WorkList;
  3880. std::vector<Function *> DeadHLFunctions;
  3881. for (Function &F : M.functions()) {
  3882. HLOpcodeGroup group = GetHLOpcodeGroup(&F);
  3883. // Skip HL operations.
  3884. if (group != HLOpcodeGroup::NotHL ||
  3885. group == HLOpcodeGroup::HLExtIntrinsic) {
  3886. if (F.user_empty())
  3887. DeadHLFunctions.emplace_back(&F);
  3888. continue;
  3889. }
  3890. if (F.isDeclaration()) {
  3891. // Skip llvm intrinsic.
  3892. if (F.isIntrinsic())
  3893. continue;
  3894. // Skip unused external function.
  3895. if (F.user_empty())
  3896. continue;
  3897. }
  3898. // Skip void(void) functions.
  3899. if (F.getReturnType()->isVoidTy() && F.arg_size() == 0)
  3900. continue;
3901. // Skip library functions, except for LegalizeDxilInputOutputs.
  3902. if (&F != m_pHLModule->GetEntryFunction() &&
  3903. !m_pHLModule->IsEntryThatUsesSignatures(&F)) {
  3904. if (!F.isDeclaration())
  3905. LegalizeDxilInputOutputs(&F, m_pHLModule->GetFunctionAnnotation(&F),
  3906. DL, m_pHLModule->GetTypeSystem());
  3907. continue;
  3908. }
  3909. WorkList.emplace_back(&F);
  3910. }
3911. // Remove dead HL functions here.
3912. // This is for HL functions which have a body and are always inlined.
  3913. for (Function *F : DeadHLFunctions) {
  3914. F->eraseFromParent();
  3915. }
  3916. // Preprocess aggregate function param used as function call arg.
  3917. for (Function *F : WorkList) {
  3918. preprocessArgUsedInCall(F);
  3919. }
  3920. // Process the worklist
  3921. while (!WorkList.empty()) {
  3922. Function *F = WorkList.front();
  3923. WorkList.pop_front();
  3924. RewriteBitcastWithIdenticalStructs(F);
  3925. createFlattenedFunction(F);
  3926. }
3927. // Replace functions with their flattened versions once all functions have been flattened.
  3928. for (auto Iter : funcMap)
  3929. replaceCall(Iter.first, Iter.second);
  3930. // Update patch constant function.
  3931. for (Function &F : M.functions()) {
  3932. if (F.isDeclaration())
  3933. continue;
  3934. if (!m_pHLModule->HasDxilFunctionProps(&F))
  3935. continue;
  3936. DxilFunctionProps &funcProps = m_pHLModule->GetDxilFunctionProps(&F);
  3937. if (funcProps.shaderKind == DXIL::ShaderKind::Hull) {
  3938. Function *oldPatchConstantFunc =
  3939. funcProps.ShaderProps.HS.patchConstantFunc;
  3940. if (funcMap.count(oldPatchConstantFunc))
  3941. m_pHLModule->SetPatchConstantFunctionForHS(&F, funcMap[oldPatchConstantFunc]);
  3942. }
  3943. }
3944. // Remove the original functions, now replaced by their flattened versions.
  3945. for (auto Iter : funcMap) {
  3946. Function *F = Iter.first;
  3947. Function *flatF = Iter.second;
  3948. flatF->takeName(F);
  3949. F->eraseFromParent();
  3950. }
  3951. // Flatten internal global.
  3952. std::vector<GlobalVariable *> staticGVs;
  3953. for (GlobalVariable &GV : M.globals()) {
  3954. if (dxilutil::IsStaticGlobal(&GV) ||
  3955. dxilutil::IsSharedMemoryGlobal(&GV)) {
  3956. staticGVs.emplace_back(&GV);
  3957. } else {
  3958. // merge GEP use for global.
  3959. HLModule::MergeGepUse(&GV);
  3960. }
  3961. }
  3962. for (GlobalVariable *GV : staticGVs)
  3963. flattenGlobal(GV);
  3964. // Remove unused internal global.
  3965. staticGVs.clear();
  3966. for (GlobalVariable &GV : M.globals()) {
  3967. if (dxilutil::IsStaticGlobal(&GV) ||
  3968. dxilutil::IsSharedMemoryGlobal(&GV)) {
  3969. staticGVs.emplace_back(&GV);
  3970. }
  3971. }
  3972. for (GlobalVariable *GV : staticGVs) {
  3973. bool onlyStoreUse = true;
  3974. for (User *user : GV->users()) {
  3975. if (isa<StoreInst>(user))
  3976. continue;
  3977. if (isa<ConstantExpr>(user) && user->user_empty())
  3978. continue;
  3979. // Check matrix store.
  3980. if (dxilutil::IsHLSLMatrixType(
  3981. GV->getType()->getPointerElementType())) {
  3982. if (CallInst *CI = dyn_cast<CallInst>(user)) {
  3983. if (GetHLOpcodeGroupByName(CI->getCalledFunction()) ==
  3984. HLOpcodeGroup::HLMatLoadStore) {
  3985. HLMatLoadStoreOpcode opcode =
  3986. static_cast<HLMatLoadStoreOpcode>(GetHLOpcode(CI));
  3987. if (opcode == HLMatLoadStoreOpcode::ColMatStore ||
  3988. opcode == HLMatLoadStoreOpcode::RowMatStore)
  3989. continue;
  3990. }
  3991. }
  3992. }
  3993. onlyStoreUse = false;
  3994. break;
  3995. }
  3996. if (onlyStoreUse) {
  3997. for (auto UserIt = GV->user_begin(); UserIt != GV->user_end();) {
  3998. Value *User = *(UserIt++);
  3999. if (Instruction *I = dyn_cast<Instruction>(User)) {
  4000. I->eraseFromParent();
  4001. }
  4002. else {
  4003. ConstantExpr *CE = cast<ConstantExpr>(User);
  4004. CE->dropAllReferences();
  4005. }
  4006. }
  4007. GV->eraseFromParent();
  4008. }
  4009. }
  4010. return true;
  4011. }
  4012. private:
  4013. void DeleteDeadInstructions();
  4014. void preprocessArgUsedInCall(Function *F);
  4015. void moveFunctionBody(Function *F, Function *flatF);
  4016. void replaceCall(Function *F, Function *flatF);
  4017. void createFlattenedFunction(Function *F);
  4018. void
  4019. flattenArgument(Function *F, Value *Arg, bool bForParam,
  4020. DxilParameterAnnotation &paramAnnotation,
  4021. std::vector<Value *> &FlatParamList,
  4022. std::vector<DxilParameterAnnotation> &FlatRetAnnotationList,
  4023. BasicBlock *EntryBlock, DbgDeclareInst *DDI);
  4024. Value *castResourceArgIfRequired(Value *V, Type *Ty, bool bOut,
  4025. DxilParamInputQual inputQual,
  4026. IRBuilder<> &Builder);
  4027. Value *castArgumentIfRequired(Value *V, Type *Ty, bool bOut,
  4028. DxilParamInputQual inputQual,
  4029. DxilFieldAnnotation &annotation,
  4030. IRBuilder<> &Builder);
  4031. // Replace use of parameter which changed type when flatten.
  4032. // Also add information to Arg if required.
  4033. void replaceCastParameter(Value *NewParam, Value *OldParam, Function &F,
  4034. Argument *Arg, const DxilParamInputQual inputQual,
  4035. IRBuilder<> &Builder);
  4036. void allocateSemanticIndex(
  4037. std::vector<DxilParameterAnnotation> &FlatAnnotationList,
  4038. unsigned startArgIndex, llvm::StringMap<Type *> &semanticTypeMap);
  4039. bool hasDynamicVectorIndexing(Value *V);
  4040. void flattenGlobal(GlobalVariable *GV);
  4041. static std::vector<Value*> GetConstValueIdxList(IRBuilder<>& builder, std::vector<unsigned> idxlist);
  4042. /// DeadInsts - Keep track of instructions we have made dead, so that
  4043. /// we can remove them after we are done working.
  4044. SmallVector<Value *, 32> DeadInsts;
4045. // Map from the original function to the flattened version.
  4046. std::unordered_map<Function *, Function *> funcMap;
4047. // Map from the original arg/param to the flattened cast version.
  4048. std::unordered_map<Value *, std::pair<Value*, DxilParamInputQual>> castParamMap;
4049. // Map from the first element of a vector to the list of all elements of the vector.
  4050. std::unordered_map<Value *, SmallVector<Value*, 4> > vectorEltsMap;
4051. // Set of row-major matrix parameters.
  4052. std::unordered_set<Value *> castRowMajorParamMap;
  4053. bool m_HasDbgInfo;
  4054. };
4055. // When replacing an aggregate by its scalar elements,
  4056. // the first element will preserve the original semantic,
  4057. // and the subsequent ones will temporarily use this value.
  4058. // We then run a pass to fix the semantics and properly renumber them
  4059. // once the aggregate has been fully expanded.
  4060. //
  4061. // For example:
  4062. // struct Foo { float a; float b; };
  4063. // void main(Foo foo : TEXCOORD0, float bar : TEXCOORD0)
  4064. //
  4065. // Will be expanded to
  4066. // void main(float a : TEXCOORD0, float b : *, float bar : TEXCOORD0)
  4067. //
  4068. // And then fixed up to
  4069. // void main(float a : TEXCOORD0, float b : TEXCOORD1, float bar : TEXCOORD0)
  4070. //
  4071. // (which will later on fail validation due to duplicate semantics).
  4072. constexpr const char *ContinuedPseudoSemantic = "*";
  4073. }
  4074. char SROA_Parameter_HLSL::ID = 0;
  4075. INITIALIZE_PASS(SROA_Parameter_HLSL, "scalarrepl-param-hlsl",
  4076. "Scalar Replacement of Aggregates HLSL (parameters)", false,
  4077. false)
  4078. void SROA_Parameter_HLSL::RewriteBitcastWithIdenticalStructs(Function *F) {
  4079. if (F->isDeclaration())
  4080. return;
  4081. // Gather list of bitcast involving src and dest structs with identical layout
  4082. std::vector<BitCastInst*> worklist;
  4083. for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
  4084. if (BitCastInst *BCI = dyn_cast<BitCastInst>(&*I)) {
  4085. Type *DstTy = BCI->getDestTy();
  4086. Type *SrcTy = BCI->getSrcTy();
  4087. if (!SrcTy->isPointerTy() || !DstTy->isPointerTy())
  4088. continue;
  4089. DstTy = DstTy->getPointerElementType();
  4090. SrcTy = SrcTy->getPointerElementType();
  4091. if (!SrcTy->isStructTy() || !DstTy->isStructTy())
  4092. continue;
  4093. StructType *DstST = cast<StructType>(DstTy);
  4094. StructType *SrcST = cast<StructType>(SrcTy);
  4095. if (!SrcST->isLayoutIdentical(DstST))
  4096. continue;
  4097. worklist.push_back(BCI);
  4098. }
  4099. }
  4100. // Replace bitcast involving src and dest structs with identical layout
  4101. while (!worklist.empty()) {
  4102. BitCastInst *BCI = worklist.back();
  4103. worklist.pop_back();
  4104. StructType *srcStTy = cast<StructType>(BCI->getSrcTy()->getPointerElementType());
  4105. StructType *destStTy = cast<StructType>(BCI->getDestTy()->getPointerElementType());
  4106. Value* srcPtr = BCI->getOperand(0);
  4107. IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(BCI->getParent()->getParent()));
  4108. AllocaInst *destPtr = AllocaBuilder.CreateAlloca(destStTy);
  4109. IRBuilder<> InstBuilder(BCI);
  4110. std::vector<unsigned> idxlist = { 0 };
  4111. CopyElementsOfStructsWithIdenticalLayout(InstBuilder, destPtr, srcPtr, srcStTy, idxlist);
  4112. BCI->replaceAllUsesWith(destPtr);
  4113. BCI->eraseFromParent();
  4114. }
  4115. }
  4116. std::vector<Value *>
  4117. SROA_Parameter_HLSL::GetConstValueIdxList(IRBuilder<> &builder,
  4118. std::vector<unsigned> idxlist) {
  4119. std::vector<Value *> idxConstList;
  4120. for (unsigned idx : idxlist) {
  4121. idxConstList.push_back(ConstantInt::get(builder.getInt32Ty(), idx));
  4122. }
  4123. return idxConstList;
  4124. }
  4125. void SROA_Parameter_HLSL::CopyElementsOfStructsWithIdenticalLayout(
  4126. IRBuilder<> &builder, Value *destPtr, Value *srcPtr, Type *ty,
  4127. std::vector<unsigned>& idxlist) {
  4128. if (ty->isStructTy()) {
  4129. for (unsigned i = 0; i < ty->getStructNumElements(); i++) {
  4130. idxlist.push_back(i);
  4131. CopyElementsOfStructsWithIdenticalLayout(
  4132. builder, destPtr, srcPtr, ty->getStructElementType(i), idxlist);
  4133. idxlist.pop_back();
  4134. }
  4135. } else if (ty->isArrayTy()) {
  4136. for (unsigned i = 0; i < ty->getArrayNumElements(); i++) {
  4137. idxlist.push_back(i);
  4138. CopyElementsOfStructsWithIdenticalLayout(
  4139. builder, destPtr, srcPtr, ty->getArrayElementType(), idxlist);
  4140. idxlist.pop_back();
  4141. }
  4142. } else if (ty->isIntegerTy() || ty->isFloatTy() || ty->isDoubleTy() ||
  4143. ty->isHalfTy() || ty->isVectorTy()) {
  4144. Value *srcGEP =
  4145. builder.CreateInBoundsGEP(srcPtr, GetConstValueIdxList(builder, idxlist));
  4146. Value *destGEP =
  4147. builder.CreateInBoundsGEP(destPtr, GetConstValueIdxList(builder, idxlist));
  4148. LoadInst *LI = builder.CreateLoad(srcGEP);
  4149. builder.CreateStore(LI, destGEP);
  4150. } else {
  4151. DXASSERT(0, "encountered unsupported type when copying elements of identical structs.");
  4152. }
  4153. }
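// Illustration (hypothetical layout): for
//   struct { float a; float b[2]; }
// the recursion visits the leaves a, b[0], b[1] with idxlist values
//   {0, 0}, {0, 1, 0}, {0, 1, 1}
// (the leading 0 dereferences the pointer) and emits a pair of inbounds GEPs
// plus a load/store for each leaf, so two layout-identical structs are copied
// field by field without a memcpy.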
  4154. /// DeleteDeadInstructions - Erase instructions on the DeadInstrs list,
  4155. /// recursively including all their operands that become trivially dead.
  4156. void SROA_Parameter_HLSL::DeleteDeadInstructions() {
  4157. while (!DeadInsts.empty()) {
  4158. Instruction *I = cast<Instruction>(DeadInsts.pop_back_val());
  4159. for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
  4160. if (Instruction *U = dyn_cast<Instruction>(*OI)) {
  4161. // Zero out the operand and see if it becomes trivially dead.
  4162. // (But, don't add allocas to the dead instruction list -- they are
  4163. // already on the worklist and will be deleted separately.)
  4164. *OI = nullptr;
  4165. if (isInstructionTriviallyDead(U) && !isa<AllocaInst>(U))
  4166. DeadInsts.push_back(U);
  4167. }
  4168. I->eraseFromParent();
  4169. }
  4170. }
  4171. bool SROA_Parameter_HLSL::hasDynamicVectorIndexing(Value *V) {
  4172. for (User *U : V->users()) {
  4173. if (!U->getType()->isPointerTy())
  4174. continue;
  4175. if (dyn_cast<GEPOperator>(U)) {
  4176. gep_type_iterator GEPIt = gep_type_begin(U), E = gep_type_end(U);
  4177. for (; GEPIt != E; ++GEPIt) {
  4178. if (isa<VectorType>(*GEPIt)) {
  4179. Value *VecIdx = GEPIt.getOperand();
  4180. if (!isa<ConstantInt>(VecIdx))
  4181. return true;
  4182. }
  4183. }
  4184. }
  4185. }
  4186. return false;
  4187. }
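// Example of what this catches (hypothetical HLSL): for
//   static float4 v;  ...  v[i] = x;   // i not a compile-time constant
// the GEP into the vector has a non-constant vector index, so the function
// returns true and flattenGlobal keeps the vector intact instead of
// scalarizing it.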
  4188. void SROA_Parameter_HLSL::flattenGlobal(GlobalVariable *GV) {
  4189. Type *Ty = GV->getType()->getPointerElementType();
  4190. // Skip basic types.
  4191. if (!Ty->isAggregateType() && !Ty->isVectorTy())
  4192. return;
  4193. std::deque<Value *> WorkList;
  4194. WorkList.push_back(GV);
  4195. // merge GEP use for global.
  4196. HLModule::MergeGepUse(GV);
  4197. DxilTypeSystem &dxilTypeSys = m_pHLModule->GetTypeSystem();
  4198. // Only used to create ConstantExpr.
  4199. IRBuilder<> Builder(m_pHLModule->GetCtx());
  4200. std::vector<Instruction*> deadAllocas;
  4201. const DataLayout &DL = GV->getParent()->getDataLayout();
  4202. unsigned debugOffset = 0;
  4203. std::unordered_map<Value*, StringRef> EltNameMap;
  4204. // Process the worklist
  4205. while (!WorkList.empty()) {
  4206. GlobalVariable *EltGV = cast<GlobalVariable>(WorkList.front());
  4207. WorkList.pop_front();
  4208. const bool bAllowReplace = true;
4209. if (SROA_Helper::LowerMemcpy(EltGV, /*annotation*/ nullptr, dxilTypeSys, DL,
  4210. bAllowReplace)) {
  4211. continue;
  4212. }
  4213. // Flat Global vector if no dynamic vector indexing.
  4214. bool bFlatVector = !hasDynamicVectorIndexing(EltGV);
  4215. // Disable scalarization of groupshared vector arrays
  4216. if (GV->getType()->getAddressSpace() == DXIL::kTGSMAddrSpace &&
  4217. Ty->isArrayTy())
  4218. bFlatVector = false;
  4219. std::vector<Value *> Elts;
  4220. bool SROAed = SROA_Helper::DoScalarReplacement(
  4221. EltGV, Elts, Builder, bFlatVector,
  4222. // TODO: set precise.
  4223. /*hasPrecise*/ false, dxilTypeSys, DL, DeadInsts);
  4224. if (SROAed) {
  4225. // Push Elts into workList.
4226. // Use rbegin to make sure the order does not change.
  4227. for (auto iter = Elts.rbegin(); iter != Elts.rend(); iter++) {
  4228. WorkList.push_front(*iter);
  4229. if (m_HasDbgInfo) {
  4230. StringRef EltName = (*iter)->getName().ltrim(GV->getName());
  4231. EltNameMap[*iter] = EltName;
  4232. }
  4233. }
  4234. EltGV->removeDeadConstantUsers();
  4235. // Now erase any instructions that were made dead while rewriting the
  4236. // alloca.
  4237. DeleteDeadInstructions();
  4238. ++NumReplaced;
  4239. } else {
  4240. // Add debug info for flattened globals.
  4241. if (m_HasDbgInfo && GV != EltGV) {
  4242. DebugInfoFinder &Finder = m_pHLModule->GetOrCreateDebugInfoFinder();
  4243. Type *Ty = EltGV->getType()->getElementType();
  4244. unsigned size = DL.getTypeAllocSizeInBits(Ty);
  4245. unsigned align = DL.getPrefTypeAlignment(Ty);
  4246. HLModule::CreateElementGlobalVariableDebugInfo(
  4247. GV, Finder, EltGV, size, align, debugOffset,
  4248. EltNameMap[EltGV]);
  4249. debugOffset += size;
  4250. }
  4251. }
  4252. }
  4253. DeleteDeadInstructions();
  4254. if (GV->user_empty()) {
  4255. GV->removeDeadConstantUsers();
  4256. GV->eraseFromParent();
  4257. }
  4258. }
  4259. static DxilFieldAnnotation &GetEltAnnotation(Type *Ty, unsigned idx, DxilFieldAnnotation &annotation, DxilTypeSystem &dxilTypeSys) {
  4260. while (Ty->isArrayTy())
  4261. Ty = Ty->getArrayElementType();
  4262. if (StructType *ST = dyn_cast<StructType>(Ty)) {
  4263. if (dxilutil::IsHLSLMatrixType(Ty))
  4264. return annotation;
  4265. DxilStructAnnotation *SA = dxilTypeSys.GetStructAnnotation(ST);
  4266. if (SA) {
  4267. DxilFieldAnnotation &FA = SA->GetFieldAnnotation(idx);
  4268. return FA;
  4269. }
  4270. }
  4271. return annotation;
  4272. }
4273. // Note: Semantic index allocation.
4274. // Semantic indices are allocated based on the linear layout.
4275. // For the following code:
  4276. /*
  4277. struct S {
  4278. float4 m;
  4279. float4 m2;
  4280. };
  4281. S s[2] : semantic;
  4282. struct S2 {
  4283. float4 m[2];
  4284. float4 m2[2];
  4285. };
  4286. S2 s2 : semantic;
  4287. */
  4288. // The semantic index is like this:
  4289. // s[0].m : semantic0
  4290. // s[0].m2 : semantic1
  4291. // s[1].m : semantic2
  4292. // s[1].m2 : semantic3
  4293. // s2.m[0] : semantic0
  4294. // s2.m[1] : semantic1
  4295. // s2.m2[0] : semantic2
  4296. // s2.m2[1] : semantic3
4297. // But when flattening the argument, the result is like this:
  4298. // float4 s_m[2], float4 s_m2[2].
  4299. // float4 s2_m[2], float4 s2_m2[2].
  4300. // To do the allocation, need to map from each element to its flattened argument.
  4301. // Say arg index of float4 s_m[2] is 0, float4 s_m2[2] is 1.
  4302. // Need to get 0 from s[0].m and s[1].m, get 1 from s[0].m2 and s[1].m2.
4303. // Allocate the arguments with the same semantic string starting from the type
4304. // where the semantic starts (S2 for s2.m[2] and s2.m2[2]).
4305. // Iterate over each element of the type, saving and updating the semantic index.
4306. // The mapping from element to arg (s[0].m2 -> s.m2[2]) is done via argIdx.
4307. // argIdx is only incremented by 1 when a struct field is finished.
  4308. static unsigned AllocateSemanticIndex(
  4309. Type *Ty, unsigned &semIndex, unsigned argIdx, unsigned endArgIdx,
  4310. std::vector<DxilParameterAnnotation> &FlatAnnotationList) {
  4311. if (Ty->isPointerTy()) {
  4312. return AllocateSemanticIndex(Ty->getPointerElementType(), semIndex, argIdx,
  4313. endArgIdx, FlatAnnotationList);
  4314. } else if (Ty->isArrayTy()) {
  4315. unsigned arraySize = Ty->getArrayNumElements();
  4316. unsigned updatedArgIdx = argIdx;
  4317. Type *EltTy = Ty->getArrayElementType();
  4318. for (unsigned i = 0; i < arraySize; i++) {
  4319. updatedArgIdx = AllocateSemanticIndex(EltTy, semIndex, argIdx, endArgIdx,
  4320. FlatAnnotationList);
  4321. }
  4322. return updatedArgIdx;
  4323. } else if (Ty->isStructTy() && !dxilutil::IsHLSLMatrixType(Ty)) {
  4324. unsigned fieldsCount = Ty->getStructNumElements();
  4325. for (unsigned i = 0; i < fieldsCount; i++) {
  4326. Type *EltTy = Ty->getStructElementType(i);
  4327. argIdx = AllocateSemanticIndex(EltTy, semIndex, argIdx, endArgIdx,
  4328. FlatAnnotationList);
  4329. if (!(EltTy->isStructTy() && !dxilutil::IsHLSLMatrixType(EltTy))) {
  4330. // Update argIdx only when it is a leaf node.
  4331. argIdx++;
  4332. }
  4333. }
  4334. return argIdx;
  4335. } else {
  4336. DXASSERT(argIdx < endArgIdx, "arg index out of bound");
  4337. DxilParameterAnnotation &paramAnnotation = FlatAnnotationList[argIdx];
  4338. // Get element size.
  4339. unsigned rows = 1;
  4340. if (paramAnnotation.HasMatrixAnnotation()) {
  4341. const DxilMatrixAnnotation &matrix =
  4342. paramAnnotation.GetMatrixAnnotation();
  4343. if (matrix.Orientation == MatrixOrientation::RowMajor) {
  4344. rows = matrix.Rows;
  4345. } else {
  4346. DXASSERT_NOMSG(matrix.Orientation == MatrixOrientation::ColumnMajor);
  4347. rows = matrix.Cols;
  4348. }
  4349. }
  4350. // Save semIndex.
  4351. for (unsigned i = 0; i < rows; i++)
  4352. paramAnnotation.AppendSemanticIndex(semIndex + i);
  4353. // Update semIndex.
  4354. semIndex += rows;
  4355. return argIdx;
  4356. }
  4357. }
  4358. void SROA_Parameter_HLSL::allocateSemanticIndex(
  4359. std::vector<DxilParameterAnnotation> &FlatAnnotationList,
  4360. unsigned startArgIndex, llvm::StringMap<Type *> &semanticTypeMap) {
  4361. unsigned endArgIndex = FlatAnnotationList.size();
  4362. // Allocate semantic index.
  4363. for (unsigned i = startArgIndex; i < endArgIndex; ++i) {
  4364. // Group by semantic names.
  4365. DxilParameterAnnotation &flatParamAnnotation = FlatAnnotationList[i];
  4366. const std::string &semantic = flatParamAnnotation.GetSemanticString();
  4367. // If semantic is undefined, an error will be emitted elsewhere. For now,
  4368. // we should avoid asserting.
  4369. if (semantic.empty())
  4370. continue;
  4371. StringRef baseSemName; // The 'FOO' in 'FOO1'.
  4372. uint32_t semIndex; // The '1' in 'FOO1'
  4373. // Split semName and index.
  4374. Semantic::DecomposeNameAndIndex(semantic, &baseSemName, &semIndex);
  4375. unsigned semGroupEnd = i + 1;
  4376. while (semGroupEnd < endArgIndex &&
  4377. FlatAnnotationList[semGroupEnd].GetSemanticString() == ContinuedPseudoSemantic) {
  4378. FlatAnnotationList[semGroupEnd].SetSemanticString(baseSemName);
  4379. ++semGroupEnd;
  4380. }
4381. DXASSERT(semanticTypeMap.count(semantic) > 0, "Must have semantic type");
  4382. Type *semanticTy = semanticTypeMap[semantic];
  4383. AllocateSemanticIndex(semanticTy, semIndex, /*argIdx*/ i,
  4384. /*endArgIdx*/ semGroupEnd, FlatAnnotationList);
  4385. // Update i.
  4386. i = semGroupEnd - 1;
  4387. }
  4388. }
  4389. //
  4390. // Cast parameters.
  4391. //
  4392. static void CopyHandleToResourcePtr(Value *Handle, Value *ResPtr, HLModule &HLM,
  4393. IRBuilder<> &Builder) {
  4394. // Cast it to resource.
  4395. Type *ResTy = ResPtr->getType()->getPointerElementType();
  4396. Value *Res = HLM.EmitHLOperationCall(Builder, HLOpcodeGroup::HLCast,
  4397. (unsigned)HLCastOpcode::HandleToResCast,
  4398. ResTy, {Handle}, *HLM.GetModule());
4399. // Store the casted resource to ResPtr.
  4400. Builder.CreateStore(Res, ResPtr);
  4401. }
  4402. static void CopyHandlePtrToResourcePtr(Value *HandlePtr, Value *ResPtr,
  4403. HLModule &HLM, IRBuilder<> &Builder) {
  4404. // Load the handle.
  4405. Value *Handle = Builder.CreateLoad(HandlePtr);
  4406. CopyHandleToResourcePtr(Handle, ResPtr, HLM, Builder);
  4407. }
  4408. static Value *CastResourcePtrToHandle(Value *Res, Type *HandleTy, HLModule &HLM,
  4409. IRBuilder<> &Builder) {
  4410. // Load OldArg.
  4411. Value *LdRes = Builder.CreateLoad(Res);
  4412. Value *Handle = HLM.EmitHLOperationCall(
  4413. Builder, HLOpcodeGroup::HLCreateHandle,
  4414. /*opcode*/ 0, HandleTy, {LdRes}, *HLM.GetModule());
  4415. return Handle;
  4416. }
  4417. static void CopyResourcePtrToHandlePtr(Value *Res, Value *HandlePtr,
  4418. HLModule &HLM, IRBuilder<> &Builder) {
  4419. Type *HandleTy = HandlePtr->getType()->getPointerElementType();
  4420. Value *Handle = CastResourcePtrToHandle(Res, HandleTy, HLM, Builder);
  4421. Builder.CreateStore(Handle, HandlePtr);
  4422. }
  4423. static void CopyVectorPtrToEltsPtr(Value *VecPtr, ArrayRef<Value *> elts,
  4424. unsigned vecSize, IRBuilder<> &Builder) {
  4425. Value *Vec = Builder.CreateLoad(VecPtr);
  4426. for (unsigned i = 0; i < vecSize; i++) {
  4427. Value *Elt = Builder.CreateExtractElement(Vec, i);
  4428. Builder.CreateStore(Elt, elts[i]);
  4429. }
  4430. }
  4431. static void CopyEltsPtrToVectorPtr(ArrayRef<Value *> elts, Value *VecPtr,
  4432. Type *VecTy, unsigned vecSize,
  4433. IRBuilder<> &Builder) {
  4434. Value *Vec = UndefValue::get(VecTy);
  4435. for (unsigned i = 0; i < vecSize; i++) {
  4436. Value *Elt = Builder.CreateLoad(elts[i]);
  4437. Vec = Builder.CreateInsertElement(Vec, Elt, i);
  4438. }
  4439. Builder.CreateStore(Vec, VecPtr);
  4440. }
  4441. static void CopyMatToArrayPtr(Value *Mat, Value *ArrayPtr,
  4442. unsigned arrayBaseIdx, HLModule &HLM,
  4443. IRBuilder<> &Builder, bool bRowMajor) {
  4444. // Mat val is row major.
  4445. HLMatrixType MatTy = HLMatrixType::cast(Mat->getType());
  4446. Type *VecTy = MatTy.getLoweredVectorTypeForReg();
  4447. Value *Vec =
  4448. HLM.EmitHLOperationCall(Builder, HLOpcodeGroup::HLCast,
  4449. (unsigned)HLCastOpcode::RowMatrixToVecCast, VecTy,
  4450. {Mat}, *HLM.GetModule());
  4451. Value *zero = Builder.getInt32(0);
  4452. for (unsigned r = 0; r < MatTy.getNumRows(); r++) {
  4453. for (unsigned c = 0; c < MatTy.getNumColumns(); c++) {
  4454. unsigned matIdx = MatTy.getColumnMajorIndex(r, c);
  4455. Value *Elt = Builder.CreateExtractElement(Vec, matIdx);
  4456. Value *Ptr = Builder.CreateInBoundsGEP(
  4457. ArrayPtr, {zero, Builder.getInt32(arrayBaseIdx + matIdx)});
  4458. Builder.CreateStore(Elt, Ptr);
  4459. }
  4460. }
  4461. }
  4462. static void CopyMatPtrToArrayPtr(Value *MatPtr, Value *ArrayPtr,
  4463. unsigned arrayBaseIdx, HLModule &HLM,
  4464. IRBuilder<> &Builder, bool bRowMajor) {
  4465. Type *Ty = MatPtr->getType()->getPointerElementType();
  4466. Value *Mat = nullptr;
  4467. if (bRowMajor) {
  4468. Mat = HLM.EmitHLOperationCall(Builder, HLOpcodeGroup::HLMatLoadStore,
  4469. (unsigned)HLMatLoadStoreOpcode::RowMatLoad,
  4470. Ty, {MatPtr}, *HLM.GetModule());
  4471. } else {
  4472. Mat = HLM.EmitHLOperationCall(Builder, HLOpcodeGroup::HLMatLoadStore,
  4473. (unsigned)HLMatLoadStoreOpcode::ColMatLoad,
  4474. Ty, {MatPtr}, *HLM.GetModule());
  4475. // Matrix value should be row major.
  4476. Mat = HLM.EmitHLOperationCall(Builder, HLOpcodeGroup::HLCast,
  4477. (unsigned)HLCastOpcode::ColMatrixToRowMatrix,
  4478. Ty, {Mat}, *HLM.GetModule());
  4479. }
  4480. CopyMatToArrayPtr(Mat, ArrayPtr, arrayBaseIdx, HLM, Builder, bRowMajor);
  4481. }
  4482. static Value *LoadArrayPtrToMat(Value *ArrayPtr, unsigned arrayBaseIdx,
  4483. Type *Ty, HLModule &HLM, IRBuilder<> &Builder,
  4484. bool bRowMajor) {
  4485. HLMatrixType MatTy = HLMatrixType::cast(Ty);
4486. // HLInit operands are in row-major order.
  4487. SmallVector<Value *, 16> Elts;
  4488. Value *zero = Builder.getInt32(0);
  4489. for (unsigned r = 0; r < MatTy.getNumRows(); r++) {
  4490. for (unsigned c = 0; c < MatTy.getNumColumns(); c++) {
  4491. unsigned matIdx = bRowMajor
  4492. ? MatTy.getRowMajorIndex(r, c)
  4493. : MatTy.getColumnMajorIndex(r, c);
  4494. Value *Ptr = Builder.CreateInBoundsGEP(
  4495. ArrayPtr, {zero, Builder.getInt32(arrayBaseIdx + matIdx)});
  4496. Value *Elt = Builder.CreateLoad(Ptr);
  4497. Elts.emplace_back(Elt);
  4498. }
  4499. }
  4500. return HLM.EmitHLOperationCall(Builder, HLOpcodeGroup::HLInit,
  4501. /*opcode*/ 0, Ty, {Elts}, *HLM.GetModule());
  4502. }
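// Layout sketch for LoadArrayPtrToMat (hypothetical 2x2 matrix): the HLInit
// operands are collected in row-major (r, c) order, but each element is read
// from the flattened array at arrayBaseIdx + getRowMajorIndex(r, c) when
// bRowMajor is set, or arrayBaseIdx + getColumnMajorIndex(r, c) otherwise.
// For a 2x2 with bRowMajor == false that is slots base+0, base+2, base+1,
// base+3 for m00, m01, m10, m11 respectively.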
  4503. static void CopyArrayPtrToMatPtr(Value *ArrayPtr, unsigned arrayBaseIdx,
  4504. Value *MatPtr, HLModule &HLM,
  4505. IRBuilder<> &Builder, bool bRowMajor) {
  4506. Type *Ty = MatPtr->getType()->getPointerElementType();
  4507. Value *Mat =
  4508. LoadArrayPtrToMat(ArrayPtr, arrayBaseIdx, Ty, HLM, Builder, bRowMajor);
  4509. if (bRowMajor) {
  4510. HLM.EmitHLOperationCall(Builder, HLOpcodeGroup::HLMatLoadStore,
  4511. (unsigned)HLMatLoadStoreOpcode::RowMatStore, Ty,
  4512. {MatPtr, Mat}, *HLM.GetModule());
  4513. } else {
  4514. // Mat is row major.
  4515. // Cast it to col major before store.
  4516. Mat = HLM.EmitHLOperationCall(Builder, HLOpcodeGroup::HLCast,
  4517. (unsigned)HLCastOpcode::RowMatrixToColMatrix,
  4518. Ty, {Mat}, *HLM.GetModule());
  4519. HLM.EmitHLOperationCall(Builder, HLOpcodeGroup::HLMatLoadStore,
  4520. (unsigned)HLMatLoadStoreOpcode::ColMatStore, Ty,
  4521. {MatPtr, Mat}, *HLM.GetModule());
  4522. }
  4523. }
  4524. using CopyFunctionTy = void(Value *FromPtr, Value *ToPtr, HLModule &HLM,
  4525. Type *HandleTy, IRBuilder<> &Builder,
  4526. bool bRowMajor);
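// Recursively copy a multi-dimensional FromArray into the flat 1-D ToArray.
// Vectors and matrices are scattered element by element, other leaf types go
// through CastCopyFn, and calcIdx tracks the linearized element index.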
  4527. static void
  4528. CastCopyArrayMultiDimTo1Dim(Value *FromArray, Value *ToArray, Type *CurFromTy,
  4529. std::vector<Value *> &idxList, unsigned calcIdx,
  4530. Type *HandleTy, HLModule &HLM, IRBuilder<> &Builder,
  4531. CopyFunctionTy CastCopyFn, bool bRowMajor) {
  4532. if (CurFromTy->isVectorTy()) {
  4533. // Copy vector to array.
  4534. Value *FromPtr = Builder.CreateInBoundsGEP(FromArray, idxList);
  4535. Value *V = Builder.CreateLoad(FromPtr);
  4536. unsigned vecSize = CurFromTy->getVectorNumElements();
  4537. Value *zeroIdx = Builder.getInt32(0);
  4538. for (unsigned i = 0; i < vecSize; i++) {
  4539. Value *ToPtr = Builder.CreateInBoundsGEP(
  4540. ToArray, {zeroIdx, Builder.getInt32(calcIdx++)});
  4541. Value *Elt = Builder.CreateExtractElement(V, i);
  4542. Builder.CreateStore(Elt, ToPtr);
  4543. }
  4544. } else if (HLMatrixType MatTy = HLMatrixType::dyn_cast(CurFromTy)) {
  4545. // Copy matrix to array.
  4546. // Calculate the offset.
  4547. unsigned offset = calcIdx * MatTy.getNumElements();
  4548. Value *FromPtr = Builder.CreateInBoundsGEP(FromArray, idxList);
  4549. CopyMatPtrToArrayPtr(FromPtr, ToArray, offset, HLM, Builder, bRowMajor);
  4550. } else if (!CurFromTy->isArrayTy()) {
  4551. Value *FromPtr = Builder.CreateInBoundsGEP(FromArray, idxList);
  4552. Value *ToPtr = Builder.CreateInBoundsGEP(
  4553. ToArray, {Builder.getInt32(0), Builder.getInt32(calcIdx)});
  4554. CastCopyFn(FromPtr, ToPtr, HLM, HandleTy, Builder, bRowMajor);
  4555. } else {
  4556. unsigned size = CurFromTy->getArrayNumElements();
  4557. Type *FromEltTy = CurFromTy->getArrayElementType();
  4558. for (unsigned i = 0; i < size; i++) {
  4559. idxList.push_back(Builder.getInt32(i));
  4560. unsigned idx = calcIdx * size + i;
  4561. CastCopyArrayMultiDimTo1Dim(FromArray, ToArray, FromEltTy, idxList, idx,
  4562. HandleTy, HLM, Builder, CastCopyFn,
  4563. bRowMajor);
  4564. idxList.pop_back();
  4565. }
  4566. }
  4567. }
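// The inverse copy: gather elements from the flat 1-D FromArray and write
// them back into the multi-dimensional ToArray.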
  4568. static void
  4569. CastCopyArray1DimToMultiDim(Value *FromArray, Value *ToArray, Type *CurToTy,
  4570. std::vector<Value *> &idxList, unsigned calcIdx,
  4571. Type *HandleTy, HLModule &HLM, IRBuilder<> &Builder,
  4572. CopyFunctionTy CastCopyFn, bool bRowMajor) {
  4573. if (CurToTy->isVectorTy()) {
  4574. // Copy array to vector.
  4575. Value *V = UndefValue::get(CurToTy);
  4576. unsigned vecSize = CurToTy->getVectorNumElements();
  4577. // Calculate the offset.
  4578. unsigned offset = calcIdx * vecSize;
  4579. Value *zeroIdx = Builder.getInt32(0);
  4580. Value *ToPtr = Builder.CreateInBoundsGEP(ToArray, idxList);
  4581. for (unsigned i = 0; i < vecSize; i++) {
  4582. Value *FromPtr = Builder.CreateInBoundsGEP(
  4583. FromArray, {zeroIdx, Builder.getInt32(offset++)});
  4584. Value *Elt = Builder.CreateLoad(FromPtr);
  4585. V = Builder.CreateInsertElement(V, Elt, i);
  4586. }
  4587. Builder.CreateStore(V, ToPtr);
} else if (HLMatrixType MatTy = HLMatrixType::dyn_cast(CurToTy)) {
  4589. // Copy array to matrix.
  4590. // Calculate the offset.
  4591. unsigned offset = calcIdx * MatTy.getNumElements();
  4592. Value *ToPtr = Builder.CreateInBoundsGEP(ToArray, idxList);
  4593. CopyArrayPtrToMatPtr(FromArray, offset, ToPtr, HLM, Builder, bRowMajor);
  4594. } else if (!CurToTy->isArrayTy()) {
  4595. Value *FromPtr = Builder.CreateInBoundsGEP(
  4596. FromArray, {Builder.getInt32(0), Builder.getInt32(calcIdx)});
  4597. Value *ToPtr = Builder.CreateInBoundsGEP(ToArray, idxList);
  4598. CastCopyFn(FromPtr, ToPtr, HLM, HandleTy, Builder, bRowMajor);
  4599. } else {
  4600. unsigned size = CurToTy->getArrayNumElements();
  4601. Type *ToEltTy = CurToTy->getArrayElementType();
  4602. for (unsigned i = 0; i < size; i++) {
  4603. idxList.push_back(Builder.getInt32(i));
  4604. unsigned idx = calcIdx * size + i;
  4605. CastCopyArray1DimToMultiDim(FromArray, ToArray, ToEltTy, idxList, idx,
  4606. HandleTy, HLM, Builder, CastCopyFn,
  4607. bRowMajor);
  4608. idxList.pop_back();
  4609. }
  4610. }
  4611. }
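// Copy from a pointer to the original parameter type into its lowered
// counterpart: resources become handles, vectors and matrices are flattened
// into arrays, and nested arrays recurse via CastCopyArrayMultiDimTo1Dim.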
  4612. static void CastCopyOldPtrToNewPtr(Value *OldPtr, Value *NewPtr, HLModule &HLM,
  4613. Type *HandleTy, IRBuilder<> &Builder,
  4614. bool bRowMajor) {
  4615. Type *NewTy = NewPtr->getType()->getPointerElementType();
  4616. Type *OldTy = OldPtr->getType()->getPointerElementType();
  4617. if (NewTy == HandleTy) {
  4618. CopyResourcePtrToHandlePtr(OldPtr, NewPtr, HLM, Builder);
  4619. } else if (OldTy->isVectorTy()) {
  4620. // Copy vector to array.
  4621. Value *V = Builder.CreateLoad(OldPtr);
  4622. unsigned vecSize = OldTy->getVectorNumElements();
  4623. Value *zeroIdx = Builder.getInt32(0);
  4624. for (unsigned i = 0; i < vecSize; i++) {
  4625. Value *EltPtr = Builder.CreateGEP(NewPtr, {zeroIdx, Builder.getInt32(i)});
  4626. Value *Elt = Builder.CreateExtractElement(V, i);
  4627. Builder.CreateStore(Elt, EltPtr);
  4628. }
  4629. } else if (dxilutil::IsHLSLMatrixType(OldTy)) {
  4630. CopyMatPtrToArrayPtr(OldPtr, NewPtr, /*arrayBaseIdx*/ 0, HLM, Builder,
  4631. bRowMajor);
  4632. } else if (OldTy->isArrayTy()) {
  4633. std::vector<Value *> idxList;
  4634. idxList.emplace_back(Builder.getInt32(0));
  4635. CastCopyArrayMultiDimTo1Dim(OldPtr, NewPtr, OldTy, idxList, /*calcIdx*/ 0,
  4636. HandleTy, HLM, Builder, CastCopyOldPtrToNewPtr,
  4637. bRowMajor);
  4638. }
  4639. }
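// Copy in the opposite direction: from the lowered representation back into
// a pointer to the original parameter type.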
  4640. static void CastCopyNewPtrToOldPtr(Value *NewPtr, Value *OldPtr, HLModule &HLM,
  4641. Type *HandleTy, IRBuilder<> &Builder,
  4642. bool bRowMajor) {
  4643. Type *NewTy = NewPtr->getType()->getPointerElementType();
  4644. Type *OldTy = OldPtr->getType()->getPointerElementType();
  4645. if (NewTy == HandleTy) {
  4646. CopyHandlePtrToResourcePtr(NewPtr, OldPtr, HLM, Builder);
  4647. } else if (OldTy->isVectorTy()) {
  4648. // Copy array to vector.
  4649. Value *V = UndefValue::get(OldTy);
  4650. unsigned vecSize = OldTy->getVectorNumElements();
  4651. Value *zeroIdx = Builder.getInt32(0);
  4652. for (unsigned i = 0; i < vecSize; i++) {
  4653. Value *EltPtr = Builder.CreateGEP(NewPtr, {zeroIdx, Builder.getInt32(i)});
  4654. Value *Elt = Builder.CreateLoad(EltPtr);
  4655. V = Builder.CreateInsertElement(V, Elt, i);
  4656. }
  4657. Builder.CreateStore(V, OldPtr);
  4658. } else if (dxilutil::IsHLSLMatrixType(OldTy)) {
  4659. CopyArrayPtrToMatPtr(NewPtr, /*arrayBaseIdx*/ 0, OldPtr, HLM, Builder,
  4660. bRowMajor);
  4661. } else if (OldTy->isArrayTy()) {
  4662. std::vector<Value *> idxList;
  4663. idxList.emplace_back(Builder.getInt32(0));
  4664. CastCopyArray1DimToMultiDim(NewPtr, OldPtr, OldTy, idxList, /*calcIdx*/ 0,
  4665. HandleTy, HLM, Builder, CastCopyNewPtrToOldPtr,
  4666. bRowMajor);
  4667. }
  4668. }
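// Reconnect a lowered (cast) parameter with the original parameter value:
// copy in at function entry for "in" qualifiers, copy out before every
// return for "out" qualifiers, and record resource attributes for handles.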
  4669. void SROA_Parameter_HLSL::replaceCastParameter(
  4670. Value *NewParam, Value *OldParam, Function &F, Argument *Arg,
  4671. const DxilParamInputQual inputQual, IRBuilder<> &Builder) {
  4672. Type *HandleTy = m_pHLModule->GetOP()->GetHandleType();
  4673. Type *HandlePtrTy = PointerType::get(HandleTy, 0);
  4674. Module &M = *m_pHLModule->GetModule();
  4675. Type *NewTy = NewParam->getType();
  4676. Type *OldTy = OldParam->getType();
  4677. bool bIn = inputQual == DxilParamInputQual::Inout ||
  4678. inputQual == DxilParamInputQual::In;
  4679. bool bOut = inputQual == DxilParamInputQual::Inout ||
  4680. inputQual == DxilParamInputQual::Out;
// Make sure the insert point is after the OldParam instruction.
  4682. if (Instruction *I = dyn_cast<Instruction>(OldParam)) {
  4683. Builder.SetInsertPoint(I->getNextNode());
  4684. }
  4685. if (DbgDeclareInst *DDI = llvm::FindAllocaDbgDeclare(OldParam)) {
  4686. // Add debug info to new param.
  4687. DIBuilder DIB(*F.getParent(), /*AllowUnresolved*/ false);
  4688. DIExpression *DDIExp = DDI->getExpression();
  4689. DIB.insertDeclare(NewParam, DDI->getVariable(), DDIExp, DDI->getDebugLoc(),
  4690. Builder.GetInsertPoint());
  4691. }
  4692. if (isa<Argument>(OldParam) && OldTy->isPointerTy()) {
  4693. // OldParam will be removed with Old function.
  4694. // Create alloca to replace it.
  4695. IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(&F));
  4696. Value *AllocParam = AllocaBuilder.CreateAlloca(OldTy->getPointerElementType());
  4697. OldParam->replaceAllUsesWith(AllocParam);
  4698. OldParam = AllocParam;
  4699. }
  4700. if (NewTy == HandleTy) {
  4701. CopyHandleToResourcePtr(NewParam, OldParam, *m_pHLModule, Builder);
  4702. // Save resource attribute.
  4703. Type *ResTy = OldTy->getPointerElementType();
  4704. MDNode *MD = HLModule::GetDxilResourceAttrib(ResTy, M);
  4705. m_pHLModule->MarkDxilResourceAttrib(Arg, MD);
  4706. } else if (vectorEltsMap.count(NewParam)) {
  4707. // Vector is flattened to scalars.
  4708. Type *VecTy = OldTy;
  4709. if (VecTy->isPointerTy())
  4710. VecTy = VecTy->getPointerElementType();
  4711. // Flattened vector.
  4712. SmallVector<Value *, 4> &elts = vectorEltsMap[NewParam];
  4713. unsigned vecSize = elts.size();
  4714. if (NewTy->isPointerTy()) {
  4715. if (bIn) {
  4716. // Copy NewParam to OldParam at entry.
  4717. CopyEltsPtrToVectorPtr(elts, OldParam, VecTy, vecSize, Builder);
  4718. }
  4719. // bOut must be true here.
  4720. // Store the OldParam to NewParam before every return.
  4721. for (auto &BB : F.getBasicBlockList()) {
  4722. if (ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
  4723. IRBuilder<> RetBuilder(RI);
  4724. CopyVectorPtrToEltsPtr(OldParam, elts, vecSize, RetBuilder);
  4725. }
  4726. }
  4727. } else {
  4728. // Must be in parameter.
  4729. // Copy NewParam to OldParam at entry.
  4730. Value *Vec = UndefValue::get(VecTy);
  4731. for (unsigned i = 0; i < vecSize; i++) {
  4732. Vec = Builder.CreateInsertElement(Vec, elts[i], i);
  4733. }
  4734. if (OldTy->isPointerTy()) {
  4735. Builder.CreateStore(Vec, OldParam);
  4736. } else {
  4737. OldParam->replaceAllUsesWith(Vec);
  4738. }
  4739. }
  4740. // Don't need elts anymore.
  4741. vectorEltsMap.erase(NewParam);
  4742. } else if (!NewTy->isPointerTy()) {
  4743. // Ptr param is cast to non-ptr param.
  4744. // Must be in param.
  4745. // Store NewParam to OldParam at entry.
  4746. Builder.CreateStore(NewParam, OldParam);
  4747. } else if (dxilutil::IsHLSLMatrixType(OldTy)) {
  4748. bool bRowMajor = castRowMajorParamMap.count(NewParam);
  4749. Value *Mat = LoadArrayPtrToMat(NewParam, /*arrayBaseIdx*/ 0, OldTy,
  4750. *m_pHLModule, Builder, bRowMajor);
  4751. OldParam->replaceAllUsesWith(Mat);
  4752. } else {
  4753. bool bRowMajor = castRowMajorParamMap.count(NewParam);
  4754. // NewTy is pointer type.
  4755. if (bIn) {
  4756. // Copy NewParam to OldParam at entry.
  4757. CastCopyNewPtrToOldPtr(NewParam, OldParam, *m_pHLModule, HandleTy,
  4758. Builder, bRowMajor);
  4759. }
  4760. if (bOut) {
  4761. // Store the OldParam to NewParam before every return.
  4762. for (auto &BB : F.getBasicBlockList()) {
  4763. if (ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
  4764. IRBuilder<> RetBuilder(RI);
  4765. CastCopyOldPtrToNewPtr(OldParam, NewParam, *m_pHLModule, HandleTy,
  4766. RetBuilder, bRowMajor);
  4767. }
  4768. }
  4769. }
  4770. Type *NewEltTy = dxilutil::GetArrayEltTy(NewTy);
  4771. Type *OldEltTy = dxilutil::GetArrayEltTy(OldTy);
  4772. if (NewEltTy == HandlePtrTy) {
  4773. // Save resource attribute.
  4774. Type *ResTy = OldEltTy;
  4775. MDNode *MD = HLModule::GetDxilResourceAttrib(ResTy, M);
  4776. m_pHLModule->MarkDxilResourceAttrib(Arg, MD);
  4777. }
  4778. }
  4779. }
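// Lower resource-typed arguments (and arrays of resources) to DXIL handles,
// remembering the original value in castParamMap so replaceCastParameter can
// reconnect it later.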
  4780. Value *SROA_Parameter_HLSL::castResourceArgIfRequired(
  4781. Value *V, Type *Ty, bool bOut,
  4782. DxilParamInputQual inputQual,
  4783. IRBuilder<> &Builder) {
  4784. Type *HandleTy = m_pHLModule->GetOP()->GetHandleType();
  4785. Module &M = *m_pHLModule->GetModule();
  4786. IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Builder.GetInsertPoint()));
  4787. // Lower resource type to handle ty.
  4788. if (dxilutil::IsHLSLObjectType(Ty) &&
  4789. !HLModule::IsStreamOutputPtrType(V->getType())) {
  4790. Value *Res = V;
  4791. if (!bOut) {
  4792. Value *LdRes = Builder.CreateLoad(Res);
  4793. V = m_pHLModule->EmitHLOperationCall(Builder,
  4794. HLOpcodeGroup::HLCreateHandle,
  4795. /*opcode*/ 0, HandleTy, { LdRes }, M);
  4796. }
  4797. else {
  4798. V = AllocaBuilder.CreateAlloca(HandleTy);
  4799. }
  4800. castParamMap[V] = std::make_pair(Res, inputQual);
  4801. }
  4802. else if (Ty->isArrayTy()) {
  4803. unsigned arraySize = 1;
  4804. Type *AT = Ty;
  4805. while (AT->isArrayTy()) {
  4806. arraySize *= AT->getArrayNumElements();
  4807. AT = AT->getArrayElementType();
  4808. }
  4809. if (dxilutil::IsHLSLObjectType(AT)) {
  4810. Value *Res = V;
  4811. Type *Ty = ArrayType::get(HandleTy, arraySize);
  4812. V = AllocaBuilder.CreateAlloca(Ty);
  4813. castParamMap[V] = std::make_pair(Res, inputQual);
  4814. }
  4815. }
  4816. return V;
  4817. }
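// Prepare a flattened argument for the new signature: drop the pointer for
// non-out scalars and vectors, lower resources to handles, and rewrite
// matrix users to consume row-major values when the parameter is column major.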
  4818. Value *SROA_Parameter_HLSL::castArgumentIfRequired(
  4819. Value *V, Type *Ty, bool bOut,
  4820. DxilParamInputQual inputQual, DxilFieldAnnotation &annotation,
  4821. IRBuilder<> &Builder) {
  4822. Module &M = *m_pHLModule->GetModule();
  4823. IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Builder.GetInsertPoint()));
  4824. // Remove pointer for vector/scalar which is not out.
  4825. if (V->getType()->isPointerTy() && !Ty->isAggregateType() && !bOut) {
  4826. Value *Ptr = AllocaBuilder.CreateAlloca(Ty);
  4827. V->replaceAllUsesWith(Ptr);
// Create the load here to produce the correct type.
// Ptr will be stored with the correct value in replaceCastParameter.
if (Ptr->hasOneUse()) {
// Load after the existing user so the call argument can be replaced;
// otherwise the call argument would load undef.
// This does not hurt the parameter: the new load comes only after the first
// user and is still before all the load users.
  4835. Instruction *User = cast<Instruction>(*(Ptr->user_begin()));
  4836. IRBuilder<> CallBuilder(User->getNextNode());
  4837. V = CallBuilder.CreateLoad(Ptr);
  4838. } else {
  4839. V = Builder.CreateLoad(Ptr);
  4840. }
  4841. castParamMap[V] = std::make_pair(Ptr, inputQual);
  4842. }
  4843. V = castResourceArgIfRequired(V, Ty, bOut, inputQual, Builder);
// An entry-function matrix value parameter has an explicit orientation.
// Make sure its users consume a row-major matrix value.
  4846. bool updateToColMajor = annotation.HasMatrixAnnotation() &&
  4847. annotation.GetMatrixAnnotation().Orientation ==
  4848. MatrixOrientation::ColumnMajor;
  4849. if (updateToColMajor) {
  4850. if (V->getType()->isPointerTy()) {
  4851. for (User *user : V->users()) {
  4852. CallInst *CI = dyn_cast<CallInst>(user);
  4853. if (!CI)
  4854. continue;
  4855. HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
  4856. if (group != HLOpcodeGroup::HLMatLoadStore)
  4857. continue;
  4858. HLMatLoadStoreOpcode opcode =
  4859. static_cast<HLMatLoadStoreOpcode>(GetHLOpcode(CI));
  4860. Type *opcodeTy = Builder.getInt32Ty();
  4861. switch (opcode) {
  4862. case HLMatLoadStoreOpcode::RowMatLoad: {
  4863. // Update matrix function opcode to col major version.
  4864. Value *rowOpArg = ConstantInt::get(
  4865. opcodeTy,
  4866. static_cast<unsigned>(HLMatLoadStoreOpcode::ColMatLoad));
  4867. CI->setOperand(HLOperandIndex::kOpcodeIdx, rowOpArg);
  4868. // Cast it to row major.
  4869. CallInst *RowMat = HLModule::EmitHLOperationCall(
  4870. Builder, HLOpcodeGroup::HLCast,
  4871. (unsigned)HLCastOpcode::ColMatrixToRowMatrix, Ty, {CI}, M);
  4872. CI->replaceAllUsesWith(RowMat);
  4873. // Set arg to CI again.
  4874. RowMat->setArgOperand(HLOperandIndex::kUnaryOpSrc0Idx, CI);
  4875. } break;
case HLMatLoadStoreOpcode::RowMatStore: {
// Update matrix function opcode to col major version.
Value *rowOpArg = ConstantInt::get(
opcodeTy,
static_cast<unsigned>(HLMatLoadStoreOpcode::ColMatStore));
CI->setOperand(HLOperandIndex::kOpcodeIdx, rowOpArg);
Value *Mat = CI->getArgOperand(HLOperandIndex::kMatStoreValOpIdx);
// Cast it to col major.
CallInst *RowMat = HLModule::EmitHLOperationCall(
Builder, HLOpcodeGroup::HLCast,
(unsigned)HLCastOpcode::RowMatrixToColMatrix, Ty, {Mat}, M);
CI->setArgOperand(HLOperandIndex::kMatStoreValOpIdx, RowMat);
} break;
  4889. }
  4890. }
  4891. } else {
  4892. CallInst *RowMat = HLModule::EmitHLOperationCall(
  4893. Builder, HLOpcodeGroup::HLCast,
  4894. (unsigned)HLCastOpcode::ColMatrixToRowMatrix, Ty, {V}, M);
  4895. V->replaceAllUsesWith(RowMat);
  4896. // Set arg to V again.
  4897. RowMat->setArgOperand(HLOperandIndex::kUnaryOpSrc0Idx, V);
  4898. }
  4899. }
  4900. return V;
  4901. }
  4902. struct AnnotatedValue {
  4903. llvm::Value *Value;
  4904. DxilFieldAnnotation Annotation;
  4905. };
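// Flatten one argument (or the return value) into a list of leaf values and
// matching annotations, recursively SROA'ing aggregates and handling
// semantics, SV_Target arrays, and stream-output objects along the way.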
  4906. void SROA_Parameter_HLSL::flattenArgument(
  4907. Function *F, Value *Arg, bool bForParam,
  4908. DxilParameterAnnotation &paramAnnotation,
  4909. std::vector<Value *> &FlatParamList,
  4910. std::vector<DxilParameterAnnotation> &FlatAnnotationList,
  4911. BasicBlock *EntryBlock, DbgDeclareInst *DDI) {
  4912. std::deque<AnnotatedValue> WorkList;
  4913. WorkList.push_back({ Arg, paramAnnotation });
  4914. unsigned startArgIndex = FlatAnnotationList.size();
  4915. DxilTypeSystem &dxilTypeSys = m_pHLModule->GetTypeSystem();
  4916. const std::string &semantic = paramAnnotation.GetSemanticString();
  4917. DxilParamInputQual inputQual = paramAnnotation.GetParamInputQual();
  4918. bool bOut = inputQual == DxilParamInputQual::Out ||
  4919. inputQual == DxilParamInputQual::Inout ||
  4920. inputQual == DxilParamInputQual::OutStream0 ||
  4921. inputQual == DxilParamInputQual::OutStream1 ||
  4922. inputQual == DxilParamInputQual::OutStream2 ||
  4923. inputQual == DxilParamInputQual::OutStream3;
  4924. // Map from semantic string to type.
  4925. llvm::StringMap<Type *> semanticTypeMap;
  4926. // Original semantic type.
  4927. if (!semantic.empty()) {
  4928. // Unwrap top-level array if primitive
  4929. if (inputQual == DxilParamInputQual::InputPatch ||
  4930. inputQual == DxilParamInputQual::OutputPatch ||
  4931. inputQual == DxilParamInputQual::InputPrimitive) {
  4932. Type *Ty = Arg->getType();
  4933. if (Ty->isPointerTy())
  4934. Ty = Ty->getPointerElementType();
  4935. if (Ty->isArrayTy())
  4936. semanticTypeMap[semantic] = Ty->getArrayElementType();
  4937. } else {
  4938. semanticTypeMap[semantic] = Arg->getType();
  4939. }
  4940. }
  4941. std::vector<Instruction*> deadAllocas;
  4942. DIBuilder DIB(*F->getParent(), /*AllowUnresolved*/ false);
  4943. unsigned debugOffset = 0;
  4944. const DataLayout &DL = F->getParent()->getDataLayout();
  4945. // Process the worklist
  4946. while (!WorkList.empty()) {
  4947. AnnotatedValue AV = WorkList.front();
  4948. WorkList.pop_front();
  4949. // Do not skip unused parameter.
  4950. Value *V = AV.Value;
  4951. DxilFieldAnnotation &annotation = AV.Annotation;
  4952. const bool bAllowReplace = !bOut;
  4953. SROA_Helper::LowerMemcpy(V, &annotation, dxilTypeSys, DL, bAllowReplace);
// Now it is safe to create the IRBuilders.
// If we created them before LowerMemcpy, the insertion point instruction might get deleted.
  4956. IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(EntryBlock));
  4957. IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(EntryBlock));
  4958. std::vector<Value *> Elts;
  4959. // Not flat vector for entry function currently.
  4960. bool SROAed = SROA_Helper::DoScalarReplacement(
  4961. V, Elts, Builder, /*bFlatVector*/ false, annotation.IsPrecise(),
  4962. dxilTypeSys, DL, DeadInsts);
  4963. if (SROAed) {
  4964. Type *Ty = V->getType()->getPointerElementType();
  4965. // Skip empty struct parameters.
  4966. if (SROA_Helper::IsEmptyStructType(Ty, dxilTypeSys)) {
  4967. SROA_Helper::MarkEmptyStructUsers(V, DeadInsts);
  4968. DeleteDeadInstructions();
  4969. continue;
  4970. }
  4971. bool precise = annotation.IsPrecise();
  4972. const std::string &semantic = annotation.GetSemanticString();
  4973. hlsl::InterpolationMode interpMode = annotation.GetInterpolationMode();
  4974. // Push Elts into workList from right to left to preserve the order.
  4975. for (unsigned ri=0;ri<Elts.size();ri++) {
  4976. unsigned i = Elts.size() - ri - 1;
  4977. DxilFieldAnnotation EltAnnotation = GetEltAnnotation(Ty, i, annotation, dxilTypeSys);
  4978. const std::string &eltSem = EltAnnotation.GetSemanticString();
  4979. if (!semantic.empty()) {
  4980. if (!eltSem.empty()) {
  4981. // It doesn't look like we can provide source location information from here
  4982. F->getContext().emitWarning(
  4983. Twine("semantic '") + eltSem + "' on field overridden by function or enclosing type");
  4984. }
  4985. // Inherit semantic from parent, but only preserve it for the first element.
  4986. // Subsequent elements are noted with a special value that gets resolved
  4987. // once the argument is completely flattened.
  4988. EltAnnotation.SetSemanticString(i == 0 ? semantic : ContinuedPseudoSemantic);
  4989. } else if (!eltSem.empty() &&
  4990. semanticTypeMap.count(eltSem) == 0) {
  4991. Type *EltTy = dxilutil::GetArrayEltTy(Ty);
DXASSERT(EltTy->isStructTy(), "must be a struct type to have a semantic.");
  4993. semanticTypeMap[eltSem] = EltTy->getStructElementType(i);
  4994. }
  4995. if (precise)
  4996. EltAnnotation.SetPrecise();
  4997. if (EltAnnotation.GetInterpolationMode().GetKind() == DXIL::InterpolationMode::Undefined)
  4998. EltAnnotation.SetInterpolationMode(interpMode);
  4999. WorkList.push_front({ Elts[i], EltAnnotation });
  5000. }
  5001. ++NumReplaced;
  5002. if (Instruction *I = dyn_cast<Instruction>(V))
  5003. deadAllocas.emplace_back(I);
  5004. } else {
  5005. Type *Ty = V->getType();
  5006. if (Ty->isPointerTy())
  5007. Ty = Ty->getPointerElementType();
  5008. // Flatten array of SV_Target.
  5009. StringRef semanticStr = annotation.GetSemanticString();
  5010. if (semanticStr.upper().find("SV_TARGET") == 0 &&
  5011. Ty->isArrayTy()) {
  5012. Type *Ty = cast<ArrayType>(V->getType()->getPointerElementType());
  5013. StringRef targetStr;
  5014. unsigned targetIndex;
  5015. Semantic::DecomposeNameAndIndex(semanticStr, &targetStr, &targetIndex);
  5016. // Replace target parameter with local target.
  5017. AllocaInst *localTarget = AllocaBuilder.CreateAlloca(Ty);
  5018. V->replaceAllUsesWith(localTarget);
  5019. unsigned arraySize = 1;
  5020. std::vector<unsigned> arraySizeList;
  5021. while (Ty->isArrayTy()) {
  5022. unsigned size = Ty->getArrayNumElements();
  5023. arraySizeList.emplace_back(size);
  5024. arraySize *= size;
  5025. Ty = Ty->getArrayElementType();
  5026. }
  5027. unsigned arrayLevel = arraySizeList.size();
  5028. std::vector<unsigned> arrayIdxList(arrayLevel, 0);
  5029. // Create flattened target.
  5030. DxilFieldAnnotation EltAnnotation = annotation;
  5031. for (unsigned i=0;i<arraySize;i++) {
  5032. Value *Elt = AllocaBuilder.CreateAlloca(Ty);
  5033. EltAnnotation.SetSemanticString(targetStr.str()+std::to_string(targetIndex+i));
  5034. // Add semantic type.
  5035. semanticTypeMap[EltAnnotation.GetSemanticString()] = Ty;
  5036. WorkList.push_front({ Elt, EltAnnotation });
  5037. // Copy local target to flattened target.
  5038. std::vector<Value*> idxList(arrayLevel+1);
  5039. idxList[0] = Builder.getInt32(0);
  5040. for (unsigned idx=0;idx<arrayLevel; idx++) {
  5041. idxList[idx+1] = Builder.getInt32(arrayIdxList[idx]);
  5042. }
  5043. if (bForParam) {
  5044. // If Argument, copy before each return.
  5045. for (auto &BB : F->getBasicBlockList()) {
  5046. TerminatorInst *TI = BB.getTerminator();
  5047. if (isa<ReturnInst>(TI)) {
  5048. IRBuilder<> RetBuilder(TI);
  5049. Value *Ptr = RetBuilder.CreateGEP(localTarget, idxList);
  5050. Value *V = RetBuilder.CreateLoad(Ptr);
  5051. RetBuilder.CreateStore(V, Elt);
  5052. }
  5053. }
  5054. } else {
  5055. // Else, copy with Builder.
  5056. Value *Ptr = Builder.CreateGEP(localTarget, idxList);
  5057. Value *V = Builder.CreateLoad(Ptr);
  5058. Builder.CreateStore(V, Elt);
  5059. }
  5060. // Update arrayIdxList.
  5061. for (unsigned idx=arrayLevel;idx>0;idx--) {
  5062. arrayIdxList[idx-1]++;
  5063. if (arrayIdxList[idx-1] < arraySizeList[idx-1])
  5064. break;
  5065. arrayIdxList[idx-1] = 0;
  5066. }
  5067. }
  5068. continue;
  5069. }
  5070. // Cast vector/matrix/resource parameter.
  5071. V = castArgumentIfRequired(V, Ty, bOut, inputQual,
  5072. annotation, Builder);
  5073. // Cannot SROA, save it to final parameter list.
  5074. FlatParamList.emplace_back(V);
  5075. // Create ParamAnnotation for V.
  5076. FlatAnnotationList.emplace_back(DxilParameterAnnotation());
  5077. DxilParameterAnnotation &flatParamAnnotation = FlatAnnotationList.back();
  5078. flatParamAnnotation.SetParamInputQual(paramAnnotation.GetParamInputQual());
  5079. flatParamAnnotation.SetInterpolationMode(annotation.GetInterpolationMode());
  5080. flatParamAnnotation.SetSemanticString(annotation.GetSemanticString());
  5081. flatParamAnnotation.SetCompType(annotation.GetCompType().GetKind());
  5082. flatParamAnnotation.SetMatrixAnnotation(annotation.GetMatrixAnnotation());
  5083. flatParamAnnotation.SetPrecise(annotation.IsPrecise());
  5084. flatParamAnnotation.SetResourceAttribute(annotation.GetResourceAttribute());
  5085. // Add debug info.
  5086. if (DDI && V != Arg) {
  5087. Value *TmpV = V;
// If V is cast, add debug info to the original V.
  5089. if (castParamMap.count(V)) {
  5090. TmpV = castParamMap[V].first;
// One more level for a pointer to an input vector:
// it is cast from ptr to non-ptr, then cast to scalars.
  5093. if (castParamMap.count(TmpV)) {
  5094. TmpV = castParamMap[TmpV].first;
  5095. }
  5096. }
  5097. Type *Ty = TmpV->getType();
  5098. if (Ty->isPointerTy())
  5099. Ty = Ty->getPointerElementType();
  5100. unsigned size = DL.getTypeAllocSize(Ty);
  5101. DIExpression *DDIExp = DIB.createBitPieceExpression(debugOffset, size);
  5102. debugOffset += size;
  5103. DIB.insertDeclare(TmpV, DDI->getVariable(), DDIExp, DDI->getDebugLoc(),
  5104. Builder.GetInsertPoint());
  5105. }
  5106. // Flatten stream out.
  5107. if (HLModule::IsStreamOutputPtrType(V->getType())) {
  5108. // For stream output objects.
  5109. // Create a value as output value.
  5110. Type *outputType = V->getType()->getPointerElementType()->getStructElementType(0);
  5111. Value *outputVal = AllocaBuilder.CreateAlloca(outputType);
  5112. // For each stream.Append(data)
  5113. // transform into
  5114. // d = load data
  5115. // store outputVal, d
  5116. // stream.Append(outputVal)
  5117. for (User *user : V->users()) {
  5118. if (CallInst *CI = dyn_cast<CallInst>(user)) {
  5119. unsigned opcode = GetHLOpcode(CI);
  5120. if (opcode == static_cast<unsigned>(IntrinsicOp::MOP_Append)) {
// At this point, the stream append data argument might or might not have been SROA'd.
  5122. Value *firstDataPtr = CI->getArgOperand(HLOperandIndex::kStreamAppendDataOpIndex);
  5123. DXASSERT(firstDataPtr->getType()->isPointerTy(), "Append value must be a pointer.");
  5124. if (firstDataPtr->getType()->getPointerElementType() == outputType) {
  5125. // The data has not been SROA'd
  5126. DXASSERT(CI->getNumArgOperands() == (HLOperandIndex::kStreamAppendDataOpIndex + 1),
  5127. "Unexpected number of arguments for non-SROA'd StreamOutput.Append");
  5128. IRBuilder<> Builder(CI);
  5129. llvm::SmallVector<llvm::Value *, 16> idxList;
  5130. SplitCpy(firstDataPtr->getType(), outputVal, firstDataPtr, idxList, Builder, DL,
  5131. dxilTypeSys, &flatParamAnnotation);
  5132. CI->setArgOperand(HLOperandIndex::kStreamAppendDataOpIndex, outputVal);
  5133. }
else {
// Append has been SROA'd; we might be operating on multiple values
// with types differing from the stream output type.
// Flatten the store of outputVal.
// It must be a struct to be flattened.
  5139. IRBuilder<> Builder(CI);
  5140. llvm::SmallVector<llvm::Value *, 16> IdxList;
  5141. llvm::SmallVector<llvm::Value *, 16> EltPtrList;
  5142. llvm::SmallVector<const DxilFieldAnnotation*, 16> EltAnnotationList;
  5143. // split
  5144. SplitPtr(outputVal, IdxList, outputVal->getType(), flatParamAnnotation,
  5145. EltPtrList, EltAnnotationList, dxilTypeSys, Builder);
  5146. unsigned eltCount = CI->getNumArgOperands()-2;
  5147. DXASSERT_LOCALVAR(eltCount, eltCount == EltPtrList.size(), "invalid element count");
  5148. for (unsigned i = HLOperandIndex::kStreamAppendDataOpIndex; i < CI->getNumArgOperands(); i++) {
  5149. Value *DataPtr = CI->getArgOperand(i);
  5150. Value *EltPtr = EltPtrList[i - HLOperandIndex::kStreamAppendDataOpIndex];
  5151. const DxilFieldAnnotation *EltAnnotation = EltAnnotationList[i - HLOperandIndex::kStreamAppendDataOpIndex];
  5152. llvm::SmallVector<llvm::Value *, 16> IdxList;
  5153. SplitCpy(DataPtr->getType(), EltPtr, DataPtr, IdxList,
  5154. Builder, DL, dxilTypeSys, EltAnnotation);
  5155. CI->setArgOperand(i, EltPtr);
  5156. }
  5157. }
  5158. }
  5159. }
  5160. }
  5161. // Then split output value to generate ParamQual.
  5162. WorkList.push_front({ outputVal, annotation });
  5163. }
  5164. }
  5165. }
  5166. // Now erase any instructions that were made dead while rewriting the
  5167. // alloca.
  5168. DeleteDeadInstructions();
  5169. // Erase dead allocas after all uses deleted.
  5170. for (Instruction *I : deadAllocas)
  5171. I->eraseFromParent();
  5172. unsigned endArgIndex = FlatAnnotationList.size();
  5173. if (bForParam && startArgIndex < endArgIndex) {
  5174. DxilParamInputQual inputQual = paramAnnotation.GetParamInputQual();
  5175. if (inputQual == DxilParamInputQual::OutStream0 ||
  5176. inputQual == DxilParamInputQual::OutStream1 ||
  5177. inputQual == DxilParamInputQual::OutStream2 ||
  5178. inputQual == DxilParamInputQual::OutStream3)
  5179. startArgIndex++;
  5180. DxilParameterAnnotation &flatParamAnnotation =
  5181. FlatAnnotationList[startArgIndex];
  5182. const std::string &semantic = flatParamAnnotation.GetSemanticString();
  5183. if (!semantic.empty())
  5184. allocateSemanticIndex(FlatAnnotationList, startArgIndex,
  5185. semanticTypeMap);
  5186. }
  5187. }
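// Returns true if V, or a GEP derived from it, is passed to a user-defined
// call, i.e. one that is neither an HL operation nor an LLVM intrinsic.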
  5188. static bool IsUsedAsCallArg(Value *V) {
  5189. for (User *U : V->users()) {
  5190. if (CallInst *CI = dyn_cast<CallInst>(U)) {
  5191. Function *CalledF = CI->getCalledFunction();
  5192. HLOpcodeGroup group = GetHLOpcodeGroup(CalledF);
  5193. // Skip HL operations.
  5194. if (group != HLOpcodeGroup::NotHL ||
  5195. group == HLOpcodeGroup::HLExtIntrinsic) {
  5196. continue;
  5197. }
  5198. // Skip llvm intrinsic.
  5199. if (CalledF->isIntrinsic())
  5200. continue;
  5201. return true;
  5202. }
  5203. if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
  5204. if (IsUsedAsCallArg(GEP))
  5205. return true;
  5206. }
  5207. }
  5208. return false;
  5209. }
// For function parameters that are used in a function call and need to be
// flattened, replace them with a tmp alloca.
  5212. void SROA_Parameter_HLSL::preprocessArgUsedInCall(Function *F) {
  5213. if (F->isDeclaration())
  5214. return;
  5215. const DataLayout &DL = m_pHLModule->GetModule()->getDataLayout();
  5216. DxilTypeSystem &typeSys = m_pHLModule->GetTypeSystem();
  5217. DxilFunctionAnnotation *pFuncAnnot = typeSys.GetFunctionAnnotation(F);
  5218. DXASSERT(pFuncAnnot, "else invalid function");
  5219. IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(F));
  5220. IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(F));
  5221. SmallVector<ReturnInst*, 2> retList;
  5222. for (BasicBlock &bb : F->getBasicBlockList()) {
  5223. if (ReturnInst *RI = dyn_cast<ReturnInst>(bb.getTerminator())) {
  5224. retList.emplace_back(RI);
  5225. }
  5226. }
  5227. for (Argument &arg : F->args()) {
  5228. Type *Ty = arg.getType();
  5229. // Only check pointer types.
  5230. if (!Ty->isPointerTy())
  5231. continue;
  5232. Ty = Ty->getPointerElementType();
  5233. // Skip scalar types.
  5234. if (!Ty->isAggregateType() &&
  5235. Ty->getScalarType() == Ty)
  5236. continue;
  5237. bool bUsedInCall = IsUsedAsCallArg(&arg);
  5238. if (bUsedInCall) {
  5239. // Create tmp.
  5240. Value *TmpArg = AllocaBuilder.CreateAlloca(Ty);
  5241. // Replace arg with tmp.
  5242. arg.replaceAllUsesWith(TmpArg);
  5243. DxilParameterAnnotation &paramAnnot = pFuncAnnot->GetParameterAnnotation(arg.getArgNo());
  5244. DxilParamInputQual inputQual = paramAnnot.GetParamInputQual();
  5245. unsigned size = DL.getTypeAllocSize(Ty);
  5246. // Copy between arg and tmp.
  5247. if (inputQual == DxilParamInputQual::In ||
  5248. inputQual == DxilParamInputQual::Inout) {
  5249. // copy arg to tmp.
  5250. CallInst *argToTmp = Builder.CreateMemCpy(TmpArg, &arg, size, 0);
  5251. // Split the memcpy.
  5252. MemcpySplitter::SplitMemCpy(cast<MemCpyInst>(argToTmp), DL, nullptr,
  5253. typeSys);
  5254. }
  5255. if (inputQual == DxilParamInputQual::Out ||
  5256. inputQual == DxilParamInputQual::Inout) {
  5257. for (ReturnInst *RI : retList) {
  5258. IRBuilder<> RetBuilder(RI);
  5259. // copy tmp to arg.
  5260. CallInst *tmpToArg =
  5261. RetBuilder.CreateMemCpy(&arg, TmpArg, size, 0);
  5262. // Split the memcpy.
  5263. MemcpySplitter::SplitMemCpy(cast<MemCpyInst>(tmpToArg), DL, nullptr,
  5264. typeSys);
  5265. }
  5266. }
  5267. // TODO: support other DxilParamInputQual.
  5268. }
  5269. }
  5270. }
/// moveFunctionBody - Move the body of F to flatF.
  5272. void SROA_Parameter_HLSL::moveFunctionBody(Function *F, Function *flatF) {
  5273. bool updateRetType = F->getReturnType() != flatF->getReturnType();
  5274. // Splice the body of the old function right into the new function.
  5275. flatF->getBasicBlockList().splice(flatF->begin(), F->getBasicBlockList());
  5276. // Update Block uses.
  5277. if (updateRetType) {
  5278. for (BasicBlock &BB : flatF->getBasicBlockList()) {
  5279. if (updateRetType) {
  5280. // Replace ret with ret void.
  5281. if (ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
  5282. // Create store for return.
  5283. IRBuilder<> Builder(RI);
  5284. Builder.CreateRetVoid();
  5285. RI->eraseFromParent();
  5286. }
  5287. }
  5288. }
  5289. }
  5290. }
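// Split every store through V of an array value into per-element copies.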
  5291. static void SplitArrayCopy(Value *V, const DataLayout &DL,
  5292. DxilTypeSystem &typeSys,
  5293. DxilFieldAnnotation *fieldAnnotation) {
  5294. for (auto U = V->user_begin(); U != V->user_end();) {
  5295. User *user = *(U++);
  5296. if (StoreInst *ST = dyn_cast<StoreInst>(user)) {
  5297. Value *ptr = ST->getPointerOperand();
  5298. Value *val = ST->getValueOperand();
  5299. IRBuilder<> Builder(ST);
  5300. SmallVector<Value *, 16> idxList;
  5301. SplitCpy(ptr->getType(), ptr, val, idxList, Builder, DL, typeSys,
  5302. fieldAnnotation);
  5303. ST->eraseFromParent();
  5304. }
  5305. }
  5306. }
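// Determine whether V is read and/or written, looking through GEPs and HL
// matrix load/store calls.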
  5307. static void CheckArgUsage(Value *V, bool &bLoad, bool &bStore) {
  5308. if (bLoad && bStore)
  5309. return;
  5310. for (User *user : V->users()) {
  5311. if (dyn_cast<LoadInst>(user)) {
  5312. bLoad = true;
  5313. } else if (dyn_cast<StoreInst>(user)) {
  5314. bStore = true;
  5315. } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(user)) {
  5316. CheckArgUsage(GEP, bLoad, bStore);
  5317. } else if (CallInst *CI = dyn_cast<CallInst>(user)) {
  5318. if (CI->getType()->isPointerTy())
  5319. CheckArgUsage(CI, bLoad, bStore);
  5320. else {
  5321. HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
  5322. if (group == HLOpcodeGroup::HLMatLoadStore) {
  5323. HLMatLoadStoreOpcode opcode =
  5324. static_cast<HLMatLoadStoreOpcode>(GetHLOpcode(CI));
  5325. switch (opcode) {
  5326. case HLMatLoadStoreOpcode::ColMatLoad:
  5327. case HLMatLoadStoreOpcode::RowMatLoad:
  5328. bLoad = true;
  5329. break;
  5330. case HLMatLoadStoreOpcode::ColMatStore:
  5331. case HLMatLoadStoreOpcode::RowMatStore:
  5332. bStore = true;
  5333. break;
  5334. }
  5335. }
  5336. }
  5337. }
  5338. }
  5339. }
// AcceptHitAndEndSearch and IgnoreHit do not return, but they require the
// outputs to have been written before the call. Do this by:
  5342. // - inject a return immediately after the call if not there already
  5343. // - LegalizeDxilInputOutputs will inject writes from temp alloca to
  5344. // outputs before each return.
  5345. // - in HLOperationLower, after lowering the intrinsic, move the intrinsic
  5346. // to just before the return.
  5347. static void InjectReturnAfterNoReturnPreserveOutput(HLModule &HLM) {
  5348. for (Function &F : HLM.GetModule()->functions()) {
  5349. if (GetHLOpcodeGroup(&F) == HLOpcodeGroup::HLIntrinsic) {
  5350. for (auto U : F.users()) {
  5351. if (CallInst *CI = dyn_cast<CallInst>(U)) {
  5352. unsigned OpCode = GetHLOpcode(CI);
  5353. if (OpCode == (unsigned)IntrinsicOp::IOP_AcceptHitAndEndSearch ||
  5354. OpCode == (unsigned)IntrinsicOp::IOP_IgnoreHit) {
  5355. Instruction *pNextI = CI->getNextNode();
// Skip if a return already immediately follows the call.
  5357. if (isa<ReturnInst>(pNextI))
  5358. continue;
  5359. // split block and add return:
  5360. BasicBlock *BB = CI->getParent();
  5361. BB->splitBasicBlock(pNextI);
  5362. TerminatorInst *Term = BB->getTerminator();
  5363. Term->eraseFromParent();
  5364. IRBuilder<> Builder(BB);
  5365. llvm::Type *RetTy = CI->getParent()->getParent()->getReturnType();
  5366. if (RetTy->isVoidTy())
  5367. Builder.CreateRetVoid();
  5368. else
  5369. Builder.CreateRet(UndefValue::get(RetTy));
  5370. }
  5371. }
  5372. }
  5373. }
  5374. }
  5375. }
  5376. // Support store to input and load from output.
  5377. static void LegalizeDxilInputOutputs(Function *F,
  5378. DxilFunctionAnnotation *EntryAnnotation,
  5379. const DataLayout &DL,
  5380. DxilTypeSystem &typeSys) {
  5381. BasicBlock &EntryBlk = F->getEntryBlock();
  5382. Module *M = F->getParent();
  5383. // Map from output to the temp created for it.
  5384. std::unordered_map<Argument *, Value*> outputTempMap;
  5385. for (Argument &arg : F->args()) {
  5386. Type *Ty = arg.getType();
  5387. DxilParameterAnnotation &paramAnnotation = EntryAnnotation->GetParameterAnnotation(arg.getArgNo());
  5388. DxilParamInputQual qual = paramAnnotation.GetParamInputQual();
  5389. bool isColMajor = false;
  5390. // Skip arg which is not a pointer.
  5391. if (!Ty->isPointerTy()) {
  5392. if (dxilutil::IsHLSLMatrixType(Ty)) {
  5393. // Replace matrix arg with cast to vec. It will be lowered in
  5394. // DxilGenerationPass.
  5395. isColMajor = paramAnnotation.GetMatrixAnnotation().Orientation ==
  5396. MatrixOrientation::ColumnMajor;
  5397. IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(F));
  5398. HLCastOpcode opcode = isColMajor ? HLCastOpcode::ColMatrixToVecCast
  5399. : HLCastOpcode::RowMatrixToVecCast;
  5400. Value *undefVal = UndefValue::get(Ty);
  5401. Value *Cast = HLModule::EmitHLOperationCall(
  5402. Builder, HLOpcodeGroup::HLCast, static_cast<unsigned>(opcode), Ty,
  5403. {undefVal}, *M);
  5404. arg.replaceAllUsesWith(Cast);
  5405. // Set arg as the operand.
  5406. CallInst *CI = cast<CallInst>(Cast);
  5407. CI->setArgOperand(HLOperandIndex::kUnaryOpSrc0Idx, &arg);
  5408. }
  5409. continue;
  5410. }
  5411. Ty = Ty->getPointerElementType();
  5412. bool bLoad = false;
  5413. bool bStore = false;
  5414. CheckArgUsage(&arg, bLoad, bStore);
  5415. bool bStoreInputToTemp = false;
  5416. bool bLoadOutputFromTemp = false;
  5417. if (qual == DxilParamInputQual::In && bStore) {
  5418. bStoreInputToTemp = true;
  5419. } else if (qual == DxilParamInputQual::Out && bLoad) {
  5420. bLoadOutputFromTemp = true;
  5421. } else if (bLoad && bStore) {
  5422. switch (qual) {
  5423. case DxilParamInputQual::InputPrimitive:
  5424. case DxilParamInputQual::InputPatch:
  5425. case DxilParamInputQual::OutputPatch: {
  5426. bStoreInputToTemp = true;
  5427. } break;
  5428. case DxilParamInputQual::Inout:
  5429. break;
  5430. default:
  5431. DXASSERT(0, "invalid input qual here");
  5432. }
  5433. } else if (qual == DxilParamInputQual::Inout) {
  5434. // Only replace inout when (bLoad && bStore) == false.
  5435. bLoadOutputFromTemp = true;
  5436. bStoreInputToTemp = true;
  5437. }
  5438. if (dxilutil::IsHLSLMatrixType(Ty)) {
  5439. if (qual == DxilParamInputQual::In)
  5440. bStoreInputToTemp = bLoad;
  5441. else if (qual == DxilParamInputQual::Out)
  5442. bLoadOutputFromTemp = bStore;
  5443. else if (qual == DxilParamInputQual::Inout) {
  5444. bStoreInputToTemp = true;
  5445. bLoadOutputFromTemp = true;
  5446. }
  5447. }
  5448. if (bStoreInputToTemp || bLoadOutputFromTemp) {
  5449. IRBuilder<> AllocaBuilder(EntryBlk.getFirstInsertionPt());
  5450. IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(&EntryBlk));
  5451. AllocaInst *temp = AllocaBuilder.CreateAlloca(Ty);
  5452. // Replace all uses with temp.
  5453. arg.replaceAllUsesWith(temp);
  5454. // Copy input to temp.
  5455. if (bStoreInputToTemp) {
  5456. llvm::SmallVector<llvm::Value *, 16> idxList;
  5457. // split copy.
  5458. SplitCpy(temp->getType(), temp, &arg, idxList, Builder, DL, typeSys,
  5459. &paramAnnotation);
  5460. }
// Generate the store from temp back to the output later, before each return.
  5462. if (bLoadOutputFromTemp) {
  5463. outputTempMap[&arg] = temp;
  5464. }
  5465. }
  5466. }
  5467. for (BasicBlock &BB : F->getBasicBlockList()) {
  5468. if (ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
  5469. IRBuilder<> Builder(RI);
  5470. // Copy temp to output.
  5471. for (auto It : outputTempMap) {
  5472. Argument *output = It.first;
  5473. Value *temp = It.second;
  5474. llvm::SmallVector<llvm::Value *, 16> idxList;
  5475. DxilParameterAnnotation &paramAnnotation =
  5476. EntryAnnotation->GetParameterAnnotation(output->getArgNo());
  5477. auto Iter = Builder.GetInsertPoint();
  5478. if (RI != BB.begin())
  5479. Iter--;
  5480. // split copy.
  5481. SplitCpy(output->getType(), output, temp, idxList, Builder, DL, typeSys,
  5482. &paramAnnotation);
  5483. }
  5484. // Clone the return.
  5485. Builder.CreateRet(RI->getReturnValue());
  5486. RI->eraseFromParent();
  5487. }
  5488. }
  5489. }
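// Create the flattened version of an entry function: flatten every parameter
// and the return value; if the signature changes, build the new function
// with matching annotations, move the body over, and legalize input/output
// usage.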
  5490. void SROA_Parameter_HLSL::createFlattenedFunction(Function *F) {
  5491. DxilTypeSystem &typeSys = m_pHLModule->GetTypeSystem();
  5492. DXASSERT(F == m_pHLModule->GetEntryFunction() ||
  5493. m_pHLModule->IsEntryThatUsesSignatures(F),
  5494. "otherwise, createFlattenedFunction called on library function "
  5495. "that should not be flattened.");
  5496. const DataLayout &DL = m_pHLModule->GetModule()->getDataLayout();
  5497. // Skip void (void) function.
  5498. if (F->getReturnType()->isVoidTy() && F->getArgumentList().empty()) {
  5499. return;
  5500. }
  5501. // Clear maps for cast.
  5502. castParamMap.clear();
  5503. vectorEltsMap.clear();
  5504. DxilFunctionAnnotation *funcAnnotation = m_pHLModule->GetFunctionAnnotation(F);
  5505. DXASSERT(funcAnnotation, "must find annotation for function");
  5506. std::deque<Value *> WorkList;
  5507. LLVMContext &Ctx = m_pHLModule->GetCtx();
  5508. std::unique_ptr<BasicBlock> TmpBlockForFuncDecl;
  5509. BasicBlock *EntryBlock;
  5510. if (F->isDeclaration()) {
// We still want to SROA the parameters, so create a dummy
// function body block to avoid special cases.
  5513. TmpBlockForFuncDecl.reset(BasicBlock::Create(Ctx));
  5514. // Create return as terminator.
  5515. IRBuilder<> RetBuilder(TmpBlockForFuncDecl.get());
  5516. RetBuilder.CreateRetVoid();
  5517. EntryBlock = TmpBlockForFuncDecl.get();
  5518. } else {
  5519. EntryBlock = &F->getEntryBlock();
  5520. }
  5521. std::vector<Value *> FlatParamList;
  5522. std::vector<DxilParameterAnnotation> FlatParamAnnotationList;
  5523. std::vector<int> FlatParamOriArgNoList;
  5524. const bool bForParamTrue = true;
  5525. // Add all argument to worklist.
  5526. for (Argument &Arg : F->args()) {
  5527. // merge GEP use for arg.
  5528. HLModule::MergeGepUse(&Arg);
  5529. unsigned prevFlatParamCount = FlatParamList.size();
  5530. DxilParameterAnnotation &paramAnnotation =
  5531. funcAnnotation->GetParameterAnnotation(Arg.getArgNo());
  5532. DbgDeclareInst *DDI = llvm::FindAllocaDbgDeclare(&Arg);
  5533. flattenArgument(F, &Arg, bForParamTrue, paramAnnotation, FlatParamList,
  5534. FlatParamAnnotationList, EntryBlock, DDI);
  5535. unsigned newFlatParamCount = FlatParamList.size() - prevFlatParamCount;
  5536. for (unsigned i = 0; i < newFlatParamCount; i++) {
  5537. FlatParamOriArgNoList.emplace_back(Arg.getArgNo());
  5538. }
  5539. }
  5540. Type *retType = F->getReturnType();
  5541. std::vector<Value *> FlatRetList;
  5542. std::vector<DxilParameterAnnotation> FlatRetAnnotationList;
  5543. // Split and change to out parameter.
  5544. if (!retType->isVoidTy()) {
  5545. IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(EntryBlock));
  5546. IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(EntryBlock));
  5547. Value *retValAddr = AllocaBuilder.CreateAlloca(retType);
  5548. DxilParameterAnnotation &retAnnotation =
  5549. funcAnnotation->GetRetTypeAnnotation();
  5550. Module &M = *m_pHLModule->GetModule();
  5551. Type *voidTy = Type::getVoidTy(m_pHLModule->GetCtx());
  5552. // Create DbgDecl for the ret value.
  5553. if (DISubprogram *funcDI = getDISubprogram(F)) {
  5554. DITypeRef RetDITyRef = funcDI->getType()->getTypeArray()[0];
  5555. DITypeIdentifierMap EmptyMap;
  5556. DIType * RetDIType = RetDITyRef.resolve(EmptyMap);
  5557. DIBuilder DIB(*F->getParent(), /*AllowUnresolved*/ false);
  5558. DILocalVariable *RetVar = DIB.createLocalVariable(llvm::dwarf::Tag::DW_TAG_arg_variable, funcDI, F->getName().str() + ".Ret", funcDI->getFile(),
  5559. funcDI->getLine(), RetDIType);
  5560. DIExpression *Expr = nullptr;
  5561. // TODO: how to get col?
  5562. DILocation *DL = DILocation::get(F->getContext(), funcDI->getLine(), 0, funcDI);
  5563. DIB.insertDeclare(retValAddr, RetVar, Expr, DL, Builder.GetInsertPoint());
  5564. }
  5565. for (BasicBlock &BB : F->getBasicBlockList()) {
  5566. if (ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
  5567. // Create store for return.
  5568. IRBuilder<> RetBuilder(RI);
  5569. if (!retAnnotation.HasMatrixAnnotation()) {
  5570. RetBuilder.CreateStore(RI->getReturnValue(), retValAddr);
  5571. } else {
  5572. bool isRowMajor = retAnnotation.GetMatrixAnnotation().Orientation ==
  5573. MatrixOrientation::RowMajor;
  5574. Value *RetVal = RI->getReturnValue();
  5575. if (!isRowMajor) {
// Matrix value is row major. ColMatStore requires col major.
  5577. // Cast before store.
  5578. RetVal = HLModule::EmitHLOperationCall(
  5579. RetBuilder, HLOpcodeGroup::HLCast,
  5580. static_cast<unsigned>(HLCastOpcode::RowMatrixToColMatrix),
  5581. RetVal->getType(), {RetVal}, M);
  5582. }
  5583. unsigned opcode = static_cast<unsigned>(
  5584. isRowMajor ? HLMatLoadStoreOpcode::RowMatStore
  5585. : HLMatLoadStoreOpcode::ColMatStore);
  5586. HLModule::EmitHLOperationCall(RetBuilder,
  5587. HLOpcodeGroup::HLMatLoadStore, opcode,
  5588. voidTy, {retValAddr, RetVal}, M);
  5589. }
  5590. }
  5591. }
  5592. // Create a fake store to keep retValAddr so it can be flattened.
  5593. if (retValAddr->user_empty()) {
  5594. Builder.CreateStore(UndefValue::get(retType), retValAddr);
  5595. }
  5596. DbgDeclareInst *DDI = llvm::FindAllocaDbgDeclare(retValAddr);
  5597. flattenArgument(F, retValAddr, bForParamTrue,
  5598. funcAnnotation->GetRetTypeAnnotation(), FlatRetList,
  5599. FlatRetAnnotationList, EntryBlock, DDI);
  5600. const int kRetArgNo = -1;
  5601. for (unsigned i = 0; i < FlatRetList.size(); i++) {
  5602. FlatParamOriArgNoList.insert(FlatParamOriArgNoList.begin(), kRetArgNo);
  5603. }
  5604. }
// Always change the return value into an out parameter.
// By doing this, there is no need to check the return when generating storeOutput.
  5607. if (FlatRetList.size() ||
  5608. // For empty struct return type.
  5609. !retType->isVoidTy()) {
  5610. // Return value is flattened.
  5611. // Change return value into out parameter.
  5612. retType = Type::getVoidTy(retType->getContext());
// Merge return data into param data.
  5614. FlatParamList.insert(FlatParamList.begin(), FlatRetList.begin(), FlatRetList.end());
  5615. FlatParamAnnotationList.insert(FlatParamAnnotationList.begin(),
  5616. FlatRetAnnotationList.begin(),
  5617. FlatRetAnnotationList.end());
  5618. }
  5619. std::vector<Type *> FinalTypeList;
  5620. for (Value * arg : FlatParamList) {
  5621. FinalTypeList.emplace_back(arg->getType());
  5622. }
  5623. unsigned extraParamSize = 0;
  5624. if (m_pHLModule->HasDxilFunctionProps(F)) {
  5625. DxilFunctionProps &funcProps = m_pHLModule->GetDxilFunctionProps(F);
  5626. if (funcProps.shaderKind == ShaderModel::Kind::Vertex) {
  5627. auto &VS = funcProps.ShaderProps.VS;
  5628. Type *outFloatTy = Type::getFloatPtrTy(F->getContext());
  5629. // Add out float parameter for each clip plane.
  5630. unsigned i=0;
  5631. for (; i < DXIL::kNumClipPlanes; i++) {
  5632. if (!VS.clipPlanes[i])
  5633. break;
  5634. FinalTypeList.emplace_back(outFloatTy);
  5635. }
  5636. extraParamSize = i;
  5637. }
  5638. }
  5639. FunctionType *flatFuncTy = FunctionType::get(retType, FinalTypeList, false);
  5640. // Return if nothing changed.
  5641. if (flatFuncTy == F->getFunctionType()) {
  5642. // Copy semantic allocation.
  5643. if (!FlatParamAnnotationList.empty()) {
  5644. if (!FlatParamAnnotationList[0].GetSemanticString().empty()) {
  5645. for (unsigned i = 0; i < FlatParamAnnotationList.size(); i++) {
  5646. DxilParameterAnnotation &paramAnnotation = funcAnnotation->GetParameterAnnotation(i);
  5647. DxilParameterAnnotation &flatParamAnnotation = FlatParamAnnotationList[i];
  5648. paramAnnotation.SetSemanticIndexVec(flatParamAnnotation.GetSemanticIndexVec());
  5649. paramAnnotation.SetSemanticString(flatParamAnnotation.GetSemanticString());
  5650. }
  5651. }
  5652. }
  5653. if (!F->isDeclaration()) {
  5654. // Support store to input and load from output.
  5655. LegalizeDxilInputOutputs(F, funcAnnotation, DL, typeSys);
  5656. }
  5657. return;
  5658. }
  5659. std::string flatName = F->getName().str() + ".flat";
  5660. DXASSERT(nullptr == F->getParent()->getFunction(flatName),
  5661. "else overwriting existing function");
  5662. Function *flatF =
  5663. cast<Function>(F->getParent()->getOrInsertFunction(flatName, flatFuncTy));
  5664. funcMap[F] = flatF;
  5665. // Update function debug info.
  5666. if (DISubprogram *funcDI = getDISubprogram(F))
  5667. funcDI->replaceFunction(flatF);
  5668. // Create FunctionAnnotation for flatF.
  5669. DxilFunctionAnnotation *flatFuncAnnotation = m_pHLModule->AddFunctionAnnotation(flatF);
// No need to set return info; flatF always returns void now.
  5671. // Param Info
  5672. for (unsigned ArgNo = 0; ArgNo < FlatParamAnnotationList.size(); ++ArgNo) {
  5673. DxilParameterAnnotation &paramAnnotation = flatFuncAnnotation->GetParameterAnnotation(ArgNo);
  5674. paramAnnotation = FlatParamAnnotationList[ArgNo];
  5675. }
  5676. // Function Attr and Parameter Attr.
  5677. // Remove sret first.
  5678. if (F->hasStructRetAttr())
  5679. F->removeFnAttr(Attribute::StructRet);
  5680. for (Argument &arg : F->args()) {
  5681. if (arg.hasStructRetAttr()) {
  5682. Attribute::AttrKind SRet [] = {Attribute::StructRet};
  5683. AttributeSet SRetAS = AttributeSet::get(Ctx, arg.getArgNo() + 1, SRet);
  5684. arg.removeAttr(SRetAS);
  5685. }
  5686. }
  5687. AttributeSet AS = F->getAttributes();
  5688. AttrBuilder FnAttrs(AS.getFnAttributes(), AttributeSet::FunctionIndex);
  5689. AttributeSet flatAS;
  5690. flatAS = flatAS.addAttributes(
  5691. Ctx, AttributeSet::FunctionIndex,
  5692. AttributeSet::get(Ctx, AttributeSet::FunctionIndex, FnAttrs));
  5693. if (!F->isDeclaration()) {
// Only set param attributes for a function that has a body.
  5695. for (unsigned ArgNo = 0; ArgNo < FlatParamAnnotationList.size(); ++ArgNo) {
  5696. unsigned oriArgNo = FlatParamOriArgNoList[ArgNo] + 1;
  5697. AttrBuilder paramAttr(AS, oriArgNo);
  5698. if (oriArgNo == AttributeSet::ReturnIndex)
  5699. paramAttr.addAttribute(Attribute::AttrKind::NoAlias);
  5700. flatAS = flatAS.addAttributes(
  5701. Ctx, ArgNo + 1, AttributeSet::get(Ctx, ArgNo + 1, paramAttr));
  5702. }
  5703. }
  5704. flatF->setAttributes(flatAS);
  5705. DXASSERT_LOCALVAR(extraParamSize, flatF->arg_size() == (extraParamSize + FlatParamAnnotationList.size()), "parameter count mismatch");
  5706. // ShaderProps.
  5707. if (m_pHLModule->HasDxilFunctionProps(F)) {
  5708. DxilFunctionProps &funcProps = m_pHLModule->GetDxilFunctionProps(F);
  5709. std::unique_ptr<DxilFunctionProps> flatFuncProps = llvm::make_unique<DxilFunctionProps>();
  5710. flatFuncProps->shaderKind = funcProps.shaderKind;
  5711. flatFuncProps->ShaderProps = funcProps.ShaderProps;
  5712. m_pHLModule->AddDxilFunctionProps(flatF, flatFuncProps);
  5713. if (funcProps.shaderKind == ShaderModel::Kind::Vertex) {
  5714. auto &VS = funcProps.ShaderProps.VS;
  5715. unsigned clipArgIndex = FlatParamAnnotationList.size();
  5716. // Add out float SV_ClipDistance for each clip plane.
  5717. for (unsigned i = 0; i < DXIL::kNumClipPlanes; i++) {
  5718. if (!VS.clipPlanes[i])
  5719. break;
  5720. DxilParameterAnnotation &paramAnnotation =
  5721. flatFuncAnnotation->GetParameterAnnotation(clipArgIndex+i);
  5722. paramAnnotation.SetParamInputQual(DxilParamInputQual::Out);
// Materialize the name immediately; a stored Twine would reference dead temporaries.
std::string semName = (Twine("SV_ClipDistance") + Twine(i)).str();
paramAnnotation.SetSemanticString(semName);
  5725. paramAnnotation.SetCompType(DXIL::ComponentType::F32);
  5726. paramAnnotation.AppendSemanticIndex(i);
  5727. }
  5728. }
  5729. }
  5730. if (!F->isDeclaration()) {
  5731. // Move function body into flatF.
  5732. moveFunctionBody(F, flatF);
  5733. // Replace old parameters with flatF Arguments.
  5734. auto argIter = flatF->arg_begin();
  5735. auto flatArgIter = FlatParamList.begin();
  5736. LLVMContext &Context = F->getContext();
// Parameter casts come from the beginning of the entry block.
  5738. IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(flatF));
  5739. IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(flatF));
  5740. while (argIter != flatF->arg_end()) {
  5741. Argument *Arg = argIter++;
  5742. if (flatArgIter == FlatParamList.end()) {
  5743. DXASSERT(extraParamSize > 0, "parameter count mismatch");
  5744. break;
  5745. }
  5746. Value *flatArg = *(flatArgIter++);
  5747. if (castParamMap.count(flatArg)) {
  5748. replaceCastParameter(flatArg, castParamMap[flatArg].first, *flatF, Arg,
  5749. castParamMap[flatArg].second, Builder);
  5750. }
  5751. // Update arg debug info.
  5752. DbgDeclareInst *DDI = llvm::FindAllocaDbgDeclare(flatArg);
  5753. if (DDI) {
  5754. if (!flatArg->getType()->isPointerTy()) {
  5755. // Create alloca to hold the debug info.
  5756. Value *allocaArg = nullptr;
  5757. if (flatArg->hasOneUse() && isa<StoreInst>(*flatArg->user_begin())) {
  5758. StoreInst *SI = cast<StoreInst>(*flatArg->user_begin());
  5759. allocaArg = SI->getPointerOperand();
  5760. } else {
  5761. allocaArg = AllocaBuilder.CreateAlloca(flatArg->getType());
  5762. StoreInst *initArg = Builder.CreateStore(flatArg, allocaArg);
  5763. Value *ldArg = Builder.CreateLoad(allocaArg);
  5764. flatArg->replaceAllUsesWith(ldArg);
  5765. initArg->setOperand(0, flatArg);
  5766. }
  5767. Value *VMD = MetadataAsValue::get(Context, ValueAsMetadata::get(allocaArg));
  5768. DDI->setArgOperand(0, VMD);
  5769. } else {
  5770. Value *VMD = MetadataAsValue::get(Context, ValueAsMetadata::get(Arg));
  5771. DDI->setArgOperand(0, VMD);
  5772. }
  5773. }
  5774. flatArg->replaceAllUsesWith(Arg);
  5775. if (isa<Instruction>(flatArg))
  5776. DeadInsts.emplace_back(flatArg);
  5777. HLModule::MergeGepUse(Arg);
  5778. // Flatten store of array parameter.
  5779. if (Arg->getType()->isPointerTy()) {
  5780. Type *Ty = Arg->getType()->getPointerElementType();
  5781. if (Ty->isArrayTy())
  5782. SplitArrayCopy(
  5783. Arg, DL, typeSys,
  5784. &flatFuncAnnotation->GetParameterAnnotation(Arg->getArgNo()));
  5785. }
  5786. }
  5787. // Support store to input and load from output.
  5788. LegalizeDxilInputOutputs(flatF, flatFuncAnnotation, DL, typeSys);
  5789. }
  5790. }

void SROA_Parameter_HLSL::replaceCall(Function *F, Function *flatF) {
  // Update entry function.
  if (F == m_pHLModule->GetEntryFunction()) {
    m_pHLModule->SetEntryFunction(flatF);
  }
  DXASSERT(F->user_empty(), "otherwise we flattened a library function.");
}

// Public interface to the SROA_Parameter_HLSL pass
ModulePass *llvm::createSROA_Parameter_HLSL() {
  return new SROA_Parameter_HLSL();
}

//===----------------------------------------------------------------------===//
// Lower static global into Alloca.
//===----------------------------------------------------------------------===//

namespace {
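// Rewrites a static (module-local, default address space) global that is read
// and written inside a single function into an alloca in that function. Any
// non-undef initializer is materialized as an explicit store before the
// global's uses are redirected to the alloca.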
class LowerStaticGlobalIntoAlloca : public ModulePass {
  HLModule *m_pHLModule;

public:
  static char ID; // Pass identification, replacement for typeid
  explicit LowerStaticGlobalIntoAlloca() : ModulePass(ID) {}
  const char *getPassName() const override {
    return "Lower static global into Alloca";
  }

  bool runOnModule(Module &M) override {
    m_pHLModule = &M.GetOrCreateHLModule();

    // Lower static globals into allocas.
    std::vector<GlobalVariable *> staticGVs;
    for (GlobalVariable &GV : M.globals()) {
      bool isStaticGlobal =
          dxilutil::IsStaticGlobal(&GV) &&
          GV.getType()->getAddressSpace() == DXIL::kDefaultAddrSpace;

      if (isStaticGlobal &&
          !GV.getType()->getElementType()->isAggregateType()) {
        staticGVs.emplace_back(&GV);
      }
    }
    bool bUpdated = false;

    const DataLayout &DL = M.getDataLayout();
    for (GlobalVariable *GV : staticGVs) {
      bUpdated |= lowerStaticGlobalIntoAlloca(GV, DL);
    }

    return bUpdated;
  }

private:
  bool lowerStaticGlobalIntoAlloca(GlobalVariable *GV, const DataLayout &DL);
};
}
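
// A global qualifies only when PointerStatus proves it is accessed from exactly
// one function and is actually stored to beyond its initializer; otherwise it
// is left unchanged.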
bool LowerStaticGlobalIntoAlloca::lowerStaticGlobalIntoAlloca(
    GlobalVariable *GV, const DataLayout &DL) {
  DxilTypeSystem &typeSys = m_pHLModule->GetTypeSystem();
  unsigned size = DL.getTypeAllocSize(GV->getType()->getElementType());
  PointerStatus PS(size);
  GV->removeDeadConstantUsers();
  PS.analyzePointer(GV, PS, typeSys, /*bStructElt*/ false);
  bool NotStored =
      (PS.storedType == PointerStatus::StoredType::NotStored) ||
      (PS.storedType == PointerStatus::StoredType::InitializerStored);
  // Make sure GV is only used in one function.
  // Skip GVs that are never stored to.
  if (PS.HasMultipleAccessingFunctions || NotStored)
    return false;

  Function *F = const_cast<Function *>(PS.AccessingFunction);
  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(F));
  AllocaInst *AI = AllocaBuilder.CreateAlloca(GV->getType()->getElementType());

  IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(F));

  // Store the initializer if one exists.
  if (GV->hasInitializer() && !isa<UndefValue>(GV->getInitializer())) {
    Builder.CreateStore(GV->getInitializer(), GV);
  }

  ReplaceConstantWithInst(GV, AI, Builder);
  GV->eraseFromParent();
  return true;
}

char LowerStaticGlobalIntoAlloca::ID = 0;

INITIALIZE_PASS(LowerStaticGlobalIntoAlloca, "static-global-to-alloca",
                "Lower static global into Alloca", false, false)

// Public interface to the LowerStaticGlobalIntoAlloca pass
ModulePass *llvm::createLowerStaticGlobalIntoAlloca() {
  return new LowerStaticGlobalIntoAlloca();
}

//===----------------------------------------------------------------------===//
// Lower one type to another type.
//===----------------------------------------------------------------------===//

namespace {
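// Shared driver for the type-lowering passes below (dynamic-vector-to-array,
// multi-dim-to-one-dim, resource-to-handle). Derived passes only describe
// which values need lowering, how the type and initializer map over, and how
// uses are rewritten; this base class walks entry-block allocas and internal
// globals, creates the lowered storage, and transfers debug info.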
class LowerTypePass : public ModulePass {
public:
  explicit LowerTypePass(char &ID) : ModulePass(ID) {}

  bool runOnModule(Module &M) override;

private:
  bool runOnFunction(Function &F, bool HasDbgInfo);
  AllocaInst *lowerAlloca(AllocaInst *A);
  GlobalVariable *lowerInternalGlobal(GlobalVariable *GV);

protected:
  virtual bool needToLower(Value *V) = 0;
  virtual void lowerUseWithNewValue(Value *V, Value *NewV) = 0;
  virtual Type *lowerType(Type *Ty) = 0;
  virtual Constant *lowerInitVal(Constant *InitVal, Type *NewTy) = 0;
  virtual StringRef getGlobalPrefix() = 0;
  virtual void initialize(Module &M) {};
};

AllocaInst *LowerTypePass::lowerAlloca(AllocaInst *A) {
  IRBuilder<> AllocaBuilder(A);
  Type *NewTy = lowerType(A->getAllocatedType());
  return AllocaBuilder.CreateAlloca(NewTy);
}

GlobalVariable *LowerTypePass::lowerInternalGlobal(GlobalVariable *GV) {
  Type *NewTy = lowerType(GV->getType()->getPointerElementType());
  // Default the init val to undef.
  Constant *InitVal = UndefValue::get(NewTy);
  if (GV->hasInitializer()) {
    Constant *OldInitVal = GV->getInitializer();
    if (isa<ConstantAggregateZero>(OldInitVal))
      InitVal = ConstantAggregateZero::get(NewTy);
    else if (!isa<UndefValue>(OldInitVal)) {
      InitVal = lowerInitVal(OldInitVal, NewTy);
    }
  }

  bool isConst = GV->isConstant();
  GlobalVariable::ThreadLocalMode TLMode = GV->getThreadLocalMode();
  unsigned AddressSpace = GV->getType()->getAddressSpace();
  GlobalValue::LinkageTypes linkage = GV->getLinkage();

  Module *M = GV->getParent();
  GlobalVariable *NewGV = new llvm::GlobalVariable(
      *M, NewTy, /*IsConstant*/ isConst, linkage,
      /*InitVal*/ InitVal, GV->getName() + getGlobalPrefix(),
      /*InsertBefore*/ nullptr, TLMode, AddressSpace);
  return NewGV;
}
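
// Lower every entry-block alloca the derived pass accepts: create the lowered
// alloca, clone any llvm.dbg.declare onto it, hand the use rewriting to the
// derived pass, then erase the original alloca.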
bool LowerTypePass::runOnFunction(Function &F, bool HasDbgInfo) {
  std::vector<AllocaInst *> workList;
  // Scan the entry basic block, adding allocas to the worklist.
  BasicBlock &BB = F.getEntryBlock();
  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
    if (!isa<AllocaInst>(I))
      continue;
    AllocaInst *A = cast<AllocaInst>(I);
    if (needToLower(A))
      workList.emplace_back(A);
  }
  LLVMContext &Context = F.getContext();
  for (AllocaInst *A : workList) {
    AllocaInst *NewA = lowerAlloca(A);
    if (HasDbgInfo) {
      // Add debug info.
      DbgDeclareInst *DDI = llvm::FindAllocaDbgDeclare(A);
      if (DDI) {
        Value *DDIVar = MetadataAsValue::get(Context, DDI->getRawVariable());
        Value *DDIExp = MetadataAsValue::get(Context, DDI->getRawExpression());
        Value *VMD = MetadataAsValue::get(Context, ValueAsMetadata::get(NewA));
        IRBuilder<> debugBuilder(DDI);
        debugBuilder.CreateCall(DDI->getCalledFunction(),
                                {VMD, DDIVar, DDIExp});
      }
    }
    // Replace users.
    lowerUseWithNewValue(A, NewA);
    // Remove alloca.
    A->eraseFromParent();
  }
  return true;
}
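
// Module driver: lower allocas function by function first, then lower static
// and shared-memory (groupshared) globals, keeping their debug info in sync.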
bool LowerTypePass::runOnModule(Module &M) {
  initialize(M);
  // Load up debug information, to cross-reference values and the instructions
  // used to load them.
  bool HasDbgInfo = getDebugMetadataVersionFromModule(M) != 0;
  llvm::DebugInfoFinder Finder;
  if (HasDbgInfo) {
    Finder.processModule(M);
  }

  std::vector<AllocaInst *> multiDimAllocas;
  for (Function &F : M.functions()) {
    if (F.isDeclaration())
      continue;
    runOnFunction(F, HasDbgInfo);
  }

  // Work on internal globals.
  std::vector<GlobalVariable *> vecGVs;
  for (GlobalVariable &GV : M.globals()) {
    if (dxilutil::IsStaticGlobal(&GV) || dxilutil::IsSharedMemoryGlobal(&GV)) {
      if (needToLower(&GV) && !GV.user_empty())
        vecGVs.emplace_back(&GV);
    }
  }

  for (GlobalVariable *GV : vecGVs) {
    GlobalVariable *NewGV = lowerInternalGlobal(GV);
    // Add debug info.
    if (HasDbgInfo) {
      HLModule::UpdateGlobalVariableDebugInfo(GV, Finder, NewGV);
    }
    // Replace users.
    lowerUseWithNewValue(GV, NewGV);
    // Remove GV.
    GV->removeDeadConstantUsers();
    GV->eraseFromParent();
  }

  return true;
}

}

//===----------------------------------------------------------------------===//
// DynamicIndexingVector to Array.
//===----------------------------------------------------------------------===//

namespace {
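// Replaces vector allocas/globals with arrays of the element type so that no
// indexable vector pointers survive into dxil. Locals that are only indexed
// with constants are rewritten in place instead (see
// ReplaceStaticIndexingOnVector); with ReplaceAllVectors set, every vector is
// converted regardless.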
class DynamicIndexingVectorToArray : public LowerTypePass {
  bool ReplaceAllVectors;

public:
  explicit DynamicIndexingVectorToArray(bool ReplaceAll = false)
      : LowerTypePass(ID), ReplaceAllVectors(ReplaceAll) {}
  static char ID; // Pass identification, replacement for typeid
  void applyOptions(PassOptions O) override;
  void dumpConfig(raw_ostream &OS) override;

protected:
  bool needToLower(Value *V) override;
  void lowerUseWithNewValue(Value *V, Value *NewV) override;
  Type *lowerType(Type *Ty) override;
  Constant *lowerInitVal(Constant *InitVal, Type *NewTy) override;
  StringRef getGlobalPrefix() override { return ".v"; }

private:
  bool HasVectorDynamicIndexing(Value *V);
  void ReplaceVecGEP(Value *GEP, ArrayRef<Value *> idxList, Value *A,
                     IRBuilder<> &Builder);
  void ReplaceVecArrayGEP(Value *GEP, ArrayRef<Value *> idxList, Value *A,
                          IRBuilder<> &Builder);
  void ReplaceVectorWithArray(Value *Vec, Value *Array);
  void ReplaceVectorArrayWithArray(Value *VecArray, Value *Array);
  void ReplaceStaticIndexingOnVector(Value *V);
  void ReplaceAddrSpaceCast(ConstantExpr *CE, Value *A, IRBuilder<> &Builder);
};

void DynamicIndexingVectorToArray::applyOptions(PassOptions O) {
  GetPassOptionBool(O, "ReplaceAllVectors", &ReplaceAllVectors,
                    ReplaceAllVectors);
}

void DynamicIndexingVectorToArray::dumpConfig(raw_ostream &OS) {
  ModulePass::dumpConfig(OS);
  OS << ",ReplaceAllVectors=" << ReplaceAllVectors;
}
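
// Rewrite constant-index element access on a vector pointer so the pointer is
// never indexed directly:
//    ld (gep a, 0, x)      ->  b = ld a ; extract b.x
//    st val, (gep a, 0, x) ->  b = ld a ; b.x = val ; st b, a
// Zero-index GEPs collapse onto the pointer itself.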
void DynamicIndexingVectorToArray::ReplaceStaticIndexingOnVector(Value *V) {
  for (auto U = V->user_begin(), E = V->user_end(); U != E;) {
    Value *User = *(U++);
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
      // Only work on element access for vector.
      if (GEP->getNumOperands() == 3) {
        auto Idx = GEP->idx_begin();
        // Skip the pointer idx.
        Idx++;
        ConstantInt *constIdx = cast<ConstantInt>(Idx);

        for (auto GEPU = GEP->user_begin(), GEPE = GEP->user_end();
             GEPU != GEPE;) {
          Instruction *GEPUser = cast<Instruction>(*(GEPU++));

          IRBuilder<> Builder(GEPUser);

          if (LoadInst *ldInst = dyn_cast<LoadInst>(GEPUser)) {
            // Change
            //    ld a->x
            // into
            //    b = ld a
            //    b.x
            Value *ldVal = Builder.CreateLoad(V);
            Value *Elt = Builder.CreateExtractElement(ldVal, constIdx);
            ldInst->replaceAllUsesWith(Elt);
            ldInst->eraseFromParent();
          } else {
            // Change
            //    st val, a->x
            // into
            //    tmp = ld a
            //    tmp.x = val
            //    st tmp, a
            // Must be store inst here.
            StoreInst *stInst = cast<StoreInst>(GEPUser);
            Value *val = stInst->getValueOperand();
            Value *ldVal = Builder.CreateLoad(V);
            ldVal = Builder.CreateInsertElement(ldVal, val, constIdx);
            Builder.CreateStore(ldVal, V);
            stInst->eraseFromParent();
          }
        }
        GEP->eraseFromParent();
      } else if (GEP->getNumIndices() == 1) {
        Value *Idx = *GEP->idx_begin();
        if (ConstantInt *C = dyn_cast<ConstantInt>(Idx)) {
          if (C->getLimitedValue() == 0) {
            GEP->replaceAllUsesWith(V);
            GEP->eraseFromParent();
          }
        }
      }
    }
  }
}

bool DynamicIndexingVectorToArray::needToLower(Value *V) {
  Type *Ty = V->getType()->getPointerElementType();
  if (dyn_cast<VectorType>(Ty)) {
    if (isa<GlobalVariable>(V) || ReplaceAllVectors) {
      return true;
    }
    // Don't lower local vectors that only have static indexing.
    if (HasVectorDynamicIndexing(V)) {
      return true;
    } else {
      // Rewrite static vector indexing with whole-vector ld/st instead.
      ReplaceStaticIndexingOnVector(V);
      return false;
    }
  } else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
    // Arrays must be replaced even without dynamic indexing to remove vector
    // types from dxil.
    // TODO: optimize static array indexing in a later pass.
    Type *EltTy = dxilutil::GetArrayEltTy(AT);
    return isa<VectorType>(EltTy);
  }
  return false;
}

void DynamicIndexingVectorToArray::ReplaceVecGEP(Value *GEP,
                                                 ArrayRef<Value *> idxList,
                                                 Value *A,
                                                 IRBuilder<> &Builder) {
  Value *newGEP = Builder.CreateGEP(A, idxList);
  if (GEP->getType()->getPointerElementType()->isVectorTy()) {
    ReplaceVectorWithArray(GEP, newGEP);
  } else {
    GEP->replaceAllUsesWith(newGEP);
  }
}

void DynamicIndexingVectorToArray::ReplaceAddrSpaceCast(ConstantExpr *CE,
                                                        Value *A,
                                                        IRBuilder<> &Builder) {
  // Create new AddrSpaceCast.
  Value *NewAddrSpaceCast = Builder.CreateAddrSpaceCast(
      A, PointerType::get(A->getType()->getPointerElementType(),
                          CE->getType()->getPointerAddressSpace()));
  ReplaceVectorWithArray(CE, NewAddrSpaceCast);
}
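
// Rewrite all users of the vector pointer Vec in terms of the array pointer A:
// GEPs and addrspacecasts recurse, whole-vector loads become per-element
// GEP+load+insertelement chains, and whole-vector stores become
// extractelement+GEP+store chains.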
void DynamicIndexingVectorToArray::ReplaceVectorWithArray(Value *Vec,
                                                          Value *A) {
  unsigned size =
      Vec->getType()->getPointerElementType()->getVectorNumElements();
  for (auto U = Vec->user_begin(); U != Vec->user_end();) {
    User *User = (*U++);

    // GlobalVariable user.
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(User)) {
      if (User->user_empty())
        continue;
      if (GEPOperator *GEP = dyn_cast<GEPOperator>(User)) {
        IRBuilder<> Builder(Vec->getContext());
        SmallVector<Value *, 4> idxList(GEP->idx_begin(), GEP->idx_end());
        ReplaceVecGEP(GEP, idxList, A, Builder);
        continue;
      } else if (CE->getOpcode() == Instruction::AddrSpaceCast) {
        IRBuilder<> Builder(Vec->getContext());
        ReplaceAddrSpaceCast(CE, A, Builder);
        continue;
      }
      DXASSERT(0, "not implemented yet");
    }

    // Instruction user.
    Instruction *UserInst = cast<Instruction>(User);
    IRBuilder<> Builder(UserInst);
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
      SmallVector<Value *, 4> idxList(GEP->idx_begin(), GEP->idx_end());
      ReplaceVecGEP(cast<GEPOperator>(GEP), idxList, A, Builder);
      GEP->eraseFromParent();
    } else if (LoadInst *ldInst = dyn_cast<LoadInst>(User)) {
      // A load of the whole vector must be split into per-element loads.
      Value *newLd = UndefValue::get(ldInst->getType());
      Value *zero = Builder.getInt32(0);
      for (unsigned i = 0; i < size; i++) {
        Value *idx = Builder.getInt32(i);
        Value *GEP = Builder.CreateInBoundsGEP(A, {zero, idx});
        Value *Elt = Builder.CreateLoad(GEP);
        newLd = Builder.CreateInsertElement(newLd, Elt, i);
      }
      ldInst->replaceAllUsesWith(newLd);
      ldInst->eraseFromParent();
    } else if (StoreInst *stInst = dyn_cast<StoreInst>(User)) {
      Value *val = stInst->getValueOperand();
      Value *zero = Builder.getInt32(0);
      for (unsigned i = 0; i < size; i++) {
        Value *Elt = Builder.CreateExtractElement(val, i);
        Value *idx = Builder.getInt32(i);
        Value *GEP = Builder.CreateInBoundsGEP(A, {zero, idx});
        Builder.CreateStore(Elt, GEP);
      }
      stInst->eraseFromParent();
    } else {
      // Vector parameters should already be lowered.
      // No function call should use a vector pointer here.
      DXASSERT(0, "not implemented yet");
    }
  }
}

void DynamicIndexingVectorToArray::ReplaceVecArrayGEP(Value *GEP,
                                                      ArrayRef<Value *> idxList,
                                                      Value *A,
                                                      IRBuilder<> &Builder) {
  Value *newGEP = Builder.CreateGEP(A, idxList);
  Type *Ty = GEP->getType()->getPointerElementType();
  if (Ty->isVectorTy()) {
    ReplaceVectorWithArray(GEP, newGEP);
  } else if (Ty->isArrayTy()) {
    ReplaceVectorArrayWithArray(GEP, newGEP);
  } else {
    DXASSERT(Ty->isSingleValueType(), "must be vector subscript here");
    GEP->replaceAllUsesWith(newGEP);
  }
}

void DynamicIndexingVectorToArray::ReplaceVectorArrayWithArray(Value *VA,
                                                               Value *A) {
  for (auto U = VA->user_begin(); U != VA->user_end();) {
    User *User = *(U++);
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
      IRBuilder<> Builder(GEP);
      SmallVector<Value *, 4> idxList(GEP->idx_begin(), GEP->idx_end());
      ReplaceVecArrayGEP(GEP, idxList, A, Builder);
      GEP->eraseFromParent();
    } else if (GEPOperator *GEPOp = dyn_cast<GEPOperator>(User)) {
      IRBuilder<> Builder(GEPOp->getContext());
      SmallVector<Value *, 4> idxList(GEPOp->idx_begin(), GEPOp->idx_end());
      ReplaceVecArrayGEP(GEPOp, idxList, A, Builder);
    } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
      BCI->setOperand(0, A);
    } else {
      DXASSERT(0, "Array pointer should only be used by GEP");
    }
  }
}

void DynamicIndexingVectorToArray::lowerUseWithNewValue(Value *V, Value *NewV) {
  Type *Ty = V->getType()->getPointerElementType();
  // Replace V with NewV.
  if (Ty->isVectorTy()) {
    ReplaceVectorWithArray(V, NewV);
  } else {
    ReplaceVectorArrayWithArray(V, NewV);
  }
}
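
// Type mapping: <N x T> becomes [N x T]; a (possibly nested) array of vectors
// becomes the same nest of arrays with an [N x T] innermost element.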
Type *DynamicIndexingVectorToArray::lowerType(Type *Ty) {
  if (VectorType *VT = dyn_cast<VectorType>(Ty)) {
    return ArrayType::get(VT->getElementType(), VT->getNumElements());
  } else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
    SmallVector<ArrayType *, 4> nestArrayTys;
    nestArrayTys.emplace_back(AT);

    Type *EltTy = AT->getElementType();
    // Support multiple levels of array.
    while (EltTy->isArrayTy()) {
      ArrayType *ElAT = cast<ArrayType>(EltTy);
      nestArrayTys.emplace_back(ElAT);
      EltTy = ElAT->getElementType();
    }
    if (EltTy->isVectorTy()) {
      Type *vecAT = ArrayType::get(EltTy->getVectorElementType(),
                                   EltTy->getVectorNumElements());
      return CreateNestArrayTy(vecAT, nestArrayTys);
    }
    return nullptr;
  }
  return nullptr;
}

Constant *DynamicIndexingVectorToArray::lowerInitVal(Constant *InitVal,
                                                     Type *NewTy) {
  Type *VecTy = InitVal->getType();
  ArrayType *ArrayTy = cast<ArrayType>(NewTy);
  if (VecTy->isVectorTy()) {
    SmallVector<Constant *, 4> Elts;
    for (unsigned i = 0; i < VecTy->getVectorNumElements(); i++) {
      Elts.emplace_back(InitVal->getAggregateElement(i));
    }
    return ConstantArray::get(ArrayTy, Elts);
  } else {
    ArrayType *AT = cast<ArrayType>(VecTy);
    ArrayType *EltArrayTy = cast<ArrayType>(ArrayTy->getElementType());
    SmallVector<Constant *, 4> Elts;
    for (unsigned i = 0; i < AT->getNumElements(); i++) {
      Constant *Elt = lowerInitVal(InitVal->getAggregateElement(i), EltArrayTy);
      Elts.emplace_back(Elt);
    }
    return ConstantArray::get(ArrayTy, Elts);
  }
}

bool DynamicIndexingVectorToArray::HasVectorDynamicIndexing(Value *V) {
  return dxilutil::HasDynamicIndexing(V);
}

}

char DynamicIndexingVectorToArray::ID = 0;

INITIALIZE_PASS(DynamicIndexingVectorToArray, "dynamic-vector-to-array",
                "Replace dynamic indexing vector with array", false, false)

// Public interface to the DynamicIndexingVectorToArray pass
ModulePass *llvm::createDynamicIndexingVectorToArrayPass(bool ReplaceAllVector) {
  return new DynamicIndexingVectorToArray(ReplaceAllVector);
}

//===----------------------------------------------------------------------===//
// Flatten multi dim array into 1 dim.
//===----------------------------------------------------------------------===//

namespace {
class MultiDimArrayToOneDimArray : public LowerTypePass {
public:
  explicit MultiDimArrayToOneDimArray() : LowerTypePass(ID) {}
  static char ID; // Pass identification, replacement for typeid

protected:
  bool needToLower(Value *V) override;
  void lowerUseWithNewValue(Value *V, Value *NewV) override;
  Type *lowerType(Type *Ty) override;
  Constant *lowerInitVal(Constant *InitVal, Type *NewTy) override;
  StringRef getGlobalPrefix() override { return ".1dim"; }
};

bool MultiDimArrayToOneDimArray::needToLower(Value *V) {
  Type *Ty = V->getType()->getPointerElementType();
  ArrayType *AT = dyn_cast<ArrayType>(Ty);
  if (!AT)
    return false;
  if (!isa<ArrayType>(AT->getElementType())) {
    return false;
  } else {
    // Merge all GEPs.
    HLModule::MergeGepUse(V);
    return true;
  }
}
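
// Collapse a GEP over a multi-dimensional array into a GEP over the flattened
// one-dimensional array by linearizing the indices row-major:
//    idx = i0; for each inner dim of size n: idx = idx * n + i_inner
// A trailing vector index, if present, is carried over unchanged.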
void ReplaceMultiDimGEP(User *GEP, Value *OneDim, IRBuilder<> &Builder) {
  gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP);

  Value *PtrOffset = GEPIt.getOperand();
  ++GEPIt;
  Value *ArrayIdx = GEPIt.getOperand();
  ++GEPIt;
  Value *VecIdx = nullptr;
  for (; GEPIt != E; ++GEPIt) {
    if (GEPIt->isArrayTy()) {
      unsigned arraySize = GEPIt->getArrayNumElements();
      Value *V = GEPIt.getOperand();
      ArrayIdx = Builder.CreateMul(ArrayIdx, Builder.getInt32(arraySize));
      ArrayIdx = Builder.CreateAdd(V, ArrayIdx);
    } else {
      DXASSERT_NOMSG(isa<VectorType>(*GEPIt));
      VecIdx = GEPIt.getOperand();
    }
  }
  Value *NewGEP = nullptr;
  if (!VecIdx)
    NewGEP = Builder.CreateGEP(OneDim, {PtrOffset, ArrayIdx});
  else
    NewGEP = Builder.CreateGEP(OneDim, {PtrOffset, ArrayIdx, VecIdx});

  GEP->replaceAllUsesWith(NewGEP);
}

void MultiDimArrayToOneDimArray::lowerUseWithNewValue(Value *MultiDim,
                                                      Value *OneDim) {
  LLVMContext &Context = MultiDim->getContext();
  // All users should be element-type accesses.
  // Replace users of AI or GV.
  for (auto it = MultiDim->user_begin(); it != MultiDim->user_end();) {
    User *U = *(it++);
    if (U->user_empty())
      continue;
    if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
      BCI->setOperand(0, OneDim);
      continue;
    }

    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
      IRBuilder<> Builder(Context);
      if (GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
        // NewGEP must be a GEPOperator too.
        // No instruction will be built.
        ReplaceMultiDimGEP(U, OneDim, Builder);
      } else if (CE->getOpcode() == Instruction::AddrSpaceCast) {
        Value *NewAddrSpaceCast = Builder.CreateAddrSpaceCast(
            OneDim,
            PointerType::get(OneDim->getType()->getPointerElementType(),
                             CE->getType()->getPointerAddressSpace()));
        lowerUseWithNewValue(CE, NewAddrSpaceCast);
      } else {
        DXASSERT(0, "not implemented");
      }
    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
      IRBuilder<> Builder(GEP);
      ReplaceMultiDimGEP(U, OneDim, Builder);
      GEP->eraseFromParent();
    } else {
      DXASSERT(0, "not implemented");
    }
  }
}
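
// The flattened array has a single dimension whose size is the product of all
// the original dimension sizes; the element type is the innermost element type.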
Type *MultiDimArrayToOneDimArray::lowerType(Type *Ty) {
  ArrayType *AT = cast<ArrayType>(Ty);
  unsigned arraySize = AT->getNumElements();

  Type *EltTy = AT->getElementType();
  // Support multiple levels of array.
  while (EltTy->isArrayTy()) {
    ArrayType *ElAT = cast<ArrayType>(EltTy);
    arraySize *= ElAT->getNumElements();
    EltTy = ElAT->getElementType();
  }

  return ArrayType::get(EltTy, arraySize);
}

void FlattenMultiDimConstArray(Constant *V, std::vector<Constant *> &Elts) {
  if (!V->getType()->isArrayTy()) {
    Elts.emplace_back(V);
  } else {
    ArrayType *AT = cast<ArrayType>(V->getType());
    for (unsigned i = 0; i < AT->getNumElements(); i++) {
      FlattenMultiDimConstArray(V->getAggregateElement(i), Elts);
    }
  }
}

Constant *MultiDimArrayToOneDimArray::lowerInitVal(Constant *InitVal,
                                                   Type *NewTy) {
  if (InitVal) {
    // MultiDim array init should be done by store.
    if (isa<ConstantAggregateZero>(InitVal))
      InitVal = ConstantAggregateZero::get(NewTy);
    else if (isa<UndefValue>(InitVal))
      InitVal = UndefValue::get(NewTy);
    else {
      std::vector<Constant *> Elts;
      FlattenMultiDimConstArray(InitVal, Elts);
      InitVal = ConstantArray::get(cast<ArrayType>(NewTy), Elts);
    }
  } else {
    InitVal = UndefValue::get(NewTy);
  }
  return InitVal;
}

}

char MultiDimArrayToOneDimArray::ID = 0;

INITIALIZE_PASS(MultiDimArrayToOneDimArray, "multi-dim-one-dim",
                "Flatten multi-dim array into one-dim array", false, false)

// Public interface to the MultiDimArrayToOneDimArray pass
ModulePass *llvm::createMultiDimArrayToOneDimArrayPass() {
  return new MultiDimArrayToOneDimArray();
}

//===----------------------------------------------------------------------===//
// Lower resource into handle.
//===----------------------------------------------------------------------===//

namespace {
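// Replaces resource-typed allocas and globals (and arrays of them) with
// storage of the handle type from the HL module's OP, inserting
// HLCreateHandle / HandleToResCast calls at the stores and loads that move
// resources in and out. The pass is a no-op for library profiles (m_bIsLib).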
class ResourceToHandle : public LowerTypePass {
public:
  explicit ResourceToHandle() : LowerTypePass(ID) {}
  static char ID; // Pass identification, replacement for typeid

protected:
  bool needToLower(Value *V) override;
  void lowerUseWithNewValue(Value *V, Value *NewV) override;
  Type *lowerType(Type *Ty) override;
  Constant *lowerInitVal(Constant *InitVal, Type *NewTy) override;
  StringRef getGlobalPrefix() override { return ".res"; }
  void initialize(Module &M) override;

private:
  void ReplaceResourceWithHandle(Value *ResPtr, Value *HandlePtr);
  void ReplaceResourceGEPWithHandleGEP(Value *GEP, ArrayRef<Value *> idxList,
                                       Value *A, IRBuilder<> &Builder);
  void ReplaceResourceArrayWithHandleArray(Value *VA, Value *A);

  Type *m_HandleTy;
  HLModule *m_pHLM;
  bool m_bIsLib;
};

void ResourceToHandle::initialize(Module &M) {
  DXASSERT(M.HasHLModule(), "require HLModule");
  m_pHLM = &M.GetHLModule();
  m_HandleTy = m_pHLM->GetOP()->GetHandleType();
  m_bIsLib = m_pHLM->GetShaderModel()->IsLib();
}

bool ResourceToHandle::needToLower(Value *V) {
  Type *Ty = V->getType()->getPointerElementType();
  Ty = dxilutil::GetArrayEltTy(Ty);
  return (dxilutil::IsHLSLObjectType(Ty) &&
          !HLModule::IsStreamOutputType(Ty)) &&
         // Skip lib profile.
         !m_bIsLib;
}

Type *ResourceToHandle::lowerType(Type *Ty) {
  if ((dxilutil::IsHLSLObjectType(Ty) && !HLModule::IsStreamOutputType(Ty))) {
    return m_HandleTy;
  }

  ArrayType *AT = cast<ArrayType>(Ty);

  SmallVector<ArrayType *, 4> nestArrayTys;
  nestArrayTys.emplace_back(AT);

  Type *EltTy = AT->getElementType();
  // Support multiple levels of array.
  while (EltTy->isArrayTy()) {
    ArrayType *ElAT = cast<ArrayType>(EltTy);
    nestArrayTys.emplace_back(ElAT);
    EltTy = ElAT->getElementType();
  }

  return CreateNestArrayTy(m_HandleTy, nestArrayTys);
}

Constant *ResourceToHandle::lowerInitVal(Constant *InitVal, Type *NewTy) {
  DXASSERT(isa<UndefValue>(InitVal), "resource cannot have real init val");
  return UndefValue::get(NewTy);
}
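
// Rewrite direct uses of a resource pointer in terms of the handle pointer:
// a load of the resource becomes a load of the handle (cast back via
// HandleToResCast where the value is re-stored), a store of a resource becomes
// HLCreateHandle plus a store of the handle, and plain user-function calls get
// a bitcast of the handle pointer in place of the resource pointer.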
void ResourceToHandle::ReplaceResourceWithHandle(Value *ResPtr,
                                                 Value *HandlePtr) {
  for (auto it = ResPtr->user_begin(); it != ResPtr->user_end();) {
    User *U = *(it++);
    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
      IRBuilder<> Builder(LI);
      Value *Handle = Builder.CreateLoad(HandlePtr);
      Type *ResTy = LI->getType();
      // Used by createHandle or Store.
      for (auto ldIt = LI->user_begin(); ldIt != LI->user_end();) {
        User *ldU = *(ldIt++);
        if (StoreInst *SI = dyn_cast<StoreInst>(ldU)) {
          Value *TmpRes = HLModule::EmitHLOperationCall(
              Builder, HLOpcodeGroup::HLCast,
              (unsigned)HLCastOpcode::HandleToResCast, ResTy, {Handle},
              *m_pHLM->GetModule());
          SI->replaceUsesOfWith(LI, TmpRes);
        } else {
          CallInst *CI = cast<CallInst>(ldU);
          DXASSERT(hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction()) ==
                       HLOpcodeGroup::HLCreateHandle,
                   "must be createHandle");
          CI->replaceAllUsesWith(Handle);
          CI->eraseFromParent();
        }
      }
      LI->eraseFromParent();
    } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      Value *Res = SI->getValueOperand();
      IRBuilder<> Builder(SI);
      // CreateHandle from Res.
      Value *Handle = HLModule::EmitHLOperationCall(
          Builder, HLOpcodeGroup::HLCreateHandle,
          /*opcode*/ 0, m_HandleTy, {Res}, *m_pHLM->GetModule());
      // Store Handle to HandlePtr.
      Builder.CreateStore(Handle, HandlePtr);
      // Remove resource Store.
      SI->eraseFromParent();
    } else if (U->user_empty() && isa<GEPOperator>(U)) {
      continue;
    } else {
      CallInst *CI = cast<CallInst>(U);
      IRBuilder<> Builder(CI);
      HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
      // Allow user function to use res ptr as argument.
      if (group == HLOpcodeGroup::NotHL) {
        Value *TmpResPtr = Builder.CreateBitCast(HandlePtr, ResPtr->getType());
        CI->replaceUsesOfWith(ResPtr, TmpResPtr);
      } else {
        DXASSERT(0, "invalid operation on resource");
      }
    }
  }
}

void ResourceToHandle::ReplaceResourceGEPWithHandleGEP(
    Value *GEP, ArrayRef<Value *> idxList, Value *A, IRBuilder<> &Builder) {
  Value *newGEP = Builder.CreateGEP(A, idxList);
  Type *Ty = GEP->getType()->getPointerElementType();
  if (Ty->isArrayTy()) {
    ReplaceResourceArrayWithHandleArray(GEP, newGEP);
  } else {
    DXASSERT(dxilutil::IsHLSLObjectType(Ty), "must be resource type here");
    ReplaceResourceWithHandle(GEP, newGEP);
  }
}

void ResourceToHandle::ReplaceResourceArrayWithHandleArray(Value *VA,
                                                           Value *A) {
  for (auto U = VA->user_begin(); U != VA->user_end();) {
    User *User = *(U++);
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
      IRBuilder<> Builder(GEP);
      SmallVector<Value *, 4> idxList(GEP->idx_begin(), GEP->idx_end());
      ReplaceResourceGEPWithHandleGEP(GEP, idxList, A, Builder);
      GEP->eraseFromParent();
    } else if (GEPOperator *GEPOp = dyn_cast<GEPOperator>(User)) {
      IRBuilder<> Builder(GEPOp->getContext());
      SmallVector<Value *, 4> idxList(GEPOp->idx_begin(), GEPOp->idx_end());
      ReplaceResourceGEPWithHandleGEP(GEPOp, idxList, A, Builder);
    } else {
      DXASSERT(0, "Array pointer should only be used by GEP");
    }
  }
}

void ResourceToHandle::lowerUseWithNewValue(Value *V, Value *NewV) {
  Type *Ty = V->getType()->getPointerElementType();
  // Replace V with NewV.
  if (Ty->isArrayTy()) {
    ReplaceResourceArrayWithHandleArray(V, NewV);
  } else {
    ReplaceResourceWithHandle(V, NewV);
  }
}

}

char ResourceToHandle::ID = 0;

INITIALIZE_PASS(ResourceToHandle, "resource-handle",
                "Lower resource into handle", false, false)

// Public interface to the ResourceToHandle pass
ModulePass *llvm::createResourceToHandlePass() {
  return new ResourceToHandle();
}