HLOperationLower.cpp 261 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
762786279628062816282628362846285628662876288628962906291629262936294629562966297629862996300630163026303630463056306630763086309631063116312631363146315631663176318631963206321632263236324632563266327632863296330633163326333633463356336633763386339634063416342634363446345634663476348634963506351635263536354635563566357635863596360636163626363636463656366636763686369637063716372637363746375637663776378637963806381638263836384638563866387638863896390639163926393639463956396639763986399640064016402640364046405640664076408640964106411641264136414641564166417641864196420642164226423642464256426642764286429643064316432643364346435643664376438643964406441644264436444644564466447644864496450645164526453645464556456645764586459646064616462646364646465646664676468646964706471647264736474647564766477647864796480648164826483648464856486648764886489649064916492
  1. ///////////////////////////////////////////////////////////////////////////////
  2. // //
  3. // HLOperationLower.cpp //
  4. // Copyright (C) Microsoft Corporation. All rights reserved. //
  5. // This file is distributed under the University of Illinois Open Source //
  6. // License. See LICENSE.TXT for details. //
  7. // //
  8. // Lower functions to lower HL operations to DXIL operations. //
  9. // //
  10. ///////////////////////////////////////////////////////////////////////////////
  11. #include "dxc/HLSL/DxilModule.h"
  12. #include "dxc/HLSL/DxilOperations.h"
  13. #include "dxc/HLSL/HLMatrixLowerHelper.h"
  14. #include "dxc/HLSL/HLModule.h"
  15. #include "dxc/HLSL/DxilUtil.h"
  16. #include "dxc/HLSL/HLOperationLower.h"
  17. #include "dxc/HLSL/HLOperationLowerExtension.h"
  18. #include "dxc/HLSL/HLOperations.h"
  19. #include "dxc/HlslIntrinsicOp.h"
  20. #include "llvm/IR/GetElementPtrTypeIterator.h"
  21. #include "llvm/IR/IRBuilder.h"
  22. #include "llvm/IR/Instructions.h"
  23. #include "llvm/IR/Module.h"
  24. #include <unordered_set>
  25. using namespace llvm;
  26. using namespace hlsl;
  27. struct HLOperationLowerHelper {
  28. OP &hlslOP;
  29. Type *voidTy;
  30. Type *f32Ty;
  31. Type *i32Ty;
  32. llvm::Type *i1Ty;
  33. Type *i8Ty;
  34. DxilTypeSystem &dxilTypeSys;
  35. DxilFunctionProps *functionProps;
  36. bool bLegacyCBufferLoad;
  37. DataLayout legacyDataLayout;
  38. HLOperationLowerHelper(HLModule &HLM);
  39. };
  40. HLOperationLowerHelper::HLOperationLowerHelper(HLModule &HLM)
  41. : hlslOP(*HLM.GetOP()), dxilTypeSys(HLM.GetTypeSystem()),
  42. legacyDataLayout(HLModule::GetLegacyDataLayoutDesc()) {
  43. llvm::LLVMContext &Ctx = HLM.GetCtx();
  44. voidTy = Type::getVoidTy(Ctx);
  45. f32Ty = Type::getFloatTy(Ctx);
  46. i32Ty = Type::getInt32Ty(Ctx);
  47. i1Ty = Type::getInt1Ty(Ctx);
  48. i8Ty = Type::getInt8Ty(Ctx);
  49. Function *EntryFunc = HLM.GetEntryFunction();
  50. functionProps = nullptr;
  51. if (HLM.HasDxilFunctionProps(EntryFunc))
  52. functionProps = &HLM.GetDxilFunctionProps(EntryFunc);
  53. bLegacyCBufferLoad = HLM.GetHLOptions().bLegacyCBufferLoad;
  54. }
// Helper state for lowering object (resource) intrinsics: resolves which
// resource a handle value came from and records counter/non-uniform usage.
struct HLObjectOperationLowerHelper {
private:
  // For object intrinsics.
  HLModule &HLM;
  // Resource facts resolved for a handle value.
  struct ResAttribute {
    DXIL::ResourceClass RC; // SRV/UAV/CBuffer/Sampler class.
    DXIL::ResourceKind RK;  // Concrete resource kind.
    Type *ResourceType;     // Type of the resource's global symbol.
  };
  // Cache: handle value -> resolved resource attributes.
  std::unordered_map<Value *, ResAttribute> HandleMetaMap;
  // Caller-owned set of resource loads flagged for counter updates
  // (presumably UAV counter usage; populated outside this view).
  std::unordered_set<LoadInst *> &UpdateCounterSet;
  // Caller-owned set of values marked as non-uniform resource indices.
  std::unordered_set<Value *> &NonUniformSet;
  // Map from pointer of cbuffer to pointer of resource.
  // For cbuffer like this:
  // cbuffer A {
  //   Texture2D T;
  // };
  // A global resource Texture2D T2 will be created for Texture2D T.
  // CBPtrToResourceMap[T] will return T2.
  std::unordered_map<Value *, Value *> CBPtrToResourceMap;

public:
  HLObjectOperationLowerHelper(HLModule &HLM,
                               std::unordered_set<LoadInst *> &UpdateCounter,
                               std::unordered_set<Value *> &NonUniform)
      : HLM(HLM), UpdateCounterSet(UpdateCounter), NonUniformSet(NonUniform) {}
  80. DXIL::ResourceClass GetRC(Value *Handle) {
  81. ResAttribute &Res = FindCreateHandleResourceBase(Handle);
  82. return Res.RC;
  83. }
  84. DXIL::ResourceKind GetRK(Value *Handle) {
  85. ResAttribute &Res = FindCreateHandleResourceBase(Handle);
  86. return Res.RK;
  87. }
  88. Type *GetResourceType(Value *Handle) {
  89. ResAttribute &Res = FindCreateHandleResourceBase(Handle);
  90. return Res.ResourceType;
  91. }
  92. void MarkHasCounter(Type *Ty, Value *handle) {
  93. DXIL::ResourceClass RC = GetRC(handle);
  94. DXASSERT_LOCALVAR(RC, RC == DXIL::ResourceClass::UAV,
  95. "must UAV for counter");
  96. std::unordered_set<Value *> resSet;
  97. MarkHasCounterOnCreateHandle(handle, resSet);
  98. }
  // Record V as a non-uniform resource index.
  void MarkNonUniform(Value *V) { NonUniformSet.insert(V); }
  100. Value *GetOrCreateResourceForCbPtr(GetElementPtrInst *CbPtr,
  101. GlobalVariable *CbGV, MDNode *MD) {
  102. // Change array idx to 0 to make sure all array ptr share same key.
  103. Value *Key = UniformCbPtr(CbPtr, CbGV);
  104. if (CBPtrToResourceMap.count(Key))
  105. return CBPtrToResourceMap[Key];
  106. Value *Resource = CreateResourceForCbPtr(CbPtr, CbGV, MD);
  107. CBPtrToResourceMap[Key] = Resource;
  108. return Resource;
  109. }
  // Rewrite a pointer into a cbuffer's resource member (CbPtr) into an
  // equivalent pointer into the standalone resource global (ResPtr).
  Value *LowerCbResourcePtr(GetElementPtrInst *CbPtr, Value *ResPtr) {
    // Simple case.
    if (ResPtr->getType() == CbPtr->getType())
      return ResPtr;
    // Array case.
    DXASSERT_NOMSG(ResPtr->getType()->getPointerElementType()->isArrayTy());
    IRBuilder<> Builder(CbPtr);
    gep_type_iterator GEPIt = gep_type_begin(CbPtr), E = gep_type_end(CbPtr);
    Value *arrayIdx = GEPIt.getOperand();
    // Only calc array idx and size.
    // Ignore struct type part.
    for (; GEPIt != E; ++GEPIt) {
      if (GEPIt->isArrayTy()) {
        // Flatten nested array dimensions into one linear index:
        // idx = idx * dim + levelIdx for each array level encountered.
        arrayIdx = Builder.CreateMul(
            arrayIdx, Builder.getInt32(GEPIt->getArrayNumElements()));
        arrayIdx = Builder.CreateAdd(arrayIdx, GEPIt.getOperand());
      }
    }
    // GEP into the resource array with the flattened index.
    return Builder.CreateGEP(ResPtr, {Builder.getInt32(0), arrayIdx});
  }
  130. private:
// Resolve the resource attributes (class, kind, global-symbol type) behind a
// handle value, caching results in HandleMetaMap. A handle may reach its
// resource in several shapes:
//   - an Argument carrying resource-attribute metadata,
//   - a LoadInst from a pointer: follow the pointer's users to either a call
//     whose matching parameter is annotated, or a store whose stored value is
//     resolved recursively,
//   - a CallInst whose called function carries resource-attribute metadata,
//   - a SelectInst / PHINode over handles (all arms are resolved; the
//     first arm's attributes are used for this node).
// On failure, emits "cannot map resource to handle" and returns the Invalid
// placeholder entry.
ResAttribute &FindCreateHandleResourceBase(Value *Handle) {
  if (HandleMetaMap.count(Handle))
    return HandleMetaMap[Handle];
  // Add invalid first to avoid dead loop (the recursion below can revisit
  // this handle through stores/phis; the placeholder terminates it).
  HandleMetaMap[Handle] = {DXIL::ResourceClass::Invalid,
                           DXIL::ResourceKind::Invalid,
                           StructType::get(Type::getVoidTy(HLM.GetCtx()))};
  if (Argument *Arg = dyn_cast<Argument>(Handle)) {
    MDNode *MD = HLM.GetDxilResourceAttrib(Arg);
    if (!MD) {
      Handle->getContext().emitError("cannot map resource to handle");
      return HandleMetaMap[Handle];
    }
    DxilResourceBase Res(DxilResource::Class::Invalid);
    HLM.LoadDxilResourceBaseFromMDNode(MD, Res);
    ResAttribute Attrib = {Res.GetClass(), Res.GetKind(),
                           Res.GetGlobalSymbol()->getType()};
    HandleMetaMap[Handle] = Attrib;
    return HandleMetaMap[Handle];
  }
  if (LoadInst *LI = dyn_cast<LoadInst>(Handle)) {
    Value *Ptr = LI->getPointerOperand();
    for (User *U : Ptr->users()) {
      if (CallInst *CI = dyn_cast<CallInst>(U)) {
        // The pointer is passed to a function; use the annotation of the
        // parameter it is bound to.
        DxilFunctionAnnotation *FnAnnot =
            HLM.GetFunctionAnnotation(CI->getCalledFunction());
        if (FnAnnot) {
          for (auto &arg : CI->arg_operands()) {
            if (arg == Ptr) {
              unsigned argNo = arg.getOperandNo();
              DxilParameterAnnotation &ParamAnnot =
                  FnAnnot->GetParameterAnnotation(argNo);
              MDNode *MD = ParamAnnot.GetResourceAttribute();
              if (!MD) {
                Handle->getContext().emitError(
                    "cannot map resource to handle");
                return HandleMetaMap[Handle];
              }
              DxilResourceBase Res(DxilResource::Class::Invalid);
              HLM.LoadDxilResourceBaseFromMDNode(MD, Res);
              ResAttribute Attrib = {Res.GetClass(), Res.GetKind(),
                                     Res.GetGlobalSymbol()->getType()};
              HandleMetaMap[Handle] = Attrib;
              return HandleMetaMap[Handle];
            }
          }
        }
      }
      if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
        // Resolve via whatever value is stored through the same pointer.
        Value *V = SI->getValueOperand();
        ResAttribute Attrib = FindCreateHandleResourceBase(V);
        HandleMetaMap[Handle] = Attrib;
        return HandleMetaMap[Handle];
      }
    }
    // Cannot find.
    Handle->getContext().emitError("cannot map resource to handle");
    return HandleMetaMap[Handle];
  }
  if (CallInst *CI = dyn_cast<CallInst>(Handle)) {
    MDNode *MD = HLM.GetDxilResourceAttrib(CI->getCalledFunction());
    if (!MD) {
      Handle->getContext().emitError("cannot map resource to handle");
      return HandleMetaMap[Handle];
    }
    DxilResourceBase Res(DxilResource::Class::Invalid);
    HLM.LoadDxilResourceBaseFromMDNode(MD, Res);
    ResAttribute Attrib = {Res.GetClass(), Res.GetKind(),
                           Res.GetGlobalSymbol()->getType()};
    HandleMetaMap[Handle] = Attrib;
    return HandleMetaMap[Handle];
  }
  if (SelectInst *Sel = dyn_cast<SelectInst>(Handle)) {
    ResAttribute &ResT = FindCreateHandleResourceBase(Sel->getTrueValue());
    // Use MDT here, ResourceClass, ResourceID match is done at
    // DxilGenerationPass::AddCreateHandleForPhiNodeAndSelect.
    HandleMetaMap[Handle] = ResT;
    FindCreateHandleResourceBase(Sel->getFalseValue());
    return ResT;
  }
  if (PHINode *Phi = dyn_cast<PHINode>(Handle)) {
    if (Phi->getNumOperands() == 0) {
      Handle->getContext().emitError("cannot map resource to handle");
      return HandleMetaMap[Handle];
    }
    ResAttribute &Res0 = FindCreateHandleResourceBase(Phi->getOperand(0));
    // Use Res0 here, ResourceClass, ResourceID match is done at
    // DxilGenerationPass::AddCreateHandleForPhiNodeAndSelect.
    HandleMetaMap[Handle] = Res0;
    // Still visit the remaining arms so they get cached/validated too.
    for (unsigned i = 1; i < Phi->getNumOperands(); i++) {
      FindCreateHandleResourceBase(Phi->getOperand(i));
    }
    return Res0;
  }
  Handle->getContext().emitError("cannot map resource to handle");
  return HandleMetaMap[Handle];
}
  226. CallInst *FindCreateHandle(Value *handle,
  227. std::unordered_set<Value *> &resSet) {
  228. // Already checked.
  229. if (resSet.count(handle))
  230. return nullptr;
  231. resSet.insert(handle);
  232. if (CallInst *CI = dyn_cast<CallInst>(handle))
  233. return CI;
  234. if (SelectInst *Sel = dyn_cast<SelectInst>(handle)) {
  235. if (CallInst *CI = FindCreateHandle(Sel->getTrueValue(), resSet))
  236. return CI;
  237. if (CallInst *CI = FindCreateHandle(Sel->getFalseValue(), resSet))
  238. return CI;
  239. return nullptr;
  240. }
  241. if (PHINode *Phi = dyn_cast<PHINode>(handle)) {
  242. for (unsigned i = 0; i < Phi->getNumOperands(); i++) {
  243. if (CallInst *CI = FindCreateHandle(Phi->getOperand(i), resSet))
  244. return CI;
  245. }
  246. return nullptr;
  247. }
  248. return nullptr;
  249. }
  250. void MarkHasCounterOnCreateHandle(Value *handle,
  251. std::unordered_set<Value *> &resSet) {
  252. // Already checked.
  253. if (resSet.count(handle))
  254. return;
  255. resSet.insert(handle);
  256. if (CallInst *CI = dyn_cast<CallInst>(handle)) {
  257. Value *Res =
  258. CI->getArgOperand(HLOperandIndex::kCreateHandleResourceOpIdx);
  259. LoadInst *LdRes = dyn_cast<LoadInst>(Res);
  260. if (!LdRes) {
  261. CI->getContext().emitError(CI, "cannot map resource to handle");
  262. return;
  263. }
  264. UpdateCounterSet.insert(LdRes);
  265. return;
  266. }
  267. if (SelectInst *Sel = dyn_cast<SelectInst>(handle)) {
  268. MarkHasCounterOnCreateHandle(Sel->getTrueValue(), resSet);
  269. MarkHasCounterOnCreateHandle(Sel->getFalseValue(), resSet);
  270. }
  271. if (PHINode *Phi = dyn_cast<PHINode>(handle)) {
  272. for (unsigned i = 0; i < Phi->getNumOperands(); i++) {
  273. MarkHasCounterOnCreateHandle(Phi->getOperand(i), resSet);
  274. }
  275. }
  276. }
  277. Value *UniformCbPtr(GetElementPtrInst *CbPtr, GlobalVariable *CbGV) {
  278. gep_type_iterator GEPIt = gep_type_begin(CbPtr), E = gep_type_end(CbPtr);
  279. std::vector<Value *> idxList(CbPtr->idx_begin(), CbPtr->idx_end());
  280. unsigned i = 0;
  281. IRBuilder<> Builder(HLM.GetCtx());
  282. Value *zero = Builder.getInt32(0);
  283. for (; GEPIt != E; ++GEPIt, ++i) {
  284. if (GEPIt->isArrayTy()) {
  285. // Change array idx to 0 to make sure all array ptr share same key.
  286. idxList[i] = zero;
  287. }
  288. }
  289. Value *Key = Builder.CreateInBoundsGEP(CbGV, idxList);
  290. return Key;
  291. }
// Create (via CreateResourceGV) a resource global for a cbuffer-resident
// resource referenced through CbPtr. The GEP chain is walked to accumulate
// the flattened array size and to build a dotted field-path name from the
// struct annotations; the result type is the GEP's element type, wrapped in
// an array when the path crosses arrays.
Value *CreateResourceForCbPtr(GetElementPtrInst *CbPtr, GlobalVariable *CbGV,
                              MDNode *MD) {
  Type *CbTy = CbPtr->getPointerOperandType();
  DXASSERT_LOCALVAR(CbTy, CbTy == CbGV->getType(), "else arg not point to var");
  gep_type_iterator GEPIt = gep_type_begin(CbPtr), E = gep_type_end(CbPtr);
  unsigned i = 0;
  IRBuilder<> Builder(HLM.GetCtx());
  unsigned arraySize = 1;
  DxilTypeSystem &typeSys = HLM.GetTypeSystem();
  std::string Name;
  for (; GEPIt != E; ++GEPIt, ++i) {
    if (GEPIt->isArrayTy()) {
      // Multiply out nested array dimensions into one flat count.
      arraySize *= GEPIt->getArrayNumElements();
    } else if (GEPIt->isStructTy()) {
      // Struct hop: append the field name selected by this (constant) index.
      DxilStructAnnotation *typeAnnot =
          typeSys.GetStructAnnotation(cast<StructType>(*GEPIt));
      DXASSERT_NOMSG(typeAnnot);
      unsigned idx = cast<ConstantInt>(GEPIt.getOperand())->getLimitedValue();
      DXASSERT_NOMSG(typeAnnot->GetNumFields() > idx);
      DxilFieldAnnotation &fieldAnnot = typeAnnot->GetFieldAnnotation(idx);
      if (!Name.empty())
        Name += ".";
      Name += fieldAnnot.GetFieldName();
    }
  }
  Type *Ty = CbPtr->getResultElementType();
  if (arraySize > 1) {
    Ty = ArrayType::get(Ty, arraySize);
  }
  return CreateResourceGV(Ty, Name, MD);
}
  323. Value *CreateResourceGV(Type *Ty, StringRef Name, MDNode *MD) {
  324. Module &M = *HLM.GetModule();
  325. Constant *GV = M.getOrInsertGlobal(Name, Ty);
  326. // Create resource and set GV as globalSym.
  327. HLM.AddResourceWithGlobalVariableAndMDNode(GV, MD);
  328. return GV;
  329. }
  330. };
// Signature shared by every intrinsic lowering routine: lowers the HL call
// CI (intrinsic IOP, direct-map DXIL opcode where applicable) and returns
// the replacement value (or nullptr when the call is handled in place);
// Translated is the out-flag for whether lowering succeeded.
using IntrinsicLowerFuncTy = Value *(CallInst *CI, IntrinsicOp IOP,
                                     DXIL::OpCode opcode,
                                     HLOperationLowerHelper &helper,
                                     HLObjectOperationLowerHelper *pObjHelper,
                                     bool &Translated);
// Table entry mapping one HL intrinsic to its lowering routine.
struct IntrinsicLower {
  // Intrinsic opcode.
  IntrinsicOp IntriOpcode;
  // Lower function.
  IntrinsicLowerFuncTy &LowerFunc;
  // DXIL opcode if can direct map.
  DXIL::OpCode DxilOpcode;
};
  342. // IOP intrinsics.
  343. namespace {
  344. Value *TrivialDxilOperation(Function *dxilFunc, OP::OpCode opcode, ArrayRef<Value *> refArgs,
  345. Type *Ty, Type *RetTy, OP *hlslOP,
  346. IRBuilder<> &Builder) {
  347. unsigned argNum = refArgs.size();
  348. std::vector<Value *> args = refArgs;
  349. if (Ty->isVectorTy()) {
  350. Value *retVal = llvm::UndefValue::get(RetTy);
  351. unsigned vecSize = Ty->getVectorNumElements();
  352. for (unsigned i = 0; i < vecSize; i++) {
  353. // Update vector args, skip known opcode arg.
  354. for (unsigned argIdx = HLOperandIndex::kUnaryOpSrc0Idx; argIdx < argNum;
  355. argIdx++) {
  356. if (refArgs[argIdx]->getType()->isVectorTy()) {
  357. Value *arg = refArgs[argIdx];
  358. args[argIdx] = Builder.CreateExtractElement(arg, i);
  359. }
  360. }
  361. Value *EltOP =
  362. Builder.CreateCall(dxilFunc, args, hlslOP->GetOpCodeName(opcode));
  363. retVal = Builder.CreateInsertElement(retVal, EltOP, i);
  364. }
  365. return retVal;
  366. } else {
  367. Value *retVal =
  368. Builder.CreateCall(dxilFunc, args, hlslOP->GetOpCodeName(opcode));
  369. return retVal;
  370. }
  371. }
  372. // Generates a DXIL operation over an overloaded type (Ty), returning a
  373. // RetTy value; when Ty is a vector, it will replicate per-element operations
  374. // into RetTy to rebuild it.
  375. Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef<Value *> refArgs,
  376. Type *Ty, Type *RetTy, OP *hlslOP,
  377. IRBuilder<> &Builder) {
  378. Type *EltTy = Ty->getScalarType();
  379. Function *dxilFunc = hlslOP->GetOpFunc(opcode, EltTy);
  380. return TrivialDxilOperation(dxilFunc, opcode, refArgs, Ty, RetTy, hlslOP, Builder);
  381. }
// Variant that emits the operation at Inst's position, with the result type
// taken from Inst. The caller passes refArgs with slot 0 == nullptr; the
// opcode constant is patched into that slot here.
Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef<Value *> refArgs,
                            Type *Ty, Instruction *Inst, OP *hlslOP) {
  DXASSERT(refArgs.size() > 0, "else opcode isn't in signature");
  DXASSERT(refArgs[0] == nullptr,
           "else caller has already filled the value in");
  IRBuilder<> B(Inst);
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  // Write through the ArrayRef: per the assertions above, slot 0 was left
  // empty by the caller for this routine to fill.
  const_cast<llvm::Value **>(refArgs.data())[0] =
      opArg; // actually stack memory from caller
  return TrivialDxilOperation(opcode, refArgs, Ty, Inst->getType(), hlslOP, B);
}
  393. Value *TrivialDxilUnaryOperationRet(OP::OpCode opcode, Value *src, Type *RetTy,
  394. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  395. Type *Ty = src->getType();
  396. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  397. Value *args[] = {opArg, src};
  398. return TrivialDxilOperation(opcode, args, Ty, RetTy, hlslOP, Builder);
  399. }
  400. Value *TrivialDxilUnaryOperation(OP::OpCode opcode, Value *src,
  401. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  402. return TrivialDxilUnaryOperationRet(opcode, src, src->getType(), hlslOP,
  403. Builder);
  404. }
  405. Value *TrivialDxilBinaryOperation(OP::OpCode opcode, Value *src0, Value *src1,
  406. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  407. Type *Ty = src0->getType();
  408. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  409. Value *args[] = {opArg, src0, src1};
  410. return TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder);
  411. }
  412. Value *TrivialDxilTrinaryOperation(OP::OpCode opcode, Value *src0, Value *src1,
  413. Value *src2, hlsl::OP *hlslOP,
  414. IRBuilder<> &Builder) {
  415. Type *Ty = src0->getType();
  416. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  417. Value *args[] = {opArg, src0, src1, src2};
  418. return TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder);
  419. }
  420. Value *TrivialUnaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  421. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  422. Value *src0 = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  423. IRBuilder<> Builder(CI);
  424. hlsl::OP *hlslOP = &helper.hlslOP;
  425. Value *retVal = TrivialDxilUnaryOperationRet(opcode, src0, CI->getType(), hlslOP, Builder);
  426. return retVal;
  427. }
  428. Value *TrivialBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  429. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  430. hlsl::OP *hlslOP = &helper.hlslOP;
  431. Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  432. Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  433. IRBuilder<> Builder(CI);
  434. Value *binOp =
  435. TrivialDxilBinaryOperation(opcode, src0, src1, hlslOP, Builder);
  436. return binOp;
  437. }
  438. Value *TrivialTrinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  439. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  440. hlsl::OP *hlslOP = &helper.hlslOP;
  441. Value *src0 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  442. Value *src1 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  443. Value *src2 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  444. IRBuilder<> Builder(CI);
  445. Value *triOp =
  446. TrivialDxilTrinaryOperation(opcode, src0, src1, src2, hlslOP, Builder);
  447. return triOp;
  448. }
  449. Value *TrivialIsSpecialFloat(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  450. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  451. hlsl::OP *hlslOP = &helper.hlslOP;
  452. Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  453. IRBuilder<> Builder(CI);
  454. Type *Ty = src->getType();
  455. Type *RetTy = Type::getInt1Ty(CI->getContext());
  456. if (Ty->isVectorTy())
  457. RetTy = VectorType::get(RetTy, Ty->getVectorNumElements());
  458. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  459. Value *args[] = {opArg, src};
  460. return TrivialDxilOperation(opcode, args, Ty, RetTy, hlslOP, Builder);
  461. }
// NonUniformResourceIndex has no DXIL op of its own: tag the index value
// (and any casts of the call's result) as non-uniform via the object helper,
// then fold the call away by replacing all of its uses with its operand.
Value *TranslateNonUniformResourceIndex(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                                        HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  // Mark casts of the result so the tag survives index type conversions.
  for (User *U : CI->users()) {
    if (CastInst *I = dyn_cast<CastInst>(U)) {
      pObjHelper->MarkNonUniform(I);
    }
  }
  Value *V = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  pObjHelper->MarkNonUniform(V);
  CI->replaceAllUsesWith(V);
  // No replacement instruction is produced; all uses now point at V.
  return nullptr;
}
  474. Value *TrivialBarrier(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  475. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  476. hlsl::OP *OP = &helper.hlslOP;
  477. Function *dxilFunc = OP->GetOpFunc(OP::OpCode::Barrier, CI->getType());
  478. Constant *opArg = OP->GetU32Const((unsigned)OP::OpCode::Barrier);
  479. unsigned uglobal = static_cast<unsigned>(DXIL::BarrierMode::UAVFenceGlobal);
  480. unsigned g = static_cast<unsigned>(DXIL::BarrierMode::TGSMFence);
  481. unsigned t = static_cast<unsigned>(DXIL::BarrierMode::SyncThreadGroup);
  482. // unsigned ut = static_cast<unsigned>(DXIL::BarrierMode::UAVFenceThreadGroup);
  483. unsigned barrierMode;
  484. switch (IOP) {
  485. case IntrinsicOp::IOP_AllMemoryBarrier:
  486. barrierMode = uglobal | g;
  487. break;
  488. case IntrinsicOp::IOP_AllMemoryBarrierWithGroupSync:
  489. barrierMode = uglobal | g | t;
  490. break;
  491. case IntrinsicOp::IOP_GroupMemoryBarrier:
  492. barrierMode = g;
  493. break;
  494. case IntrinsicOp::IOP_GroupMemoryBarrierWithGroupSync:
  495. barrierMode = g | t;
  496. break;
  497. case IntrinsicOp::IOP_DeviceMemoryBarrier:
  498. barrierMode = uglobal;
  499. break;
  500. case IntrinsicOp::IOP_DeviceMemoryBarrierWithGroupSync:
  501. barrierMode = uglobal | t;
  502. break;
  503. default:
  504. DXASSERT(0, "invalid opcode for barrier");
  505. break;
  506. }
  507. Value *src0 = OP->GetU32Const(static_cast<unsigned>(barrierMode));
  508. Value *args[] = {opArg, src0};
  509. IRBuilder<> Builder(CI);
  510. Builder.CreateCall(dxilFunc, args);
  511. return nullptr;
  512. }
  513. Value *TranslateD3DColorToUByte4(CallInst *CI, IntrinsicOp IOP,
  514. OP::OpCode opcode,
  515. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  516. hlsl::OP *hlslOP = &helper.hlslOP;
  517. IRBuilder<> Builder(CI);
  518. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  519. Type *Ty = val->getType();
  520. Constant *toByteConst = ConstantFP::get(Ty->getScalarType(), 255);
  521. if (Ty != Ty->getScalarType()) {
  522. toByteConst =
  523. ConstantVector::getSplat(Ty->getVectorNumElements(), toByteConst);
  524. }
  525. Value *byte4 = Builder.CreateFMul(toByteConst, val);
  526. byte4 =
  527. TrivialDxilUnaryOperation(OP::OpCode::Round_z, byte4, hlslOP, Builder);
  528. return Builder.CreateBitCast(byte4, CI->getType());
  529. }
// AddUint64: 64-bit addition emulated over pairs of 32-bit lanes. Element i
// holds the low 32 bits and element i+1 the high 32 bits; the low halves are
// added with UAddc (sum + carry) and the carry is folded into the sum of the
// high halves. Only uint2 (one 64-bit value) and uint4 (two) are accepted.
Value *TranslateAddUint64(CallInst *CI, IntrinsicOp IOP,
                          OP::OpCode opcode,
                          HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  IRBuilder<> Builder(CI);
  Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  Type *Ty = val->getType();
  VectorType *VT = dyn_cast<VectorType>(Ty);
  if (!VT) {
    CI->getContext().emitError(
        CI, "AddUint64 can only be applied to uint2 and uint4 operands");
    return UndefValue::get(Ty);
  }
  unsigned size = VT->getNumElements();
  if (size != 2 && size != 4) {
    CI->getContext().emitError(
        CI, "AddUint64 can only be applied to uint2 and uint4 operands");
    return UndefValue::get(Ty);
  }
  Value *op0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *op1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  Value *RetVal = UndefValue::get(Ty);
  Function *AddC = hlslOP->GetOpFunc(DXIL::OpCode::UAddc, helper.i32Ty);
  Value *opArg = Builder.getInt32(static_cast<unsigned>(DXIL::OpCode::UAddc));
  // Process one 64-bit value (a low/high lane pair) per iteration.
  for (unsigned i = 0; i < size; i += 2) {
    Value *low0 = Builder.CreateExtractElement(op0, i);
    Value *low1 = Builder.CreateExtractElement(op1, i);
    // UAddc returns {sum, carry-bit}.
    Value *lowWithC = Builder.CreateCall(AddC, {opArg, low0, low1});
    Value *low = Builder.CreateExtractValue(lowWithC, 0);
    RetVal = Builder.CreateInsertElement(RetVal, low, i);
    Value *carry = Builder.CreateExtractValue(lowWithC, 1);
    // Ext i1 to i32
    carry = Builder.CreateZExt(carry, helper.i32Ty);
    Value *hi0 = Builder.CreateExtractElement(op0, i + 1);
    Value *hi1 = Builder.CreateExtractElement(op1, i + 1);
    Value *hi = Builder.CreateAdd(hi0, hi1);
    hi = Builder.CreateAdd(hi, carry);
    RetVal = Builder.CreateInsertElement(RetVal, hi, i + 1);
  }
  return RetVal;
}
  571. bool IsValidLoadInput(Value *V) {
  572. // Must be load input.
  573. // TODO: report this error on front-end
  574. if (!isa<CallInst>(V)) {
  575. V->getContext().emitError("attribute evaluation can only be done on values "
  576. "taken directly from inputs");
  577. return false;
  578. }
  579. CallInst *CI = cast<CallInst>(V);
  580. // Must be immediate.
  581. ConstantInt *opArg =
  582. cast<ConstantInt>(CI->getArgOperand(DXIL::OperandIndex::kOpcodeIdx));
  583. DXIL::OpCode op = static_cast<DXIL::OpCode>(opArg->getLimitedValue());
  584. if (op != DXIL::OpCode::LoadInput) {
  585. V->getContext().emitError("attribute evaluation can only be done on values "
  586. "taken directly from inputs");
  587. return false;
  588. }
  589. return true;
  590. }
  591. // Apply current shuffle vector mask on top of previous shuffle mask.
  592. // For example, if previous mask is (12,11,10,13) and current mask is (3,1,0,2)
  593. // new mask would be (13,11,12,10)
  594. Constant *AccumulateMask(Constant *curMask, Constant *prevMask) {
  595. if (curMask == nullptr) {
  596. return prevMask;
  597. }
  598. unsigned size = cast<VectorType>(curMask->getType())->getNumElements();
  599. SmallVector<uint32_t, 16> Elts;
  600. for (unsigned i = 0; i != size; ++i) {
  601. ConstantInt *Index = cast<ConstantInt>(curMask->getAggregateElement(i));
  602. ConstantInt *IVal =
  603. cast<ConstantInt>(prevMask->getAggregateElement(Index->getSExtValue()));
  604. Elts.emplace_back(IVal->getSExtValue());
  605. }
  606. return ConstantDataVector::get(curMask->getContext(), Elts);
  607. }
// Collect the LoadInput calls that feed V (the operand of an attribute-
// evaluation intrinsic) into loadList, validating each via IsValidLoadInput.
// For vector V, any shufflevector chain is walked first (its masks are
// accumulated and the combined mask returned), then the insertelement chain
// down to undef; elements are appended last-inserted-first because the chain
// is walked backwards. Returns nullptr when no shuffle was involved.
Constant *GetLoadInputsForEvaluate(Value *V, std::vector<CallInst*> &loadList) {
  Constant *shufMask = nullptr;
  if (V->getType()->isVectorTy()) {
    // Must be insert element inst. Keeping track of masks for shuffle vector.
    Value *Vec = V;
    while (ShuffleVectorInst *shuf = dyn_cast<ShuffleVectorInst>(Vec)) {
      shufMask = AccumulateMask(shufMask, shuf->getMask());
      Vec = shuf->getOperand(0);
    }
    // TODO: We are assuming that the operand of insertelement is a LoadInput.
    // This will fail on the case where we pass in matrix member using array subscript.
    while (!isa<UndefValue>(Vec)) {
      InsertElementInst *insertInst = cast<InsertElementInst>(Vec);
      Vec = insertInst->getOperand(0);
      Value *Elt = insertInst->getOperand(1);
      if (IsValidLoadInput(Elt)) {
        loadList.emplace_back(cast<CallInst>(Elt));
      }
    }
  } else {
    // Scalar case: V itself must be the LoadInput.
    if (IsValidLoadInput(V)) {
      loadList.emplace_back(cast<CallInst>(V));
    }
  }
  return shufMask;
}
  634. // Swizzle could reduce the dimensionality of the Type, but
  635. // for temporary insertelement instructions should maintain the existing size of the loadinput.
  636. // So we have to analyze the type of src in order to determine the actual size required.
  637. Type *GetInsertElementTypeForEvaluate(Value *src) {
  638. if (InsertElementInst *IE = dyn_cast<InsertElementInst>(src)) {
  639. return src->getType();
  640. }
  641. else if (ShuffleVectorInst *SV = dyn_cast<ShuffleVectorInst>(src)) {
  642. return SV->getOperand(0)->getType();
  643. }
  644. src->getContext().emitError("Invalid type call for EvaluateAttribute function");
  645. return nullptr;
  646. }
// EvaluateAttributeAtSample(value, sampleIndex): replay each LoadInput that
// feeds 'value' as an EvalSampleIndex DXIL op on the same input
// element/row/col, reassemble the scalars into a vector, and re-apply the
// accumulated swizzle (if any).
Value *TranslateEvalSample(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
                           HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *sampleIdx = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  IRBuilder<> Builder(CI);
  std::vector<CallInst*> loadList;
  Constant *shufMask = GetLoadInputsForEvaluate(val, loadList);
  unsigned size = loadList.size();
  OP::OpCode opcode = OP::OpCode::EvalSampleIndex;
  Value *opArg = hlslOP->GetU32Const((unsigned)opcode);
  Type *Ty = GetInsertElementTypeForEvaluate(val);
  Function *evalFunc = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
  Value *result = UndefValue::get(Ty);
  for (unsigned i = 0; i < size; i++) {
    // loadList was collected by walking the insertelement chain backwards;
    // index from the end to restore source element order.
    CallInst *loadInput = loadList[size-1-i];
    Value *inputElemID = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx);
    Value *rowIdx = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx);
    Value *colIdx = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx);
    Value *Elt = Builder.CreateCall(evalFunc, { opArg, inputElemID, rowIdx, colIdx, sampleIdx });
    result = Builder.CreateInsertElement(result, Elt, i);
  }
  if (shufMask)
    result = Builder.CreateShuffleVector(result, UndefValue::get(Ty), shufMask);
  return result;
}
// EvaluateAttributeSnapped(value, offset): replay each LoadInput that feeds
// 'value' as an EvalSnapped DXIL op with the x/y pixel offsets, reassemble
// the scalars into a vector, and re-apply the accumulated swizzle (if any).
Value *TranslateEvalSnapped(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
                            HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *offset = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  IRBuilder<> Builder(CI);
  // Split the int2 offset into its scalar components for the DXIL op.
  Value *offsetX = Builder.CreateExtractElement(offset, (uint64_t)0);
  Value *offsetY = Builder.CreateExtractElement(offset, 1);
  std::vector<CallInst*> loadList;
  Constant *shufMask = GetLoadInputsForEvaluate(val, loadList);
  unsigned size = loadList.size();
  OP::OpCode opcode = OP::OpCode::EvalSnapped;
  Value *opArg = hlslOP->GetU32Const((unsigned)opcode);
  Type *Ty = GetInsertElementTypeForEvaluate(val);
  Function *evalFunc = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
  Value *result = UndefValue::get(Ty);
  for (unsigned i = 0; i < size; i++) {
    // loadList was collected by walking the insertelement chain backwards;
    // index from the end to restore source element order.
    CallInst *loadInput = loadList[size-1-i];
    Value *inputElemID = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx);
    Value *rowIdx = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx);
    Value *colIdx = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx);
    Value *Elt = Builder.CreateCall(evalFunc, { opArg, inputElemID, rowIdx, colIdx, offsetX, offsetY });
    result = Builder.CreateInsertElement(result, Elt, i);
  }
  if (shufMask)
    result = Builder.CreateShuffleVector(result, UndefValue::get(Ty), shufMask);
  return result;
}
// EvaluateAttributeCentroid(value): replay each LoadInput that feeds 'value'
// as an EvalCentroid DXIL op, reassemble the scalars into a vector, and
// re-apply the accumulated swizzle (if any).
Value *TranslateEvalCentroid(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
                             HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Value *src = CI->getArgOperand(DXIL::OperandIndex::kUnarySrc0OpIdx);
  std::vector<CallInst*> loadList;
  Constant *shufMask = GetLoadInputsForEvaluate(src, loadList);
  unsigned size = loadList.size();
  IRBuilder<> Builder(CI);
  OP::OpCode opcode = OP::OpCode::EvalCentroid;
  Value *opArg = hlslOP->GetU32Const((unsigned)opcode);
  Type *Ty = GetInsertElementTypeForEvaluate(src);
  Function *evalFunc = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
  Value *result = UndefValue::get(Ty);
  for (unsigned i = 0; i < size; i++) {
    // loadList was collected by walking the insertelement chain backwards;
    // index from the end to restore source element order.
    CallInst *loadInput = loadList[size-1-i];
    Value *inputElemID = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx);
    Value *rowIdx = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx);
    Value *colIdx = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx);
    Value *Elt = Builder.CreateCall(evalFunc, { opArg, inputElemID, rowIdx, colIdx });
    result = Builder.CreateInsertElement(result, Elt, i);
  }
  if (shufMask)
    result = Builder.CreateShuffleVector(result, UndefValue::get(Ty), shufMask);
  return result;
}
// GetAttributeAtVertex(value, vertexID): replay each LoadInput that feeds
// 'value' as an AttributeAtVertex DXIL op carrying the (i8) vertex index.
Value *TranslateGetAttributeAtVertex(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
                                     HLOperationLowerHelper &helper,
                                     HLObjectOperationLowerHelper *pObjHelper,
                                     bool &Translated) {
  DXASSERT(op == OP::OpCode::AttributeAtVertex, "Wrong opcode to translate");
  hlsl::OP *hlslOP = &helper.hlslOP;
  IRBuilder<> Builder(CI);
  Type *Ty = CI->getType();
  Value *val = CI->getArgOperand(DXIL::OperandIndex::kBinarySrc0OpIdx);
  Value *vertexIdx = CI->getArgOperand(DXIL::OperandIndex::kBinarySrc1OpIdx);
  Value *vertexI8Idx = Builder.CreateTrunc(vertexIdx, Type::getInt8Ty(CI->getContext()));
  // Check the range of VertexID.
  // NOTE(review): these are Value-pointer comparisons against the uniqued
  // i8 constants 0/1/2 — they can only match when vertexIdx is a constant
  // that CreateTrunc folded, so a non-constant vertex index also lands in
  // the error path; confirm that restriction is intended.
  Value *vertex0 = Builder.getInt8(0);
  Value *vertex1 = Builder.getInt8(1);
  Value *vertex2 = Builder.getInt8(2);
  if (vertexI8Idx != vertex0 && vertexI8Idx != vertex1 && vertexI8Idx != vertex2) {
    CI->getContext().emitError(CI, "VertexID at GetAttributeAtVertex can only range from 0 to 2");
    return UndefValue::get(Ty);
  }
  std::vector<CallInst*> loadList;
  Constant *shufMask = GetLoadInputsForEvaluate(val, loadList);
  unsigned size = loadList.size();
  Value *opArg = hlslOP->GetU32Const((unsigned)op);
  Function *evalFunc = hlslOP->GetOpFunc(op, Ty->getScalarType());
  Value *result = UndefValue::get(Ty);
  for (unsigned i = 0; i < size; ++i) {
    // loadList was collected by walking the insertelement chain backwards;
    // index from the end to restore source element order.
    CallInst *loadInput = loadList[size - 1 - i];
    Value *inputElemID = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx);
    Value *rowIdx = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx);
    Value *colIdx = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx);
    Value *Elt = Builder.CreateCall(evalFunc, { opArg, inputElemID, rowIdx, colIdx, vertexI8Idx });
    result = Builder.CreateInsertElement(result, Elt, i);
  }
  if (shufMask)
    result = Builder.CreateShuffleVector(result, UndefValue::get(Ty), shufMask);
  return result;
}
  763. Value *TrivialNoArgOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  764. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  765. hlsl::OP *hlslOP = &helper.hlslOP;
  766. Type *Ty = Type::getVoidTy(CI->getContext());
  767. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  768. Value *args[] = {opArg};
  769. IRBuilder<> Builder(CI);
  770. Value *dxilOp = TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder);
  771. return dxilOp;
  772. }
  773. Value *TranslateGetRTSamplePos(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  774. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  775. hlsl::OP *hlslOP = &helper.hlslOP;
  776. OP::OpCode opcode = OP::OpCode::RenderTargetGetSamplePosition;
  777. IRBuilder<> Builder(CI);
  778. Type *Ty = Type::getVoidTy(CI->getContext());
  779. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  780. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  781. Value *args[] = {opArg, val};
  782. Value *samplePos =
  783. TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder);
  784. Value *result = UndefValue::get(CI->getType());
  785. Value *samplePosX = Builder.CreateExtractValue(samplePos, 0);
  786. Value *samplePosY = Builder.CreateExtractValue(samplePos, 1);
  787. result = Builder.CreateInsertElement(result, samplePosX, (uint64_t)0);
  788. result = Builder.CreateInsertElement(result, samplePosY, 1);
  789. return result;
  790. }
  791. // val QuadReadLaneAt(val, uint);
  792. Value *TranslateQuadReadLaneAt(CallInst *CI, IntrinsicOp IOP,
  793. OP::OpCode opcode,
  794. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  795. hlsl::OP *hlslOP = &helper.hlslOP;
  796. Value *refArgs[] = {nullptr, CI->getOperand(1), CI->getOperand(2)};
  797. return TrivialDxilOperation(DXIL::OpCode::QuadReadLaneAt, refArgs,
  798. CI->getOperand(1)->getType(), CI, hlslOP);
  799. }
// Wave intrinsics of the form fn(val,QuadOpKind)->val
Value *TranslateQuadReadAcross(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                               HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  DXIL::QuadOpKind opKind;
  switch (IOP) {
  case IntrinsicOp::IOP_QuadReadAcrossX: opKind = DXIL::QuadOpKind::ReadAcrossX; break;
  case IntrinsicOp::IOP_QuadReadAcrossY: opKind = DXIL::QuadOpKind::ReadAcrossY; break;
  // The default case asserts and then deliberately falls through into the
  // diagonal case so opKind is still initialized in release builds.
  default: DXASSERT_NOMSG(IOP == IntrinsicOp::IOP_QuadReadAcrossDiagonal);
  case IntrinsicOp::IOP_QuadReadAcrossDiagonal: opKind = DXIL::QuadOpKind::ReadAcrossDiagonal; break;
  }
  Constant *OpArg = hlslOP->GetI8Const((unsigned)opKind);
  // Slot 0 is the opcode constant, filled in by TrivialDxilOperation.
  Value *refArgs[] = {nullptr, CI->getOperand(1), OpArg};
  return TrivialDxilOperation(DXIL::OpCode::QuadOp, refArgs,
                              CI->getOperand(1)->getType(), CI, hlslOP);
}
  816. // WaveAllEqual(val<n>)->bool<n>
  817. Value *TranslateWaveAllEqual(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  818. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  819. hlsl::OP *hlslOP = &helper.hlslOP;
  820. Value *src = CI->getArgOperand(HLOperandIndex::kWaveAllEqualValueOpIdx);
  821. IRBuilder<> Builder(CI);
  822. Type *Ty = src->getType();
  823. Type *RetTy = Type::getInt1Ty(CI->getContext());
  824. if (Ty->isVectorTy())
  825. RetTy = VectorType::get(RetTy, Ty->getVectorNumElements());
  826. Constant *opArg = hlslOP->GetU32Const((unsigned)DXIL::OpCode::WaveActiveAllEqual);
  827. Value *args[] = {opArg, src};
  828. return TrivialDxilOperation(DXIL::OpCode::WaveActiveAllEqual, args, Ty, RetTy,
  829. hlslOP, Builder);
  830. }
  831. // Wave intrinsics of the form fn(valA)->valB, where no overloading takes place
  832. Value *TranslateWaveA2B(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  833. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  834. hlsl::OP *hlslOP = &helper.hlslOP;
  835. Value *refArgs[] = {nullptr, CI->getOperand(1)};
  836. return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP);
  837. }
// Wave ballot intrinsic.
Value *TranslateWaveBallot(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                           HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  // The high-level operation is uint4 ballot(i1).
  // The DXIL operation is struct.u4 ballot(i1).
  // To avoid updating users with more than a simple replace, we translate into
  // a call into struct.u4, then reassemble the vector.
  // Scalarization and constant propagation take care of cleanup.
  IRBuilder<> B(CI);
  // Make the DXIL call itself.
  hlsl::OP *hlslOP = &helper.hlslOP;
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  Value *refArgs[] = { opArg, CI->getOperand(1) };
  Function *dxilFunc = hlslOP->GetOpFunc(opcode, Type::getVoidTy(CI->getContext()));
  Value *dxilVal = B.CreateCall(dxilFunc, refArgs, hlslOP->GetOpCodeName(opcode));
  // Assign from the call results into a vector.
  Type *ResTy = CI->getType();
  DXASSERT_NOMSG(ResTy->isVectorTy() && ResTy->getVectorNumElements() == 4);
  DXASSERT_NOMSG(dxilVal->getType()->isStructTy() &&
                 dxilVal->getType()->getNumContainedTypes() == 4);
  // 'x' component is the first vector element, highest bits: copy struct
  // field i into vector lane i.
  Value *ResVal = llvm::UndefValue::get(ResTy);
  for (unsigned Idx = 0; Idx < 4; ++Idx) {
    ResVal = B.CreateInsertElement(
        ResVal, B.CreateExtractValue(dxilVal, ArrayRef<unsigned>(Idx)), Idx);
  }
  return ResVal;
}
  866. static bool WaveIntrinsicNeedsSign(OP::OpCode opcode) {
  867. return opcode == OP::OpCode::WaveActiveOp ||
  868. opcode == OP::OpCode::WavePrefixOp;
  869. }
  870. static unsigned WaveIntrinsicToSignedOpKind(IntrinsicOp IOP) {
  871. if (IOP == IntrinsicOp::IOP_WaveActiveUMax ||
  872. IOP == IntrinsicOp::IOP_WaveActiveUMin ||
  873. IOP == IntrinsicOp::IOP_WaveActiveUSum ||
  874. IOP == IntrinsicOp::IOP_WaveActiveUProduct ||
  875. IOP == IntrinsicOp::IOP_WavePrefixUSum ||
  876. IOP == IntrinsicOp::IOP_WavePrefixUProduct)
  877. return (unsigned)DXIL::SignedOpKind::Unsigned;
  878. return (unsigned)DXIL::SignedOpKind::Signed;
  879. }
  880. static unsigned WaveIntrinsicToOpKind(IntrinsicOp IOP) {
  881. switch (IOP) {
  882. // Bit operations.
  883. case IntrinsicOp::IOP_WaveActiveBitOr:
  884. return (unsigned)DXIL::WaveBitOpKind::Or;
  885. case IntrinsicOp::IOP_WaveActiveBitAnd:
  886. return (unsigned)DXIL::WaveBitOpKind::And;
  887. case IntrinsicOp::IOP_WaveActiveBitXor:
  888. return (unsigned)DXIL::WaveBitOpKind::Xor;
  889. // Prefix operations.
  890. case IntrinsicOp::IOP_WavePrefixSum:
  891. case IntrinsicOp::IOP_WavePrefixUSum:
  892. return (unsigned)DXIL::WaveOpKind::Sum;
  893. case IntrinsicOp::IOP_WavePrefixProduct:
  894. case IntrinsicOp::IOP_WavePrefixUProduct:
  895. return (unsigned)DXIL::WaveOpKind::Product;
  896. // Numeric operations.
  897. case IntrinsicOp::IOP_WaveActiveMax:
  898. case IntrinsicOp::IOP_WaveActiveUMax:
  899. return (unsigned)DXIL::WaveOpKind::Max;
  900. case IntrinsicOp::IOP_WaveActiveMin:
  901. case IntrinsicOp::IOP_WaveActiveUMin:
  902. return (unsigned)DXIL::WaveOpKind::Min;
  903. case IntrinsicOp::IOP_WaveActiveSum:
  904. case IntrinsicOp::IOP_WaveActiveUSum:
  905. return (unsigned)DXIL::WaveOpKind::Sum;
  906. case IntrinsicOp::IOP_WaveActiveProduct:
  907. case IntrinsicOp::IOP_WaveActiveUProduct:
  908. default:
  909. DXASSERT(IOP == IntrinsicOp::IOP_WaveActiveProduct ||
  910. IOP == IntrinsicOp::IOP_WaveActiveUProduct,
  911. "else caller passed incorrect value");
  912. return (unsigned)DXIL::WaveOpKind::Product;
  913. }
  914. }
  915. // Wave intrinsics of the form fn(valA)->valA
  916. Value *TranslateWaveA2A(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  917. HLOperationLowerHelper &helper,
  918. HLObjectOperationLowerHelper *pObjHelper,
  919. bool &Translated) {
  920. hlsl::OP *hlslOP = &helper.hlslOP;
  921. Constant *kindValInt = hlslOP->GetI8Const(WaveIntrinsicToOpKind(IOP));
  922. Constant *signValInt = hlslOP->GetI8Const(WaveIntrinsicToSignedOpKind(IOP));
  923. Value *refArgs[] = {nullptr, CI->getOperand(1), kindValInt, signValInt};
  924. unsigned refArgCount = _countof(refArgs);
  925. if (!WaveIntrinsicNeedsSign(opcode))
  926. refArgCount--;
  927. return TrivialDxilOperation(opcode,
  928. llvm::ArrayRef<Value *>(refArgs, refArgCount),
  929. CI->getOperand(1)->getType(), CI, hlslOP);
  930. }
  931. // Wave intrinsics of the form fn()->val
  932. Value *TranslateWaveToVal(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  933. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  934. hlsl::OP *hlslOP = &helper.hlslOP;
  935. Value *refArgs[] = {nullptr};
  936. return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP);
  937. }
  938. // Wave intrinsics of the form fn(val,lane)->val
  939. Value *TranslateWaveReadLaneAt(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  940. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  941. hlsl::OP *hlslOP = &helper.hlslOP;
  942. Value *refArgs[] = {nullptr, CI->getOperand(1), CI->getOperand(2)};
  943. return TrivialDxilOperation(DXIL::OpCode::WaveReadLaneAt, refArgs,
  944. CI->getOperand(1)->getType(), CI, hlslOP);
  945. }
  946. // Wave intrinsics of the form fn(val)->val
  947. Value *TranslateWaveReadLaneFirst(CallInst *CI, IntrinsicOp IOP,
  948. OP::OpCode opcode,
  949. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  950. hlsl::OP *hlslOP = &helper.hlslOP;
  951. Value *refArgs[] = {nullptr, CI->getOperand(1)};
  952. return TrivialDxilOperation(DXIL::OpCode::WaveReadLaneFirst, refArgs,
  953. CI->getOperand(1)->getType(), CI, hlslOP);
  954. }
  955. Value *TranslateIAbs(CallInst *CI) {
  956. Type *Ty = CI->getType();
  957. Type *EltTy = Ty->getScalarType();
  958. unsigned bitWidth = EltTy->getIntegerBitWidth();
  959. uint64_t mask = ((uint64_t)1) << (bitWidth - 1);
  960. Constant *opMask = ConstantInt::get(EltTy, mask);
  961. if (Ty != EltTy) {
  962. unsigned size = Ty->getVectorNumElements();
  963. opMask = llvm::ConstantVector::getSplat(size, opMask);
  964. }
  965. IRBuilder<> Builder(CI);
  966. return Builder.CreateXor(CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx),
  967. opMask);
  968. }
  969. Value *TransalteAbs(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  970. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  971. hlsl::OP *hlslOP = &helper.hlslOP;
  972. Type *pOverloadTy = CI->getType()->getScalarType();
  973. if (pOverloadTy->isFloatingPointTy()) {
  974. Value *refArgs[] = {nullptr, CI->getOperand(1)};
  975. return TrivialDxilOperation(DXIL::OpCode::FAbs, refArgs, CI->getType(), CI,
  976. hlslOP);
  977. } else
  978. return TranslateIAbs(CI);
  979. }
  980. Value *GenerateCmpNEZero(Value *val, IRBuilder<> Builder) {
  981. Type *Ty = val->getType();
  982. Type *EltTy = Ty->getScalarType();
  983. Constant *zero = nullptr;
  984. if (EltTy->isFloatingPointTy())
  985. zero = ConstantFP::get(EltTy, 0);
  986. else
  987. zero = ConstantInt::get(EltTy, 0);
  988. if (Ty != EltTy) {
  989. zero = ConstantVector::getSplat(Ty->getVectorNumElements(), zero);
  990. }
  991. if (EltTy->isFloatingPointTy())
  992. return Builder.CreateFCmpUNE(val, zero);
  993. else
  994. return Builder.CreateICmpNE(val, zero);
  995. }
  996. Value *TranslateAllForValue(Value *val, IRBuilder<> &Builder) {
  997. Value *cond = GenerateCmpNEZero(val, Builder);
  998. Type *Ty = val->getType();
  999. Type *EltTy = Ty->getScalarType();
  1000. if (Ty != EltTy) {
  1001. Value *Result = Builder.CreateExtractElement(cond, (uint64_t)0);
  1002. for (unsigned i = 1; i < Ty->getVectorNumElements(); i++) {
  1003. Value *Elt = Builder.CreateExtractElement(cond, i);
  1004. Result = Builder.CreateAnd(Result, Elt);
  1005. }
  1006. return Result;
  1007. } else
  1008. return cond;
  1009. }
  1010. Value *TranslateAll(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1011. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1012. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1013. IRBuilder<> Builder(CI);
  1014. return TranslateAllForValue(val, Builder);
  1015. }
  1016. Value *TranslateAny(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1017. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1018. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1019. IRBuilder<> Builder(CI);
  1020. Value *cond = GenerateCmpNEZero(val, Builder);
  1021. Type *Ty = val->getType();
  1022. Type *EltTy = Ty->getScalarType();
  1023. if (Ty != EltTy) {
  1024. Value *Result = Builder.CreateExtractElement(cond, (uint64_t)0);
  1025. for (unsigned i = 1; i < Ty->getVectorNumElements(); i++) {
  1026. Value *Elt = Builder.CreateExtractElement(cond, i);
  1027. Result = Builder.CreateOr(Result, Elt);
  1028. }
  1029. return Result;
  1030. } else
  1031. return cond;
  1032. }
  1033. Value *TranslateBitcast(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1034. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1035. Type *Ty = CI->getType();
  1036. Value *op = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1037. IRBuilder<> Builder(CI);
  1038. return Builder.CreateBitCast(op, Ty);
  1039. }
  1040. Value *TranslateDoubleAsUint(Value *x, Value *lo, Value *hi,
  1041. IRBuilder<> &Builder, hlsl::OP *hlslOP) {
  1042. Type *Ty = x->getType();
  1043. Type *outTy = lo->getType()->getPointerElementType();
  1044. DXIL::OpCode opcode = DXIL::OpCode::SplitDouble;
  1045. Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
  1046. Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
  1047. if (Ty->isVectorTy()) {
  1048. Value *retValLo = llvm::UndefValue::get(outTy);
  1049. Value *retValHi = llvm::UndefValue::get(outTy);
  1050. unsigned vecSize = Ty->getVectorNumElements();
  1051. for (unsigned i = 0; i < vecSize; i++) {
  1052. Value *Elt = Builder.CreateExtractElement(x, i);
  1053. Value *EltOP = Builder.CreateCall(dxilFunc, {opArg, Elt},
  1054. hlslOP->GetOpCodeName(opcode));
  1055. Value *EltLo = Builder.CreateExtractValue(EltOP, 0);
  1056. retValLo = Builder.CreateInsertElement(retValLo, EltLo, i);
  1057. Value *EltHi = Builder.CreateExtractValue(EltOP, 1);
  1058. retValHi = Builder.CreateInsertElement(retValHi, EltHi, i);
  1059. }
  1060. Builder.CreateStore(retValLo, lo);
  1061. Builder.CreateStore(retValHi, hi);
  1062. } else {
  1063. Value *retVal =
  1064. Builder.CreateCall(dxilFunc, {opArg, x}, hlslOP->GetOpCodeName(opcode));
  1065. Value *retValLo = Builder.CreateExtractValue(retVal, 0);
  1066. Value *retValHi = Builder.CreateExtractValue(retVal, 1);
  1067. Builder.CreateStore(retValLo, lo);
  1068. Builder.CreateStore(retValHi, hi);
  1069. }
  1070. return nullptr;
  1071. }
  1072. Value *TranslateAsUint(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1073. HLOperationLowerHelper &helper,
  1074. HLObjectOperationLowerHelper *pObjHelper,
  1075. bool &Translated) {
  1076. if (CI->getNumArgOperands() == 2) {
  1077. return TranslateBitcast(CI, IOP, opcode, helper, pObjHelper, Translated);
  1078. } else {
  1079. DXASSERT_NOMSG(CI->getNumArgOperands() == 4);
  1080. hlsl::OP *hlslOP = &helper.hlslOP;
  1081. Value *x = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  1082. DXASSERT_NOMSG(x->getType()->getScalarType()->isDoubleTy());
  1083. Value *lo = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  1084. Value *hi = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  1085. IRBuilder<> Builder(CI);
  1086. return TranslateDoubleAsUint(x, lo, hi, Builder, hlslOP);
  1087. }
  1088. }
  1089. Value *TranslateAsDouble(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1090. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1091. hlsl::OP *hlslOP = &helper.hlslOP;
  1092. Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1093. Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1094. Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
  1095. IRBuilder<> Builder(CI);
  1096. return TrivialDxilOperation(opcode, { opArg, x, y }, CI->getType(), CI->getType(), hlslOP, Builder);
  1097. }
  1098. Value *TranslateAtan2(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1099. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1100. hlsl::OP *hlslOP = &helper.hlslOP;
  1101. Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1102. Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1103. IRBuilder<> Builder(CI);
  1104. Value *tan = Builder.CreateFDiv(y, x);
  1105. return TrivialDxilUnaryOperation(OP::OpCode::Atan, tan, hlslOP, Builder);
  1106. }
  1107. Value *TranslateClamp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1108. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1109. hlsl::OP *hlslOP = &helper.hlslOP;
  1110. Type *Ty = CI->getType();
  1111. Type *EltTy = Ty->getScalarType();
  1112. DXIL::OpCode maxOp = DXIL::OpCode::FMax;
  1113. DXIL::OpCode minOp = DXIL::OpCode::FMin;
  1114. if (IOP == IntrinsicOp::IOP_uclamp) {
  1115. maxOp = DXIL::OpCode::UMax;
  1116. minOp = DXIL::OpCode::UMin;
  1117. } else if (EltTy->isIntegerTy()) {
  1118. maxOp = DXIL::OpCode::IMax;
  1119. minOp = DXIL::OpCode::IMin;
  1120. }
  1121. Value *x = CI->getArgOperand(HLOperandIndex::kClampOpXIdx);
  1122. Value *maxVal = CI->getArgOperand(HLOperandIndex::kClampOpMaxIdx);
  1123. Value *minVal = CI->getArgOperand(HLOperandIndex::kClampOpMinIdx);
  1124. IRBuilder<> Builder(CI);
  1125. // min(max(x, minVal), maxVal).
  1126. Value *maxXMinVal =
  1127. TrivialDxilBinaryOperation(maxOp, x, minVal, hlslOP, Builder);
  1128. return TrivialDxilBinaryOperation(minOp, maxXMinVal, maxVal, hlslOP, Builder);
  1129. }
  1130. Value *TranslateClip(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1131. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1132. hlsl::OP *hlslOP = &helper.hlslOP;
  1133. Function *discard =
  1134. hlslOP->GetOpFunc(OP::OpCode::Discard, Type::getVoidTy(CI->getContext()));
  1135. IRBuilder<> Builder(CI);
  1136. Value *cond = nullptr;
  1137. Value *arg = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1138. if (VectorType *VT = dyn_cast<VectorType>(arg->getType())) {
  1139. Value *elt = Builder.CreateExtractElement(arg, (uint64_t)0);
  1140. cond = Builder.CreateFCmpOLT(elt, hlslOP->GetFloatConst(0));
  1141. for (unsigned i = 1; i < VT->getNumElements(); i++) {
  1142. Value *elt = Builder.CreateExtractElement(arg, i);
  1143. Value *eltCond = Builder.CreateFCmpOLT(elt, hlslOP->GetFloatConst(0));
  1144. cond = Builder.CreateOr(cond, eltCond);
  1145. }
  1146. } else
  1147. cond = Builder.CreateFCmpOLT(arg, hlslOP->GetFloatConst(0));
  1148. Constant *opArg = hlslOP->GetU32Const((unsigned)OP::OpCode::Discard);
  1149. Builder.CreateCall(discard, {opArg, cond});
  1150. return nullptr;
  1151. }
  1152. Value *TranslateCross(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1153. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1154. VectorType *VT = cast<VectorType>(CI->getType());
  1155. DXASSERT_NOMSG(VT->getNumElements() == 3);
  1156. Value *op0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1157. Value *op1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1158. IRBuilder<> Builder(CI);
  1159. Value *op0_x = Builder.CreateExtractElement(op0, (uint64_t)0);
  1160. Value *op0_y = Builder.CreateExtractElement(op0, 1);
  1161. Value *op0_z = Builder.CreateExtractElement(op0, 2);
  1162. Value *op1_x = Builder.CreateExtractElement(op1, (uint64_t)0);
  1163. Value *op1_y = Builder.CreateExtractElement(op1, 1);
  1164. Value *op1_z = Builder.CreateExtractElement(op1, 2);
  1165. auto MulSub = [&](Value *x0, Value *y0, Value *x1, Value *y1) -> Value * {
  1166. Value *xy = Builder.CreateFMul(x0, y1);
  1167. Value *yx = Builder.CreateFMul(y0, x1);
  1168. return Builder.CreateFSub(xy, yx);
  1169. };
  1170. Value *yz_zy = MulSub(op0_y, op0_z, op1_y, op1_z);
  1171. Value *zx_xz = MulSub(op0_z, op0_x, op1_z, op1_x);
  1172. Value *xy_yx = MulSub(op0_x, op0_y, op1_x, op1_y);
  1173. Value *cross = UndefValue::get(VT);
  1174. cross = Builder.CreateInsertElement(cross, yz_zy, (uint64_t)0);
  1175. cross = Builder.CreateInsertElement(cross, zx_xz, 1);
  1176. cross = Builder.CreateInsertElement(cross, xy_yx, 2);
  1177. return cross;
  1178. }
  1179. Value *TranslateDegrees(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1180. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1181. IRBuilder<> Builder(CI);
  1182. Type *Ty = CI->getType();
  1183. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1184. // 180/pi.
  1185. // TODO: include M_PI from math.h.
  1186. const double M_PI = 3.14159265358979323846;
  1187. Constant *toDegreeConst = ConstantFP::get(Ty->getScalarType(), 180 / M_PI);
  1188. if (Ty != Ty->getScalarType()) {
  1189. toDegreeConst =
  1190. ConstantVector::getSplat(Ty->getVectorNumElements(), toDegreeConst);
  1191. }
  1192. return Builder.CreateFMul(toDegreeConst, val);
  1193. }
  1194. Value *TranslateDst(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1195. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1196. Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1197. Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1198. Type *Ty = src1->getType();
  1199. IRBuilder<> Builder(CI);
  1200. Value *Result = UndefValue::get(Ty);
  1201. Constant *oneConst = ConstantFP::get(Ty->getScalarType(), 1);
  1202. // dest.x = 1;
  1203. Result = Builder.CreateInsertElement(Result, oneConst, (uint64_t)0);
  1204. // dest.y = src0.y * src1.y;
  1205. Value *src0_y = Builder.CreateExtractElement(src0, 1);
  1206. Value *src1_y = Builder.CreateExtractElement(src1, 1);
  1207. Value *yMuly = Builder.CreateFMul(src0_y, src1_y);
  1208. Result = Builder.CreateInsertElement(Result, yMuly, 1);
  1209. // dest.z = src0.z;
  1210. Value *src0_z = Builder.CreateExtractElement(src0, 2);
  1211. Result = Builder.CreateInsertElement(Result, src0_z, 2);
  1212. // dest.w = src1.w;
  1213. Value *src1_w = Builder.CreateExtractElement(src1, 3);
  1214. Result = Builder.CreateInsertElement(Result, src1_w, 3);
  1215. return Result;
  1216. }
  1217. Value *TranslateFirstbitHi(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1218. HLOperationLowerHelper &helper,
  1219. HLObjectOperationLowerHelper *pObjHelper,
  1220. bool &Translated) {
  1221. Value *firstbitHi =
  1222. TrivialUnaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated);
  1223. // firstbitHi == -1? -1 : (bitWidth-1 -firstbitHi);
  1224. IRBuilder<> Builder(CI);
  1225. Constant *neg1 = Builder.getInt32(-1);
  1226. Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1227. Type *Ty = src->getType();
  1228. IntegerType *EltTy = cast<IntegerType>(Ty->getScalarType());
  1229. Constant *bitWidth = Builder.getInt32(EltTy->getBitWidth()-1);
  1230. if (Ty == Ty->getScalarType()) {
  1231. Value *sub = Builder.CreateSub(bitWidth, firstbitHi);
  1232. Value *cond = Builder.CreateICmpEQ(neg1, firstbitHi);
  1233. return Builder.CreateSelect(cond, neg1, sub);
  1234. } else {
  1235. Value *result = UndefValue::get(CI->getType());
  1236. unsigned vecSize = Ty->getVectorNumElements();
  1237. for (unsigned i = 0; i < vecSize; i++) {
  1238. Value *EltFirstBit = Builder.CreateExtractElement(firstbitHi, i);
  1239. Value *sub = Builder.CreateSub(bitWidth, EltFirstBit);
  1240. Value *cond = Builder.CreateICmpEQ(neg1, EltFirstBit);
  1241. Value *Elt = Builder.CreateSelect(cond, neg1, sub);
  1242. result = Builder.CreateInsertElement(result, Elt, i);
  1243. }
  1244. return result;
  1245. }
  1246. }
  1247. Value *TranslateFirstbitLo(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1248. HLOperationLowerHelper &helper,
  1249. HLObjectOperationLowerHelper *pObjHelper,
  1250. bool &Translated) {
  1251. Value *firstbitLo =
  1252. TrivialUnaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated);
  1253. return firstbitLo;
  1254. }
  1255. Value *TranslateLit(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1256. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1257. Value *n_dot_l = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  1258. Value *n_dot_h = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  1259. Value *m = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  1260. IRBuilder<> Builder(CI);
  1261. Type *Ty = m->getType();
  1262. Value *Result = UndefValue::get(VectorType::get(Ty, 4));
  1263. // Result = (ambient, diffuse, specular, 1)
  1264. // ambient = 1.
  1265. Constant *oneConst = ConstantFP::get(Ty, 1);
  1266. Result = Builder.CreateInsertElement(Result, oneConst, (uint64_t)0);
  1267. // Result.w = 1.
  1268. Result = Builder.CreateInsertElement(Result, oneConst, 3);
  1269. // diffuse = (n_dot_l < 0) ? 0 : n_dot_l.
  1270. Constant *zeroConst = ConstantFP::get(Ty, 0);
  1271. Value *nlCmp = Builder.CreateFCmpOLT(n_dot_l, zeroConst);
  1272. Value *diffuse = Builder.CreateSelect(nlCmp, zeroConst, n_dot_l);
  1273. Result = Builder.CreateInsertElement(Result, diffuse, 1);
  1274. // specular = ((n_dot_l < 0) || (n_dot_h < 0)) ? 0: (n_dot_h * m).
  1275. Value *nhCmp = Builder.CreateFCmpOLT(n_dot_h, zeroConst);
  1276. Value *specCond = Builder.CreateOr(nlCmp, nhCmp);
  1277. Value *nhMulM = Builder.CreateFMul(n_dot_h, m);
  1278. Value *spec = Builder.CreateSelect(specCond, zeroConst, nhMulM);
  1279. Result = Builder.CreateInsertElement(Result, spec, 2);
  1280. return Result;
  1281. }
  1282. Value *TranslateRadians(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1283. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1284. IRBuilder<> Builder(CI);
  1285. Type *Ty = CI->getType();
  1286. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1287. // pi/180.
  1288. // TODO: include M_PI from math.h.
  1289. const double M_PI = 3.14159265358979323846;
  1290. Constant *toRadianConst = ConstantFP::get(Ty->getScalarType(), M_PI / 180);
  1291. if (Ty != Ty->getScalarType()) {
  1292. toRadianConst =
  1293. ConstantVector::getSplat(Ty->getVectorNumElements(), toRadianConst);
  1294. }
  1295. return Builder.CreateFMul(toRadianConst, val);
  1296. }
  1297. Value *TranslateF16ToF32(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1298. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1299. IRBuilder<> Builder(CI);
  1300. Value *x = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1301. Type *Ty = CI->getType();
  1302. Function *f16tof32 =
  1303. helper.hlslOP.GetOpFunc(opcode, helper.voidTy);
  1304. return TrivialDxilOperation(
  1305. f16tof32, opcode, {Builder.getInt32(static_cast<unsigned>(opcode)), x},
  1306. x->getType(), Ty, &helper.hlslOP, Builder);
  1307. }
  1308. Value *TranslateF32ToF16(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1309. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1310. IRBuilder<> Builder(CI);
  1311. Value *x = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1312. Type *Ty = CI->getType();
  1313. Function *f32tof16 =
  1314. helper.hlslOP.GetOpFunc(opcode, helper.voidTy);
  1315. return TrivialDxilOperation(
  1316. f32tof16, opcode, {Builder.getInt32(static_cast<unsigned>(opcode)), x},
  1317. x->getType(), Ty, &helper.hlslOP, Builder);
  1318. }
  1319. Value *TranslateLength(CallInst *CI, Value *val, hlsl::OP *hlslOP) {
  1320. IRBuilder<> Builder(CI);
  1321. if (VectorType *VT = dyn_cast<VectorType>(val->getType())) {
  1322. Value *Elt = Builder.CreateExtractElement(val, (uint64_t)0);
  1323. unsigned size = VT->getNumElements();
  1324. if (size > 1) {
  1325. Value *Sum = Builder.CreateFMul(Elt, Elt);
  1326. for (unsigned i = 1; i < size; i++) {
  1327. Elt = Builder.CreateExtractElement(val, i);
  1328. Value *Mul = Builder.CreateFMul(Elt, Elt);
  1329. Sum = Builder.CreateFAdd(Sum, Mul);
  1330. }
  1331. DXIL::OpCode sqrt = DXIL::OpCode::Sqrt;
  1332. Function *dxilSqrt = hlslOP->GetOpFunc(sqrt, VT->getElementType());
  1333. Value *opArg = hlslOP->GetI32Const((unsigned)sqrt);
  1334. return Builder.CreateCall(dxilSqrt, {opArg, Sum},
  1335. hlslOP->GetOpCodeName(sqrt));
  1336. } else {
  1337. val = Elt;
  1338. }
  1339. }
  1340. DXIL::OpCode fabs = DXIL::OpCode::FAbs;
  1341. Function *dxilFAbs = hlslOP->GetOpFunc(fabs, val->getType());
  1342. Value *opArg = hlslOP->GetI32Const((unsigned)fabs);
  1343. return Builder.CreateCall(dxilFAbs, {opArg, val},
  1344. hlslOP->GetOpCodeName(fabs));
  1345. }
  1346. Value *TranslateLength(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1347. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1348. hlsl::OP *hlslOP = &helper.hlslOP;
  1349. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1350. return TranslateLength(CI, val, hlslOP);
  1351. }
  1352. Value *TranslateModF(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1353. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1354. hlsl::OP *hlslOP = &helper.hlslOP;
  1355. Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1356. Value *outIntPtr = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1357. IRBuilder<> Builder(CI);
  1358. Value *Result =
  1359. TrivialDxilUnaryOperation(OP::OpCode::Round_z, val, hlslOP, Builder);
  1360. Value *intPortion = Builder.CreateFSub(val, Result);
  1361. Builder.CreateStore(intPortion, outIntPtr);
  1362. return Result;
  1363. }
  1364. Value *TranslateDistance(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1365. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1366. hlsl::OP *hlslOP = &helper.hlslOP;
  1367. Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1368. Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1369. IRBuilder<> Builder(CI);
  1370. Value *sub = Builder.CreateFSub(src0, src1);
  1371. return TranslateLength(CI, sub, hlslOP);
  1372. }
  1373. Value *TranslateExp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1374. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1375. hlsl::OP *hlslOP = &helper.hlslOP;
  1376. IRBuilder<> Builder(CI);
  1377. Type *Ty = CI->getType();
  1378. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1379. // TODO: include M_LOG2E from math.h.
  1380. const double M_LOG2E = 1.44269504088896340736;
  1381. Constant *log2eConst = ConstantFP::get(Ty->getScalarType(), M_LOG2E);
  1382. if (Ty != Ty->getScalarType()) {
  1383. log2eConst =
  1384. ConstantVector::getSplat(Ty->getVectorNumElements(), log2eConst);
  1385. }
  1386. val = Builder.CreateFMul(log2eConst, val);
  1387. Value *exp = TrivialDxilUnaryOperation(OP::OpCode::Exp, val, hlslOP, Builder);
  1388. return exp;
  1389. }
  1390. Value *TranslateLog(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1391. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1392. hlsl::OP *hlslOP = &helper.hlslOP;
  1393. IRBuilder<> Builder(CI);
  1394. Type *Ty = CI->getType();
  1395. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1396. // TODO: include M_LN2 from math.h.
  1397. const double M_LN2 = 0.693147180559945309417;
  1398. Constant *ln2Const = ConstantFP::get(Ty->getScalarType(), M_LN2);
  1399. if (Ty != Ty->getScalarType()) {
  1400. ln2Const = ConstantVector::getSplat(Ty->getVectorNumElements(), ln2Const);
  1401. }
  1402. Value *log = TrivialDxilUnaryOperation(OP::OpCode::Log, val, hlslOP, Builder);
  1403. return Builder.CreateFMul(ln2Const, log);
  1404. }
  1405. Value *TranslateLog10(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1406. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1407. hlsl::OP *hlslOP = &helper.hlslOP;
  1408. IRBuilder<> Builder(CI);
  1409. Type *Ty = CI->getType();
  1410. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1411. // TODO: include M_LN2 from math.h.
  1412. const double M_LN2 = 0.693147180559945309417;
  1413. const double M_LN10 = 2.30258509299404568402;
  1414. Constant *log2_10Const = ConstantFP::get(Ty->getScalarType(), M_LN2 / M_LN10);
  1415. if (Ty != Ty->getScalarType()) {
  1416. log2_10Const =
  1417. ConstantVector::getSplat(Ty->getVectorNumElements(), log2_10Const);
  1418. }
  1419. Value *log = TrivialDxilUnaryOperation(OP::OpCode::Log, val, hlslOP, Builder);
  1420. return Builder.CreateFMul(log2_10Const, log);
  1421. }
  1422. Value *TranslateFMod(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1423. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1424. hlsl::OP *hlslOP = &helper.hlslOP;
  1425. Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1426. Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1427. IRBuilder<> Builder(CI);
  1428. Value *div = Builder.CreateFDiv(src0, src1);
  1429. Value *negDiv = Builder.CreateFNeg(div);
  1430. Value *ge = Builder.CreateFCmpOGE(div, negDiv);
  1431. Value *absDiv =
  1432. TrivialDxilUnaryOperation(OP::OpCode::FAbs, div, hlslOP, Builder);
  1433. Value *frc =
  1434. TrivialDxilUnaryOperation(OP::OpCode::Frc, absDiv, hlslOP, Builder);
  1435. Value *negFrc = Builder.CreateFNeg(frc);
  1436. Value *realFrc = Builder.CreateSelect(ge, frc, negFrc);
  1437. return Builder.CreateFMul(realFrc, src1);
  1438. }
  1439. Value *TranslateFUIBinary(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1440. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1441. bool isFloat = CI->getType()->getScalarType()->isFloatingPointTy();
  1442. if (isFloat) {
  1443. switch (IOP) {
  1444. case IntrinsicOp::IOP_max:
  1445. opcode = OP::OpCode::FMax;
  1446. break;
  1447. case IntrinsicOp::IOP_min:
  1448. default:
  1449. DXASSERT(IOP == IntrinsicOp::IOP_min, "");
  1450. opcode = OP::OpCode::FMin;
  1451. break;
  1452. }
  1453. }
  1454. return TrivialBinaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated);
  1455. }
  1456. Value *TranslateFUITrinary(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1457. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1458. bool isFloat = CI->getType()->getScalarType()->isFloatingPointTy();
  1459. if (isFloat) {
  1460. switch (IOP) {
  1461. case IntrinsicOp::IOP_mad:
  1462. default:
  1463. DXASSERT(IOP == IntrinsicOp::IOP_mad, "");
  1464. opcode = OP::OpCode::FMad;
  1465. break;
  1466. }
  1467. }
  1468. return TrivialTrinaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated);
  1469. }
  1470. Value *TranslateFrexp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1471. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1472. hlsl::OP *hlslOP = &helper.hlslOP;
  1473. Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1474. Value *expPtr = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1475. IRBuilder<> Builder(CI);
  1476. Type *i32Ty = Type::getInt32Ty(CI->getContext());
  1477. Constant *exponentMaskConst = ConstantInt::get(i32Ty, 0x7f800000);
  1478. Constant *mantisaMaskConst = ConstantInt::get(i32Ty, 0x007fffff);
  1479. Constant *exponentShiftConst = ConstantInt::get(i32Ty, 23);
  1480. Constant *mantisaOrConst = ConstantInt::get(i32Ty, 0x3f000000);
  1481. Constant *exponentBiasConst = ConstantInt::get(i32Ty, -(int)0x3f000000);
  1482. Constant *zeroVal = hlslOP->GetFloatConst(0);
  1483. // int iVal = asint(val);
  1484. Type *dstTy = i32Ty;
  1485. Type *Ty = val->getType();
  1486. if (Ty->isVectorTy()) {
  1487. unsigned vecSize = Ty->getVectorNumElements();
  1488. dstTy = VectorType::get(i32Ty, vecSize);
  1489. exponentMaskConst = ConstantVector::getSplat(vecSize, exponentMaskConst);
  1490. mantisaMaskConst = ConstantVector::getSplat(vecSize, mantisaMaskConst);
  1491. exponentShiftConst = ConstantVector::getSplat(vecSize, exponentShiftConst);
  1492. mantisaOrConst = ConstantVector::getSplat(vecSize, mantisaOrConst);
  1493. exponentBiasConst = ConstantVector::getSplat(vecSize, exponentBiasConst);
  1494. zeroVal = ConstantVector::getSplat(vecSize, zeroVal);
  1495. }
  1496. // bool ne = val != 0;
  1497. Value *notZero = Builder.CreateFCmpUNE(val, zeroVal);
  1498. notZero = Builder.CreateZExt(notZero, dstTy);
  1499. Value *intVal = Builder.CreateBitCast(val, dstTy);
  1500. // temp = intVal & exponentMask;
  1501. Value *temp = Builder.CreateAnd(intVal, exponentMaskConst);
  1502. // temp = temp + exponentBias;
  1503. temp = Builder.CreateAdd(temp, exponentBiasConst);
  1504. // temp = temp & ne;
  1505. temp = Builder.CreateAnd(temp, notZero);
  1506. // temp = temp >> exponentShift;
  1507. temp = Builder.CreateAShr(temp, exponentShiftConst);
  1508. // exp = float(temp);
  1509. Value *exp = Builder.CreateSIToFP(temp, Ty);
  1510. Builder.CreateStore(exp, expPtr);
  1511. // temp = iVal & mantisaMask;
  1512. temp = Builder.CreateAnd(intVal, mantisaMaskConst);
  1513. // temp = temp | mantisaOr;
  1514. temp = Builder.CreateOr(temp, mantisaOrConst);
  1515. // mantisa = temp & ne;
  1516. Value *mantisa = Builder.CreateAnd(temp, notZero);
  1517. return Builder.CreateBitCast(mantisa, Ty);
  1518. }
  1519. Value *TranslateLdExp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1520. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1521. hlsl::OP *hlslOP = &helper.hlslOP;
  1522. Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1523. Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1524. IRBuilder<> Builder(CI);
  1525. Value *exp =
  1526. TrivialDxilUnaryOperation(OP::OpCode::Exp, src1, hlslOP, Builder);
  1527. return Builder.CreateFMul(exp, src0);
  1528. }
  1529. Value *TranslateFWidth(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1530. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1531. hlsl::OP *hlslOP = &helper.hlslOP;
  1532. Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1533. IRBuilder<> Builder(CI);
  1534. Value *ddx =
  1535. TrivialDxilUnaryOperation(OP::OpCode::DerivCoarseX, src, hlslOP, Builder);
  1536. Value *absDdx =
  1537. TrivialDxilUnaryOperation(OP::OpCode::FAbs, ddx, hlslOP, Builder);
  1538. Value *ddy =
  1539. TrivialDxilUnaryOperation(OP::OpCode::DerivCoarseY, src, hlslOP, Builder);
  1540. Value *absDdy =
  1541. TrivialDxilUnaryOperation(OP::OpCode::FAbs, ddy, hlslOP, Builder);
  1542. return Builder.CreateFAdd(absDdx, absDdy);
  1543. }
  1544. Value *TranslateNormalize(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1545. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1546. hlsl::OP *hlslOP = &helper.hlslOP;
  1547. Type *Ty = CI->getType();
  1548. Value *op = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1549. IRBuilder<> Builder(CI);
  1550. Value *length = TranslateLength(CI, op, hlslOP);
  1551. if (Ty != length->getType()) {
  1552. VectorType *VT = cast<VectorType>(Ty);
  1553. Value *vecLength = UndefValue::get(VT);
  1554. for (unsigned i = 0; i < VT->getNumElements(); i++)
  1555. vecLength = Builder.CreateInsertElement(vecLength, length, i);
  1556. length = vecLength;
  1557. }
  1558. return Builder.CreateFDiv(op, length);
  1559. }
  1560. Value *TranslateLerp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1561. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1562. // x + s(y-x)
  1563. Value *x = CI->getArgOperand(HLOperandIndex::kLerpOpXIdx);
  1564. Value *y = CI->getArgOperand(HLOperandIndex::kLerpOpYIdx);
  1565. IRBuilder<> Builder(CI);
  1566. Value *ySubx = Builder.CreateFSub(y, x);
  1567. Value *s = CI->getArgOperand(HLOperandIndex::kLerpOpSIdx);
  1568. Value *sMulSub = Builder.CreateFMul(s, ySubx);
  1569. return Builder.CreateFAdd(x, sMulSub);
  1570. }
  1571. Value *TrivialDotOperation(OP::OpCode opcode, Value *src0,
  1572. Value *src1, hlsl::OP *hlslOP,
  1573. IRBuilder<> &Builder) {
  1574. Type *Ty = src0->getType()->getScalarType();
  1575. Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty);
  1576. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  1577. SmallVector<Value *, 9> args;
  1578. args.emplace_back(opArg);
  1579. unsigned vecSize = src0->getType()->getVectorNumElements();
  1580. for (unsigned i = 0; i < vecSize; i++)
  1581. args.emplace_back(Builder.CreateExtractElement(src0, i));
  1582. for (unsigned i = 0; i < vecSize; i++)
  1583. args.emplace_back(Builder.CreateExtractElement(src1, i));
  1584. Value *dotOP = Builder.CreateCall(dxilFunc, args);
  1585. return dotOP;
  1586. }
// Lower an integer dot product as a multiply plus a chain of IMad ops:
// Result = a0*b0, then Result = IMad(ak, bk, Result) for each extra lane.
// The switch intentionally falls through 4 -> 3 -> 2 so each larger vector
// size adds one IMad on top of the smaller-vector computation; case 2 ends
// the chain and 'default'/'case 1' only asserts (lane 0 is already done).
Value *TranslateIDot(Value *arg0, Value *arg1, unsigned vecSize, hlsl::OP *hlslOP, IRBuilder<> &Builder) {
// Seed the accumulator with the lane-0 product.
Value *Elt0 = Builder.CreateExtractElement(arg0, (uint64_t)0);
Value *Elt1 = Builder.CreateExtractElement(arg1, (uint64_t)0);
Value *Result = Builder.CreateMul(Elt0, Elt1);
switch (vecSize) {
case 4:
Elt0 = Builder.CreateExtractElement(arg0, 3);
Elt1 = Builder.CreateExtractElement(arg1, 3);
Result = TrivialDxilTrinaryOperation(DXIL::OpCode::IMad, Elt0, Elt1, Result, hlslOP, Builder);
// Pass thru. (intentional fallthrough to accumulate lane 2)
case 3:
Elt0 = Builder.CreateExtractElement(arg0, 2);
Elt1 = Builder.CreateExtractElement(arg1, 2);
Result = TrivialDxilTrinaryOperation(DXIL::OpCode::IMad, Elt0, Elt1, Result, hlslOP, Builder);
// Pass thru. (intentional fallthrough to accumulate lane 1)
case 2:
Elt0 = Builder.CreateExtractElement(arg0, 1);
Elt1 = Builder.CreateExtractElement(arg1, 1);
Result = TrivialDxilTrinaryOperation(DXIL::OpCode::IMad, Elt0, Elt1, Result, hlslOP, Builder);
break;
default:
case 1:
// A 1-vector needs no accumulation beyond the seed product.
DXASSERT(vecSize == 1, "invalid vector size.");
}
return Result;
}
  1613. Value *TranslateFDot(Value *arg0, Value *arg1, unsigned vecSize,
  1614. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  1615. switch (vecSize) {
  1616. case 2:
  1617. return TrivialDotOperation(OP::OpCode::Dot2, arg0, arg1, hlslOP, Builder);
  1618. break;
  1619. case 3:
  1620. return TrivialDotOperation(OP::OpCode::Dot3, arg0, arg1, hlslOP, Builder);
  1621. break;
  1622. case 4:
  1623. return TrivialDotOperation(OP::OpCode::Dot4, arg0, arg1, hlslOP, Builder);
  1624. break;
  1625. default:
  1626. DXASSERT(vecSize == 1, "wrong vector size");
  1627. {
  1628. Value *vecMul = Builder.CreateFMul(arg0, arg1);
  1629. return Builder.CreateExtractElement(vecMul, (uint64_t)0);
  1630. }
  1631. break;
  1632. }
  1633. }
  1634. Value *TranslateDot(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1635. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1636. hlsl::OP *hlslOP = &helper.hlslOP;
  1637. Value *arg0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1638. Type *Ty = arg0->getType();
  1639. unsigned vecSize = Ty->getVectorNumElements();
  1640. Value *arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1641. IRBuilder<> Builder(CI);
  1642. if (Ty->getScalarType()->isFloatingPointTy()) {
  1643. return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder);
  1644. } else {
  1645. return TranslateIDot(arg0, arg1, vecSize, hlslOP, Builder);
  1646. }
  1647. }
  1648. Value *TranslateReflect(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  1649. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1650. hlsl::OP *hlslOP = &helper.hlslOP;
  1651. // v = i - 2 * n * dot(i•n).
  1652. IRBuilder<> Builder(CI);
  1653. Value *i = CI->getArgOperand(HLOperandIndex::kReflectOpIIdx);
  1654. Value *n = CI->getArgOperand(HLOperandIndex::kReflectOpNIdx);
  1655. VectorType *VT = cast<VectorType>(i->getType());
  1656. unsigned vecSize = VT->getNumElements();
  1657. Value *dot = TranslateFDot(i, n, vecSize, hlslOP, Builder);
  1658. // 2 * dot (i, n).
  1659. dot = Builder.CreateFMul(hlslOP->GetFloatConst(2), dot);
  1660. // 2 * n * dot(i, n).
  1661. Value *vecDot = Builder.CreateVectorSplat(vecSize, dot);
  1662. Value *nMulDot = Builder.CreateFMul(vecDot, n);
  1663. // i - 2 * n * dot(i, n).
  1664. return Builder.CreateFSub(i, nMulDot);
  1665. }
  1666. Value *TranslateRefract(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  1667. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1668. hlsl::OP *hlslOP = &helper.hlslOP;
  1669. // d = dot(i•n);
  1670. // t = 1 - eta * eta * ( 1 - d*d);
  1671. // cond = t >= 1;
  1672. // r = eta * i - (eta * d + sqrt(t)) * n;
  1673. // return cond ? r : 0;
  1674. IRBuilder<> Builder(CI);
  1675. Value *i = CI->getArgOperand(HLOperandIndex::kRefractOpIIdx);
  1676. Value *n = CI->getArgOperand(HLOperandIndex::kRefractOpNIdx);
  1677. Value *eta = CI->getArgOperand(HLOperandIndex::kRefractOpEtaIdx);
  1678. VectorType *VT = cast<VectorType>(i->getType());
  1679. unsigned vecSize = VT->getNumElements();
  1680. Value *dot = TranslateFDot(i, n, vecSize, hlslOP, Builder);
  1681. // eta * eta;
  1682. Value *eta2 = Builder.CreateFMul(eta, eta);
  1683. // d*d;
  1684. Value *dot2 = Builder.CreateFMul(dot, dot);
  1685. Constant *one = ConstantFP::get(eta->getType(), 1);
  1686. Constant *zero = ConstantFP::get(eta->getType(), 0);
  1687. // 1- d*d;
  1688. dot2 = Builder.CreateFSub(one, dot2);
  1689. // eta * eta * (1-d*d);
  1690. eta2 = Builder.CreateFMul(dot2, eta2);
  1691. // t = 1 - eta * eta * ( 1 - d*d);
  1692. Value *t = Builder.CreateFSub(one, eta2);
  1693. // cond = t >= 0;
  1694. Value *cond = Builder.CreateFCmpOGE(t, zero);
  1695. // eta * i;
  1696. Value *vecEta = UndefValue::get(VT);
  1697. for (unsigned i = 0; i < vecSize; i++)
  1698. vecEta = Builder.CreateInsertElement(vecEta, eta, i);
  1699. Value *etaMulI = Builder.CreateFMul(i, vecEta);
  1700. // sqrt(t);
  1701. Value *sqrt = TrivialDxilUnaryOperation(OP::OpCode::Sqrt, t, hlslOP, Builder);
  1702. // eta * d;
  1703. Value *etaMulD = Builder.CreateFMul(eta, dot);
  1704. // eta * d + sqrt(t);
  1705. Value *etaSqrt = Builder.CreateFAdd(etaMulD, sqrt);
  1706. // (eta * d + sqrt(t)) * n;
  1707. Value *vecEtaSqrt = Builder.CreateVectorSplat(vecSize, etaSqrt);
  1708. Value *r = Builder.CreateFMul(vecEtaSqrt, n);
  1709. // r = eta * i - (eta * d + sqrt(t)) * n;
  1710. r = Builder.CreateFSub(etaMulI, r);
  1711. Value *refract =
  1712. Builder.CreateSelect(cond, r, ConstantVector::getSplat(vecSize, zero));
  1713. return refract;
  1714. }
  1715. Value *TranslateSmoothStep(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1716. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1717. hlsl::OP *hlslOP = &helper.hlslOP;
  1718. // s = saturate((x-min)/(max-min)).
  1719. IRBuilder<> Builder(CI);
  1720. Value *minVal = CI->getArgOperand(HLOperandIndex::kSmoothStepOpMinIdx);
  1721. Value *maxVal = CI->getArgOperand(HLOperandIndex::kSmoothStepOpMaxIdx);
  1722. Value *maxSubMin = Builder.CreateFSub(maxVal, minVal);
  1723. Value *x = CI->getArgOperand(HLOperandIndex::kSmoothStepOpXIdx);
  1724. Value *xSubMin = Builder.CreateFSub(x, minVal);
  1725. Value *satVal = Builder.CreateFDiv(xSubMin, maxSubMin);
  1726. Value *s = TrivialDxilUnaryOperation(DXIL::OpCode::Saturate, satVal, hlslOP,
  1727. Builder);
  1728. // return s * s *(3-2*s).
  1729. Constant *c2 = ConstantFP::get(CI->getType(),2);
  1730. Constant *c3 = ConstantFP::get(CI->getType(),3);
  1731. if (s->getType()->isVectorTy()) {
  1732. unsigned vecSize = s->getType()->getVectorNumElements();
  1733. c2 = ConstantVector::getSplat(vecSize, c2);
  1734. c3 = ConstantVector::getSplat(vecSize, c3);
  1735. }
  1736. Value *sMul2 = Builder.CreateFMul(s, c2);
  1737. Value *result = Builder.CreateFSub(c3, sMul2);
  1738. result = Builder.CreateFMul(s, result);
  1739. result = Builder.CreateFMul(s, result);
  1740. return result;
  1741. }
// Lower msad4(ref, src2, accum4): masked sum of absolute byte differences.
// ref (uint) is broadcast to 4 lanes; src2 is a uint2 whose bytes form a
// sliding 4-byte window. Bfi ops build the four shifted byte windows
// (DX asm: ushr r0.yzw, srcX, l(0,8,16,24); bfi r1.yzw, l(0,8,16,24),
// l(0,24,16,8), srcY, r0.yyzw), then one Msad per lane accumulates into
// accum4.
Value *TranslateMSad4(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
hlsl::OP *hlslOP = &helper.hlslOP;
Value *ref = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
Value *src = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
Value *accum = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
Type *Ty = CI->getType();
IRBuilder<> Builder(CI);
// Broadcast the scalar reference into all 4 result lanes.
Value *vecRef = UndefValue::get(Ty);
for (unsigned i = 0; i < 4; i++)
vecRef = Builder.CreateInsertElement(vecRef, ref, i);
Value *srcX = Builder.CreateExtractElement(src, (uint64_t)0);
Value *srcY = Builder.CreateExtractElement(src, 1);
// Lane 0 of the window is srcX unshifted.
Value *byteSrc = UndefValue::get(Ty);
byteSrc = Builder.CreateInsertElement(byteSrc, srcX, (uint64_t)0);
// ushr r0.yzw, srcX, l(0, 8, 16, 24)
// bfi r1.yzw, l(0, 8, 16, 24), l(0, 24, 16, 8), srcX, r0.yyzw
Value *bfiOpArg =
hlslOP->GetU32Const(static_cast<unsigned>(DXIL::OpCode::Bfi));
Value *imm8 = hlslOP->GetU32Const(8);
Value *imm16 = hlslOP->GetU32Const(16);
Value *imm24 = hlslOP->GetU32Const(24);
Ty = ref->getType();
// Get x[31:8].
Value *srcXShift = Builder.CreateLShr(srcX, imm8);
// Lane 1: Bfi(width=8, offset=24) inserts y[7:0] above x[31:8].
Value *byteSrcElt = TrivialDxilOperation(
DXIL::OpCode::Bfi, {bfiOpArg, imm8, imm24, srcY, srcXShift}, Ty, Ty,
hlslOP, Builder);
byteSrc = Builder.CreateInsertElement(byteSrc, byteSrcElt, 1);
// Get x[31:16].
srcXShift = Builder.CreateLShr(srcXShift, imm8);
// Lane 2: Bfi(width=16, offset=16) inserts y[15:0] above x[31:16].
byteSrcElt = TrivialDxilOperation(DXIL::OpCode::Bfi,
{bfiOpArg, imm16, imm16, srcY, srcXShift},
Ty, Ty, hlslOP, Builder);
byteSrc = Builder.CreateInsertElement(byteSrc, byteSrcElt, 2);
// Get x[31:24].
srcXShift = Builder.CreateLShr(srcXShift, imm8);
// Lane 3: Bfi(width=24, offset=8) inserts y[23:0] above x[31:24].
byteSrcElt = TrivialDxilOperation(DXIL::OpCode::Bfi,
{bfiOpArg, imm24, imm8, srcY, srcXShift},
Ty, Ty, hlslOP, Builder);
byteSrc = Builder.CreateInsertElement(byteSrc, byteSrcElt, 3);
// Msad on vecref and byteSrc, accumulating into accum.
return TrivialDxilTrinaryOperation(DXIL::OpCode::Msad, vecRef, byteSrc, accum,
hlslOP, Builder);
}
  1790. Value *TranslateRCP(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1791. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1792. Type *Ty = CI->getType();
  1793. Value *op = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1794. IRBuilder<> Builder(CI);
  1795. Constant *one = ConstantFP::get(Ty->getScalarType(), 1.0);
  1796. if (Ty != Ty->getScalarType()) {
  1797. one = ConstantVector::getSplat(Ty->getVectorNumElements(), one);
  1798. }
  1799. return Builder.CreateFDiv(one, op);
  1800. }
  1801. Value *TranslateSign(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1802. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1803. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1804. Type *Ty = val->getType();
  1805. Type *EltTy = Ty->getScalarType();
  1806. IRBuilder<> Builder(CI);
  1807. if (EltTy->isIntegerTy()) {
  1808. Constant *zero = ConstantInt::get(Ty->getScalarType(), 0);
  1809. if (Ty != EltTy) {
  1810. zero = ConstantVector::getSplat(Ty->getVectorNumElements(), zero);
  1811. }
  1812. Value *zeroLtVal = Builder.CreateICmpSLT(zero, val);
  1813. zeroLtVal = Builder.CreateZExt(zeroLtVal, CI->getType());
  1814. Value *valLtZero = Builder.CreateICmpSLT(val, zero);
  1815. valLtZero = Builder.CreateZExt(valLtZero, CI->getType());
  1816. return Builder.CreateSub(zeroLtVal, valLtZero);
  1817. } else {
  1818. Constant *zero = ConstantFP::get(Ty->getScalarType(), 0.0);
  1819. if (Ty != EltTy) {
  1820. zero = ConstantVector::getSplat(Ty->getVectorNumElements(), zero);
  1821. }
  1822. Value *zeroLtVal = Builder.CreateFCmpOLT(zero, val);
  1823. zeroLtVal = Builder.CreateZExt(zeroLtVal, CI->getType());
  1824. Value *valLtZero = Builder.CreateFCmpOLT(val, zero);
  1825. valLtZero = Builder.CreateZExt(valLtZero, CI->getType());
  1826. return Builder.CreateSub(zeroLtVal, valLtZero);
  1827. }
  1828. }
  1829. Value *TranslateStep(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1830. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1831. Value *edge = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1832. Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1833. Type *Ty = CI->getType();
  1834. IRBuilder<> Builder(CI);
  1835. Constant *one = ConstantFP::get(Ty->getScalarType(), 1.0);
  1836. Constant *zero = ConstantFP::get(Ty->getScalarType(), 0);
  1837. Value *cond = Builder.CreateFCmpOLT(x, edge);
  1838. if (Ty != Ty->getScalarType()) {
  1839. one = ConstantVector::getSplat(Ty->getVectorNumElements(), one);
  1840. zero = ConstantVector::getSplat(Ty->getVectorNumElements(), zero);
  1841. }
  1842. return Builder.CreateSelect(cond, zero, one);
  1843. }
  1844. Value *TranslatePow(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1845. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1846. hlsl::OP *hlslOP = &helper.hlslOP;
  1847. Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1848. Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1849. IRBuilder<> Builder(CI);
  1850. // t = log(x);
  1851. Value *logX =
  1852. TrivialDxilUnaryOperation(DXIL::OpCode::Log, x, hlslOP, Builder);
  1853. // t = y * t;
  1854. Value *mulY = Builder.CreateFMul(logX, y);
  1855. // pow = exp(t);
  1856. return TrivialDxilUnaryOperation(DXIL::OpCode::Exp, mulY, hlslOP, Builder);
  1857. }
  1858. Value *TranslateFaceforward(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  1859. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1860. hlsl::OP *hlslOP = &helper.hlslOP;
  1861. Type *Ty = CI->getType();
  1862. Value *n = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  1863. Value *i = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  1864. Value *ng = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  1865. IRBuilder<> Builder(CI);
  1866. unsigned vecSize = Ty->getVectorNumElements();
  1867. // -n x sign(dot(i, ng)).
  1868. Value *dotOp = TranslateFDot(i, ng, vecSize, hlslOP, Builder);
  1869. Constant *zero = ConstantFP::get(Ty->getScalarType(), 0);
  1870. Value *dotLtZero = Builder.CreateFCmpOLT(dotOp, zero);
  1871. Value *negN = Builder.CreateFNeg(n);
  1872. Value *faceforward = Builder.CreateSelect(dotLtZero, n, negN);
  1873. return faceforward;
  1874. }
  1875. }
  1876. // MOP intrinsics
  1877. namespace {
  1878. Value *TranslateGetSamplePosition(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  1879. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1880. hlsl::OP *hlslOP = &helper.hlslOP;
  1881. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  1882. IRBuilder<> Builder(CI);
  1883. Value *sampleIdx =
  1884. CI->getArgOperand(HLOperandIndex::kGetSamplePositionSampleIdxOpIndex);
  1885. OP::OpCode opcode = OP::OpCode::Texture2DMSGetSamplePosition;
  1886. llvm::Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  1887. Function *dxilFunc =
  1888. hlslOP->GetOpFunc(opcode, Type::getVoidTy(CI->getContext()));
  1889. Value *args[] = {opArg, handle, sampleIdx};
  1890. Value *samplePos = Builder.CreateCall(dxilFunc, args);
  1891. Value *result = UndefValue::get(CI->getType());
  1892. Value *samplePosX = Builder.CreateExtractValue(samplePos, 0);
  1893. Value *samplePosY = Builder.CreateExtractValue(samplePos, 1);
  1894. result = Builder.CreateInsertElement(result, samplePosX, (uint64_t)0);
  1895. result = Builder.CreateInsertElement(result, samplePosY, 1);
  1896. return result;
  1897. }
// Lower GetDimensions: call dxil GetDimensions once, then scatter the
// returned struct members into the caller's out-pointer arguments. The set
// and meaning of the out arguments depends on the resource kind (mip levels
// for textures, stride for structured buffers, sample count for MS
// textures), so the argument walk below is driven by widthOpIdx and the
// resource kind. Returns nullptr: all results are written through pointers.
Value *TranslateGetDimensions(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
hlsl::OP *hlslOP = &helper.hlslOP;
Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
DxilResource::Kind RK = pObjHelper->GetRK(handle);
IRBuilder<> Builder(CI);
OP::OpCode opcode = OP::OpCode::GetDimensions;
llvm::Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
Function *dxilFunc =
hlslOP->GetOpFunc(opcode, Type::getVoidTy(CI->getContext()));
Type *i32Ty = Type::getInt32Ty(CI->getContext());
Value *mipLevel = UndefValue::get(i32Ty);
unsigned widthOpIdx = HLOperandIndex::kGetDimensionsMipWidthOpIndex;
switch (RK) {
// Texture kinds may carry an explicit mip-level argument; buffers never do.
case DxilResource::Kind::Texture1D:
case DxilResource::Kind::Texture1DArray:
case DxilResource::Kind::Texture2D:
case DxilResource::Kind::Texture2DArray:
case DxilResource::Kind::TextureCube:
case DxilResource::Kind::TextureCubeArray:
case DxilResource::Kind::Texture3D: {
Value *opMipLevel =
CI->getArgOperand(HLOperandIndex::kGetDimensionsMipLevelOpIndex);
// mipLevel is in parameter, should not be pointer.
if (!opMipLevel->getType()->isPointerTy())
mipLevel = opMipLevel;
else {
// No mip level: the argument at that slot is already an out pointer, so
// the width starts one slot earlier and mip 0 is queried.
widthOpIdx = HLOperandIndex::kGetDimensionsNoMipWidthOpIndex;
mipLevel = ConstantInt::get(i32Ty, 0);
}
} break;
default:
widthOpIdx = HLOperandIndex::kGetDimensionsNoMipWidthOpIndex;
break;
}
Value *args[] = {opArg, handle, mipLevel};
Value *dims = Builder.CreateCall(dxilFunc, args);
unsigned dimensionIdx = 0;
// Width is always present; convert to float if the out pointer is float.
Value *width = Builder.CreateExtractValue(dims, dimensionIdx++);
Value *widthPtr = CI->getArgOperand(widthOpIdx);
if (widthPtr->getType()->getPointerElementType()->isFloatingPointTy())
width = Builder.CreateSIToFP(width,
widthPtr->getType()->getPointerElementType());
Builder.CreateStore(width, widthPtr);
if (RK == DxilResource::Kind::StructuredBuffer) {
// Set stride.
Value *stridePtr = CI->getArgOperand(widthOpIdx + 1);
const DataLayout &DL = helper.legacyDataLayout;
Value *buf = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
Type *bufTy = buf->getType();
// Stride comes from the layout size of the buffer's element type
// (element 0 of the handle's struct type here -- NOTE(review): assumes
// the HL handle is still typed as the resource struct at this point).
Type *bufRetTy = bufTy->getStructElementType(0);
unsigned stride = DL.getTypeAllocSize(bufRetTy);
Builder.CreateStore(hlslOP->GetU32Const(stride), stridePtr);
} else {
if (widthOpIdx == HLOperandIndex::kGetDimensionsMipWidthOpIndex ||
// Samples is in w channel too.
RK == DXIL::ResourceKind::Texture2DMS) {
// Has mip: all but the last out argument take consecutive dimension
// members; the final argument takes member 3 (mip count / sample count).
for (unsigned argIdx = widthOpIdx + 1;
argIdx < CI->getNumArgOperands() - 1; argIdx++) {
Value *dim = Builder.CreateExtractValue(dims, dimensionIdx++);
Value *ptr = CI->getArgOperand(argIdx);
if (ptr->getType()->getPointerElementType()->isFloatingPointTy())
dim = Builder.CreateSIToFP(dim,
ptr->getType()->getPointerElementType());
Builder.CreateStore(dim, ptr);
}
// NumOfLevel is in w channel.
dimensionIdx = 3;
Value *dim = Builder.CreateExtractValue(dims, dimensionIdx);
Value *ptr = CI->getArgOperand(CI->getNumArgOperands() - 1);
if (ptr->getType()->getPointerElementType()->isFloatingPointTy())
dim =
Builder.CreateSIToFP(dim, ptr->getType()->getPointerElementType());
Builder.CreateStore(dim, ptr);
} else {
// No mip info: every remaining out argument takes the next dimension.
for (unsigned argIdx = widthOpIdx + 1; argIdx < CI->getNumArgOperands();
argIdx++) {
Value *dim = Builder.CreateExtractValue(dims, dimensionIdx++);
Value *ptr = CI->getArgOperand(argIdx);
if (ptr->getType()->getPointerElementType()->isFloatingPointTy())
dim = Builder.CreateSIToFP(dim,
ptr->getType()->getPointerElementType());
Builder.CreateStore(dim, ptr);
}
}
}
return nullptr;
}
  1988. Value *GenerateUpdateCounter(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1989. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1990. hlsl::OP *hlslOP = &helper.hlslOP;
  1991. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  1992. pObjHelper->MarkHasCounter(handle->getType(), handle);
  1993. bool bInc = IOP == IntrinsicOp::MOP_IncrementCounter;
  1994. IRBuilder<> Builder(CI);
  1995. OP::OpCode OpCode = OP::OpCode::BufferUpdateCounter;
  1996. Value *OpCodeArg = hlslOP->GetU32Const((unsigned)OpCode);
  1997. Value *IncVal = hlslOP->GetI8Const(bInc ? 1 : -1);
  1998. // Create BufferUpdateCounter call.
  1999. Value *Args[] = {OpCodeArg, handle, IncVal};
  2000. Function *F =
  2001. hlslOP->GetOpFunc(OpCode, Type::getVoidTy(handle->getContext()));
  2002. return Builder.CreateCall(F, Args);
  2003. }
  2004. Value *ScalarizeResRet(Type *RetTy, Value *ResRet, IRBuilder<> &Builder) {
  2005. // Extract value part.
  2006. Value *retVal = llvm::UndefValue::get(RetTy);
  2007. if (RetTy->isVectorTy()) {
  2008. for (unsigned i = 0; i < RetTy->getVectorNumElements(); i++) {
  2009. Value *retComp = Builder.CreateExtractValue(ResRet, i);
  2010. retVal = Builder.CreateInsertElement(retVal, retComp, i);
  2011. }
  2012. } else {
  2013. retVal = Builder.CreateExtractValue(ResRet, 0);
  2014. }
  2015. return retVal;
  2016. }
  2017. Value *ScalarizeElements(Type *RetTy, ArrayRef<Value*> Elts, IRBuilder<> &Builder) {
  2018. // Extract value part.
  2019. Value *retVal = llvm::UndefValue::get(RetTy);
  2020. if (RetTy->isVectorTy()) {
  2021. unsigned vecSize = RetTy->getVectorNumElements();
  2022. DXASSERT(vecSize <= Elts.size(), "vector size mismatch");
  2023. for (unsigned i = 0; i < vecSize; i++) {
  2024. Value *retComp = Elts[i];
  2025. retVal = Builder.CreateInsertElement(retVal, retComp, i);
  2026. }
  2027. } else {
  2028. retVal = Elts[0];
  2029. }
  2030. return retVal;
  2031. }
  2032. void UpdateStatus(Value *ResRet, Value *status, IRBuilder<> &Builder) {
  2033. if (status && !isa<UndefValue>(status)) {
  2034. Value *statusVal = Builder.CreateExtractValue(ResRet, 4);
  2035. Builder.CreateStore(statusVal, status);
  2036. }
  2037. }
  2038. Value *SplatToVector(Value *Elt, Type *DstTy, IRBuilder<> &Builder) {
  2039. Value *Result = UndefValue::get(DstTy);
  2040. for (unsigned i = 0; i < DstTy->getVectorNumElements(); i++)
  2041. Result = Builder.CreateInsertElement(Result, Elt, i);
  2042. return Result;
  2043. }
  2044. // Sample intrinsics.
// Gathers and normalizes the operands of an HLSL Sample* call for lowering
// to a dxil sample op. The constructor (defined out-of-line below) fills the
// fixed-size operand arrays according to the specific sample opcode; unused
// trailing slots are padded with undef so every dxil sample variant can be
// emitted with a uniform argument list.
struct SampleHelper {
SampleHelper(CallInst *CI, OP::OpCode op, HLObjectOperationLowerHelper *pObjHelper);
OP::OpCode opcode;
Value *texHandle;     // resource handle (arg after the opcode)
Value *samplerHandle; // sampler handle
static const unsigned kMaxCoordDimensions = 4;
Value *coord[kMaxCoordDimensions]; // scalarized coordinates, undef-padded
Value *special; // For CompareValue, Bias, LOD.
// SampleGrad only.
static const unsigned kMaxDDXYDimensions = 3;
Value *ddx[kMaxDDXYDimensions];
Value *ddy[kMaxDDXYDimensions];
// Optional.
static const unsigned kMaxOffsetDimensions = 3;
Value *offset[kMaxOffsetDimensions]; // scalarized offsets, undef-padded
Value *clamp;  // LOD clamp scalar, or undef when absent
Value *status; // out-pointer for the status result, or nullptr
// Scalarize the coordinate vector argument into coord[], padding the
// remaining slots with undef floats.
void TranslateCoord(CallInst *CI, unsigned coordIdx,
unsigned coordDimensions) {
Value *coordArg = CI->getArgOperand(coordIdx);
IRBuilder<> Builder(CI);
for (unsigned i = 0; i < coordDimensions; i++)
coord[i] = Builder.CreateExtractElement(coordArg, i);
Value *undefF = UndefValue::get(Type::getFloatTy(CI->getContext()));
for (unsigned i = coordDimensions; i < kMaxCoordDimensions; i++)
coord[i] = undefF;
}
// Scalarize the optional offset argument into offset[]; when the call has
// no offset argument, every slot becomes undef i32.
void TranslateOffset(CallInst *CI, unsigned offsetIdx,
unsigned offsetDimensions) {
Value *undefI = UndefValue::get(Type::getInt32Ty(CI->getContext()));
if (CI->getNumArgOperands() > offsetIdx) {
Value *offsetArg = CI->getArgOperand(offsetIdx);
IRBuilder<> Builder(CI);
for (unsigned i = 0; i < offsetDimensions; i++)
offset[i] = Builder.CreateExtractElement(offsetArg, i);
for (unsigned i = offsetDimensions; i < kMaxOffsetDimensions; i++)
offset[i] = undefI;
} else {
for (unsigned i = 0; i < kMaxOffsetDimensions; i++)
offset[i] = undefI;
}
}
// Capture the optional LOD-clamp argument (lane 0 if it arrives as a
// vector); undef float when absent.
void SetClamp(CallInst *CI, unsigned clampIdx) {
if (CI->getNumArgOperands() > clampIdx) {
clamp = CI->getArgOperand(clampIdx);
if (clamp->getType()->isVectorTy()) {
IRBuilder<> Builder(CI);
clamp = Builder.CreateExtractElement(clamp, (uint64_t)0);
}
} else
clamp = UndefValue::get(Type::getFloatTy(CI->getContext()));
}
// Capture the optional status out-pointer: present only when it is the
// very last argument of the call.
void SetStatus(CallInst *CI, unsigned statusIdx) {
if (CI->getNumArgOperands() == (statusIdx + 1))
status = CI->getArgOperand(statusIdx);
else
status = nullptr;
}
// Scalarize a ddx/ddy gradient vector into the given array, padding the
// remaining slots with undef floats.
void SetDDXY(CallInst *CI, MutableArrayRef<Value *> ddxy, Value *ddxyArg,
unsigned ddxySize) {
IRBuilder<> Builder(CI);
for (unsigned i = 0; i < ddxySize; i++)
ddxy[i] = Builder.CreateExtractElement(ddxyArg, i);
Value *undefF = UndefValue::get(Type::getFloatTy(CI->getContext()));
for (unsigned i = ddxySize; i < kMaxDDXYDimensions; i++)
ddxy[i] = undefF;
}
};
  2113. SampleHelper::SampleHelper(
  2114. CallInst *CI, OP::OpCode op, HLObjectOperationLowerHelper *pObjHelper)
  2115. : opcode(op) {
  2116. const unsigned thisIdx =
  2117. HLOperandIndex::kHandleOpIdx; // opcode takes arg0, this pointer is arg1.
  2118. const unsigned kSamplerArgIndex = HLOperandIndex::kSampleSamplerArgIndex;
  2119. IRBuilder<> Builder(CI);
  2120. texHandle = CI->getArgOperand(thisIdx);
  2121. samplerHandle = CI->getArgOperand(kSamplerArgIndex);
  2122. DXIL::ResourceKind RK = pObjHelper->GetRK(texHandle);
  2123. if (RK == DXIL::ResourceKind::Invalid) {
  2124. opcode = DXIL::OpCode::NumOpCodes;
  2125. return;
  2126. }
  2127. unsigned coordDimensions = DxilResource::GetNumCoords(RK);
  2128. unsigned offsetDimensions = DxilResource::GetNumOffsets(RK);
  2129. const unsigned kCoordArgIdx = HLOperandIndex::kSampleCoordArgIndex;
  2130. TranslateCoord(CI, kCoordArgIdx, coordDimensions);
  2131. special = nullptr;
  2132. switch (op) {
  2133. case OP::OpCode::Sample:
  2134. TranslateOffset(CI, HLOperandIndex::kSampleOffsetArgIndex,
  2135. offsetDimensions);
  2136. SetClamp(CI, HLOperandIndex::kSampleClampArgIndex);
  2137. SetStatus(CI, HLOperandIndex::kSampleStatusArgIndex);
  2138. break;
  2139. case OP::OpCode::SampleLevel:
  2140. special = CI->getArgOperand(HLOperandIndex::kSampleLLevelArgIndex);
  2141. TranslateOffset(CI, HLOperandIndex::kSampleLOffsetArgIndex,
  2142. offsetDimensions);
  2143. SetStatus(CI, HLOperandIndex::kSampleLStatusArgIndex);
  2144. break;
  2145. case OP::OpCode::SampleBias:
  2146. special = CI->getArgOperand(HLOperandIndex::kSampleBBiasArgIndex);
  2147. TranslateOffset(CI, HLOperandIndex::kSampleBOffsetArgIndex,
  2148. offsetDimensions);
  2149. SetClamp(CI, HLOperandIndex::kSampleBClampArgIndex);
  2150. SetStatus(CI, HLOperandIndex::kSampleBStatusArgIndex);
  2151. break;
  2152. case OP::OpCode::SampleCmp:
  2153. special = CI->getArgOperand(HLOperandIndex::kSampleCmpCmpValArgIndex);
  2154. TranslateOffset(CI, HLOperandIndex::kSampleCmpOffsetArgIndex,
  2155. offsetDimensions);
  2156. SetClamp(CI, HLOperandIndex::kSampleCmpClampArgIndex);
  2157. SetStatus(CI, HLOperandIndex::kSampleCmpStatusArgIndex);
  2158. break;
  2159. case OP::OpCode::SampleCmpLevelZero:
  2160. special = CI->getArgOperand(HLOperandIndex::kSampleCmpLZCmpValArgIndex);
  2161. TranslateOffset(CI, HLOperandIndex::kSampleCmpLZOffsetArgIndex,
  2162. offsetDimensions);
  2163. SetStatus(CI, HLOperandIndex::kSampleCmpLZStatusArgIndex);
  2164. break;
  2165. case OP::OpCode::SampleGrad:
  2166. SetDDXY(CI, ddx, CI->getArgOperand(HLOperandIndex::kSampleGDDXArgIndex),
  2167. offsetDimensions);
  2168. SetDDXY(CI, ddy, CI->getArgOperand(HLOperandIndex::kSampleGDDYArgIndex),
  2169. offsetDimensions);
  2170. TranslateOffset(CI, HLOperandIndex::kSampleGOffsetArgIndex,
  2171. offsetDimensions);
  2172. SetClamp(CI, HLOperandIndex::kSampleGClampArgIndex);
  2173. SetStatus(CI, HLOperandIndex::kSampleGStatusArgIndex);
  2174. break;
  2175. case OP::OpCode::CalculateLOD:
  2176. // Only need coord for LOD calculation.
  2177. break;
  2178. default:
  2179. DXASSERT(0, "invalid opcode for Sample");
  2180. break;
  2181. }
  2182. }
  2183. Value *TranslateCalculateLOD(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2184. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2185. hlsl::OP *hlslOP = &helper.hlslOP;
  2186. SampleHelper sampleHelper(CI, OP::OpCode::CalculateLOD, pObjHelper);
  2187. if (sampleHelper.opcode == DXIL::OpCode::NumOpCodes) {
  2188. Translated = false;
  2189. return nullptr;
  2190. }
  2191. bool bClamped = IOP == IntrinsicOp::MOP_CalculateLevelOfDetail;
  2192. IRBuilder<> Builder(CI);
  2193. Value *opArg =
  2194. hlslOP->GetU32Const(static_cast<unsigned>(OP::OpCode::CalculateLOD));
  2195. Value *clamped = hlslOP->GetI1Const(bClamped);
  2196. Value *args[] = {opArg,
  2197. sampleHelper.texHandle,
  2198. sampleHelper.samplerHandle,
  2199. sampleHelper.coord[0],
  2200. sampleHelper.coord[1],
  2201. sampleHelper.coord[2],
  2202. clamped};
  2203. Function *dxilFunc = hlslOP->GetOpFunc(OP::OpCode::CalculateLOD,
  2204. Type::getFloatTy(opArg->getContext()));
  2205. Value *LOD = Builder.CreateCall(dxilFunc, args);
  2206. return LOD;
  2207. }
  2208. void GenerateDxilSample(CallInst *CI, Function *F, ArrayRef<Value *> sampleArgs,
  2209. Value *status) {
  2210. IRBuilder<> Builder(CI);
  2211. CallInst *call = Builder.CreateCall(F, sampleArgs);
  2212. // extract value part
  2213. Value *retVal = ScalarizeResRet(CI->getType(), call, Builder);
  2214. // Replace ret val.
  2215. CI->replaceAllUsesWith(retVal);
  2216. // get status
  2217. if (status) {
  2218. UpdateStatus(call, status, Builder);
  2219. }
  2220. }
  2221. Value *TranslateSample(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2222. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2223. hlsl::OP *hlslOP = &helper.hlslOP;
  2224. SampleHelper sampleHelper(CI, opcode, pObjHelper);
  2225. if (sampleHelper.opcode == DXIL::OpCode::NumOpCodes) {
  2226. Translated = false;
  2227. return nullptr;
  2228. }
  2229. Type *Ty = CI->getType();
  2230. Function *F = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
  2231. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  2232. switch (opcode) {
  2233. case OP::OpCode::Sample: {
  2234. Value *sampleArgs[] = {
  2235. opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
  2236. // Coord.
  2237. sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
  2238. sampleHelper.coord[3],
  2239. // Offset.
  2240. sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
  2241. // Clamp.
  2242. sampleHelper.clamp};
  2243. GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status);
  2244. } break;
  2245. case OP::OpCode::SampleLevel: {
  2246. Value *sampleArgs[] = {
  2247. opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
  2248. // Coord.
  2249. sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
  2250. sampleHelper.coord[3],
  2251. // Offset.
  2252. sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
  2253. // LOD.
  2254. sampleHelper.special};
  2255. GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status);
  2256. } break;
  2257. case OP::OpCode::SampleGrad: {
  2258. Value *sampleArgs[] = {
  2259. opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
  2260. // Coord.
  2261. sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
  2262. sampleHelper.coord[3],
  2263. // Offset.
  2264. sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
  2265. // Ddx.
  2266. sampleHelper.ddx[0], sampleHelper.ddx[1], sampleHelper.ddx[2],
  2267. // Ddy.
  2268. sampleHelper.ddy[0], sampleHelper.ddy[1], sampleHelper.ddy[2],
  2269. // Clamp.
  2270. sampleHelper.clamp};
  2271. GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status);
  2272. } break;
  2273. case OP::OpCode::SampleBias: {
  2274. // Clamp bias for immediate.
  2275. Value *bias = sampleHelper.special;
  2276. if (ConstantFP *FP = dyn_cast<ConstantFP>(bias)) {
  2277. float v = FP->getValueAPF().convertToFloat();
  2278. if (v > DXIL::kMaxMipLodBias)
  2279. bias = ConstantFP::get(FP->getType(), DXIL::kMaxMipLodBias);
  2280. if (v < DXIL::kMinMipLodBias)
  2281. bias = ConstantFP::get(FP->getType(), DXIL::kMinMipLodBias);
  2282. }
  2283. Value *sampleArgs[] = {
  2284. opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
  2285. // Coord.
  2286. sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
  2287. sampleHelper.coord[3],
  2288. // Offset.
  2289. sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
  2290. // Bias.
  2291. bias,
  2292. // Clamp.
  2293. sampleHelper.clamp};
  2294. GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status);
  2295. } break;
  2296. case OP::OpCode::SampleCmp: {
  2297. Value *sampleArgs[] = {
  2298. opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
  2299. // Coord.
  2300. sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
  2301. sampleHelper.coord[3],
  2302. // Offset.
  2303. sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
  2304. // CmpVal.
  2305. sampleHelper.special,
  2306. // Clamp.
  2307. sampleHelper.clamp};
  2308. GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status);
  2309. } break;
  2310. case OP::OpCode::SampleCmpLevelZero:
  2311. default: {
  2312. DXASSERT(opcode == OP::OpCode::SampleCmpLevelZero, "invalid sample opcode");
  2313. Value *sampleArgs[] = {
  2314. opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
  2315. // Coord.
  2316. sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
  2317. sampleHelper.coord[3],
  2318. // Offset.
  2319. sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
  2320. // CmpVal.
  2321. sampleHelper.special};
  2322. GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status);
  2323. } break;
  2324. }
  2325. // CI is replaced in GenerateDxilSample.
  2326. return nullptr;
  2327. }
  2328. // Gather intrinsics.
  2329. struct GatherHelper {
  2330. enum class GatherChannel {
  2331. GatherAll,
  2332. GatherRed,
  2333. GatherGreen,
  2334. GatherBlue,
  2335. GatherAlpha,
  2336. };
  2337. GatherHelper(CallInst *CI, OP::OpCode op, HLObjectOperationLowerHelper *pObjHelper,
  2338. GatherHelper::GatherChannel ch);
  2339. OP::OpCode opcode;
  2340. Value *texHandle;
  2341. Value *samplerHandle;
  2342. static const unsigned kMaxCoordDimensions = 4;
  2343. Value *coord[kMaxCoordDimensions];
  2344. unsigned channel;
  2345. Value *special; // For CompareValue, Bias, LOD.
  2346. // Optional.
  2347. static const unsigned kMaxOffsetDimensions = 2;
  2348. Value *offset[kMaxOffsetDimensions];
  2349. // For the overload send different offset for each sample.
  2350. // Only save 3 sampleOffsets because use offset for normal overload as first
  2351. // sample offset.
  2352. static const unsigned kSampleOffsetDimensions = 3;
  2353. Value *sampleOffsets[kSampleOffsetDimensions][kMaxOffsetDimensions];
  2354. Value *status;
  2355. bool hasSampleOffsets;
  2356. void TranslateCoord(CallInst *CI, unsigned coordIdx,
  2357. unsigned coordDimensions) {
  2358. Value *coordArg = CI->getArgOperand(coordIdx);
  2359. IRBuilder<> Builder(CI);
  2360. for (unsigned i = 0; i < coordDimensions; i++)
  2361. coord[i] = Builder.CreateExtractElement(coordArg, i);
  2362. Value *undefF = UndefValue::get(Type::getFloatTy(CI->getContext()));
  2363. for (unsigned i = coordDimensions; i < kMaxCoordDimensions; i++)
  2364. coord[i] = undefF;
  2365. }
  2366. void SetStatus(CallInst *CI, unsigned statusIdx) {
  2367. if (CI->getNumArgOperands() == (statusIdx + 1))
  2368. status = CI->getArgOperand(statusIdx);
  2369. else
  2370. status = nullptr;
  2371. }
  2372. void TranslateOffset(CallInst *CI, unsigned offsetIdx,
  2373. unsigned offsetDimensions) {
  2374. Value *undefI = UndefValue::get(Type::getInt32Ty(CI->getContext()));
  2375. if (CI->getNumArgOperands() > offsetIdx) {
  2376. Value *offsetArg = CI->getArgOperand(offsetIdx);
  2377. IRBuilder<> Builder(CI);
  2378. for (unsigned i = 0; i < offsetDimensions; i++)
  2379. offset[i] = Builder.CreateExtractElement(offsetArg, i);
  2380. for (unsigned i = offsetDimensions; i < kMaxOffsetDimensions; i++)
  2381. offset[i] = undefI;
  2382. } else {
  2383. for (unsigned i = 0; i < kMaxOffsetDimensions; i++)
  2384. offset[i] = undefI;
  2385. }
  2386. }
  2387. void TranslateSampleOffset(CallInst *CI, unsigned offsetIdx,
  2388. unsigned offsetDimensions) {
  2389. Value *undefI = UndefValue::get(Type::getInt32Ty(CI->getContext()));
  2390. if (CI->getNumArgOperands() >= (offsetIdx + kSampleOffsetDimensions)) {
  2391. hasSampleOffsets = true;
  2392. IRBuilder<> Builder(CI);
  2393. for (unsigned ch = 0; ch < kSampleOffsetDimensions; ch++) {
  2394. Value *offsetArg = CI->getArgOperand(offsetIdx + ch);
  2395. for (unsigned i = 0; i < offsetDimensions; i++)
  2396. sampleOffsets[ch][i] = Builder.CreateExtractElement(offsetArg, i);
  2397. for (unsigned i = offsetDimensions; i < kMaxOffsetDimensions; i++)
  2398. sampleOffsets[ch][i] = undefI;
  2399. }
  2400. }
  2401. }
  2402. // Update the offset args for gather with sample offset at sampleIdx.
  2403. void UpdateOffsetInGatherArgs(MutableArrayRef<Value *> gatherArgs,
  2404. unsigned sampleIdx) {
  2405. unsigned offsetBase = DXIL::OperandIndex::kTextureGatherOffset0OpIdx;
  2406. for (unsigned i = 0; i < kMaxOffsetDimensions; i++)
  2407. // -1 because offset for sample 0 is in GatherHelper::offset.
  2408. gatherArgs[offsetBase + i] = sampleOffsets[sampleIdx - 1][i];
  2409. }
  2410. };
  2411. GatherHelper::GatherHelper(
  2412. CallInst *CI, OP::OpCode op, HLObjectOperationLowerHelper *pObjHelper,
  2413. GatherHelper::GatherChannel ch)
  2414. : opcode(op), special(nullptr), hasSampleOffsets(false) {
  2415. const unsigned thisIdx =
  2416. HLOperandIndex::kHandleOpIdx; // opcode takes arg0, this pointer is arg1.
  2417. const unsigned kSamplerArgIndex = HLOperandIndex::kSampleSamplerArgIndex;
  2418. switch (ch) {
  2419. case GatherChannel::GatherAll:
  2420. channel = 0;
  2421. break;
  2422. case GatherChannel::GatherRed:
  2423. channel = 0;
  2424. break;
  2425. case GatherChannel::GatherGreen:
  2426. channel = 1;
  2427. break;
  2428. case GatherChannel::GatherBlue:
  2429. channel = 2;
  2430. break;
  2431. case GatherChannel::GatherAlpha:
  2432. channel = 3;
  2433. break;
  2434. }
  2435. IRBuilder<> Builder(CI);
  2436. texHandle = CI->getArgOperand(thisIdx);
  2437. samplerHandle = CI->getArgOperand(kSamplerArgIndex);
  2438. DXIL::ResourceKind RK = pObjHelper->GetRK(texHandle);
  2439. if (RK == DXIL::ResourceKind::Invalid) {
  2440. opcode = DXIL::OpCode::NumOpCodes;
  2441. return;
  2442. }
  2443. unsigned coordSize = DxilResource::GetNumCoords(RK);
  2444. unsigned offsetSize = DxilResource::GetNumOffsets(RK);
  2445. const unsigned kCoordArgIdx = HLOperandIndex::kSampleCoordArgIndex;
  2446. TranslateCoord(CI, kCoordArgIdx, coordSize);
  2447. switch (op) {
  2448. case OP::OpCode::TextureGather: {
  2449. TranslateOffset(CI, HLOperandIndex::kGatherOffsetArgIndex, offsetSize);
  2450. // Gather all don't have sample offset version overload.
  2451. if (ch != GatherChannel::GatherAll)
  2452. TranslateSampleOffset(CI, HLOperandIndex::kGatherSampleOffsetArgIndex,
  2453. offsetSize);
  2454. unsigned statusIdx =
  2455. hasSampleOffsets ? HLOperandIndex::kGatherStatusWithSampleOffsetArgIndex
  2456. : HLOperandIndex::kGatherStatusArgIndex;
  2457. SetStatus(CI, statusIdx);
  2458. } break;
  2459. case OP::OpCode::TextureGatherCmp: {
  2460. special = CI->getArgOperand(HLOperandIndex::kGatherCmpCmpValArgIndex);
  2461. TranslateOffset(CI, HLOperandIndex::kGatherCmpOffsetArgIndex, offsetSize);
  2462. // Gather all don't have sample offset version overload.
  2463. if (ch != GatherChannel::GatherAll)
  2464. TranslateSampleOffset(CI, HLOperandIndex::kGatherCmpSampleOffsetArgIndex,
  2465. offsetSize);
  2466. unsigned statusIdx =
  2467. hasSampleOffsets
  2468. ? HLOperandIndex::kGatherCmpStatusWithSampleOffsetArgIndex
  2469. : HLOperandIndex::kGatherCmpStatusArgIndex;
  2470. SetStatus(CI, statusIdx);
  2471. } break;
  2472. default:
  2473. DXASSERT(0, "invalid opcode for Gather");
  2474. break;
  2475. }
  2476. }
  2477. void GenerateDxilGather(CallInst *CI, Function *F,
  2478. MutableArrayRef<Value *> gatherArgs,
  2479. GatherHelper &helper) {
  2480. IRBuilder<> Builder(CI);
  2481. CallInst *call = Builder.CreateCall(F, gatherArgs);
  2482. if (!helper.hasSampleOffsets) {
  2483. // extract value part
  2484. Value *retVal = ScalarizeResRet(CI->getType(), call, Builder);
  2485. // Replace ret val.
  2486. CI->replaceAllUsesWith(retVal);
  2487. } else {
  2488. Value *retVal = UndefValue::get(CI->getType());
  2489. Value *elt = Builder.CreateExtractValue(call, (uint64_t)0);
  2490. retVal = Builder.CreateInsertElement(retVal, elt, (uint64_t)0);
  2491. helper.UpdateOffsetInGatherArgs(gatherArgs, /*sampleIdx*/ 1);
  2492. CallInst *callY = Builder.CreateCall(F, gatherArgs);
  2493. elt = Builder.CreateExtractValue(callY, (uint64_t)1);
  2494. retVal = Builder.CreateInsertElement(retVal, elt, 1);
  2495. helper.UpdateOffsetInGatherArgs(gatherArgs, /*sampleIdx*/ 2);
  2496. CallInst *callZ = Builder.CreateCall(F, gatherArgs);
  2497. elt = Builder.CreateExtractValue(callZ, (uint64_t)2);
  2498. retVal = Builder.CreateInsertElement(retVal, elt, 2);
  2499. helper.UpdateOffsetInGatherArgs(gatherArgs, /*sampleIdx*/ 3);
  2500. CallInst *callW = Builder.CreateCall(F, gatherArgs);
  2501. elt = Builder.CreateExtractValue(callW, (uint64_t)3);
  2502. retVal = Builder.CreateInsertElement(retVal, elt, 3);
  2503. // Replace ret val.
  2504. CI->replaceAllUsesWith(retVal);
  2505. // TODO: UpdateStatus for each gather call.
  2506. }
  2507. // Get status
  2508. if (helper.status) {
  2509. UpdateStatus(call, helper.status, Builder);
  2510. }
  2511. }
  2512. Value *TranslateGather(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2513. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2514. hlsl::OP *hlslOP = &helper.hlslOP;
  2515. GatherHelper::GatherChannel ch = GatherHelper::GatherChannel::GatherAll;
  2516. switch (IOP) {
  2517. case IntrinsicOp::MOP_Gather:
  2518. case IntrinsicOp::MOP_GatherCmp:
  2519. ch = GatherHelper::GatherChannel::GatherAll;
  2520. break;
  2521. case IntrinsicOp::MOP_GatherRed:
  2522. case IntrinsicOp::MOP_GatherCmpRed:
  2523. ch = GatherHelper::GatherChannel::GatherRed;
  2524. break;
  2525. case IntrinsicOp::MOP_GatherGreen:
  2526. case IntrinsicOp::MOP_GatherCmpGreen:
  2527. ch = GatherHelper::GatherChannel::GatherGreen;
  2528. break;
  2529. case IntrinsicOp::MOP_GatherBlue:
  2530. case IntrinsicOp::MOP_GatherCmpBlue:
  2531. ch = GatherHelper::GatherChannel::GatherBlue;
  2532. break;
  2533. case IntrinsicOp::MOP_GatherAlpha:
  2534. case IntrinsicOp::MOP_GatherCmpAlpha:
  2535. ch = GatherHelper::GatherChannel::GatherAlpha;
  2536. break;
  2537. default:
  2538. DXASSERT(0, "invalid gather intrinsic");
  2539. break;
  2540. }
  2541. GatherHelper gatherHelper(CI, opcode, pObjHelper, ch);
  2542. if (gatherHelper.opcode == DXIL::OpCode::NumOpCodes) {
  2543. Translated = false;
  2544. return nullptr;
  2545. }
  2546. Type *Ty = CI->getType();
  2547. Function *F = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
  2548. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  2549. Value *channelArg = hlslOP->GetU32Const(gatherHelper.channel);
  2550. switch (opcode) {
  2551. case OP::OpCode::TextureGather: {
  2552. Value *gatherArgs[] = {
  2553. opArg, gatherHelper.texHandle, gatherHelper.samplerHandle,
  2554. // Coord.
  2555. gatherHelper.coord[0], gatherHelper.coord[1], gatherHelper.coord[2],
  2556. gatherHelper.coord[3],
  2557. // Offset.
  2558. gatherHelper.offset[0], gatherHelper.offset[1],
  2559. // Channel.
  2560. channelArg};
  2561. GenerateDxilGather(CI, F, gatherArgs, gatherHelper);
  2562. } break;
  2563. case OP::OpCode::TextureGatherCmp: {
  2564. Value *gatherArgs[] = {
  2565. opArg, gatherHelper.texHandle, gatherHelper.samplerHandle,
  2566. // Coord.
  2567. gatherHelper.coord[0], gatherHelper.coord[1], gatherHelper.coord[2],
  2568. gatherHelper.coord[3],
  2569. // Offset.
  2570. gatherHelper.offset[0], gatherHelper.offset[1],
  2571. // Channel.
  2572. channelArg,
  2573. // CmpVal.
  2574. gatherHelper.special};
  2575. GenerateDxilGather(CI, F, gatherArgs, gatherHelper);
  2576. } break;
  2577. default:
  2578. DXASSERT(0, "invalid opcode for Gather");
  2579. break;
  2580. }
  2581. // CI is replaced in GenerateDxilGather.
  2582. return nullptr;
  2583. }
  2584. // Load/Store intrinsics.
// Helper describing one HL resource load: which DXIL opcode to emit and the
// operands (address, optional offset/status, mip level) pulled from the call.
struct ResLoadHelper {
  // Build from an HL Load/subscript call; picks BufferLoad vs TextureLoad
  // from the resource kind and captures addr/offset/status operands.
  ResLoadHelper(CallInst *CI, DxilResource::Kind RK, DxilResourceBase::Class RC,
                Value *h, bool bForSubscript=false);
  // Variant with an explicitly supplied mip level (mips[m].Load-style).
  ResLoadHelper(CallInst *CI, DxilResource::Kind RK, DxilResourceBase::Class RC,
                Value *h, Value *mip);
  // For double subscript.
  ResLoadHelper(Instruction *ldInst, Value *h, Value *idx, Value *mip)
      : opcode(OP::OpCode::TextureLoad), handle(h), retVal(ldInst), addr(idx),
        offset(nullptr), status(nullptr), mipLevel(mip) {}
  OP::OpCode opcode; // BufferLoad or TextureLoad.
  Value *handle;     // Resource handle.
  Value *retVal;     // Instruction whose uses are replaced by the lowered load.
  Value *addr;       // Coordinate / buffer address operand.
  Value *offset;     // Optional texel offset; null when absent.
  Value *status;     // Optional CheckAccessFullyMapped out arg; null when absent.
  Value *mipLevel;   // Mip level for texture loads (undef for UAV loads).
};
  2602. ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK,
  2603. DxilResourceBase::Class RC, Value *hdl, bool bForSubscript)
  2604. : handle(hdl), offset(nullptr), status(nullptr) {
  2605. switch (RK) {
  2606. case DxilResource::Kind::RawBuffer:
  2607. case DxilResource::Kind::TypedBuffer:
  2608. case DxilResource::Kind::StructuredBuffer:
  2609. opcode = OP::OpCode::BufferLoad;
  2610. break;
  2611. case DxilResource::Kind::Invalid:
  2612. DXASSERT(0, "invalid resource kind");
  2613. break;
  2614. default:
  2615. opcode = OP::OpCode::TextureLoad;
  2616. break;
  2617. }
  2618. retVal = CI;
  2619. const unsigned kAddrIdx = HLOperandIndex::kBufLoadAddrOpIdx;
  2620. addr = CI->getArgOperand(kAddrIdx);
  2621. unsigned argc = CI->getNumArgOperands();
  2622. if (opcode == OP::OpCode::TextureLoad) {
  2623. // mip at last channel
  2624. unsigned coordSize = DxilResource::GetNumCoords(RK);
  2625. if (RC == DxilResourceBase::Class::SRV) {
  2626. if (bForSubscript) {
  2627. // Use 0 when access by [].
  2628. mipLevel = IRBuilder<>(CI).getInt32(0);
  2629. } else {
  2630. if (coordSize == 1 && !addr->getType()->isVectorTy()) {
  2631. // Use addr when access by Load.
  2632. mipLevel = addr;
  2633. } else {
  2634. mipLevel = IRBuilder<>(CI).CreateExtractElement(addr, coordSize);
  2635. }
  2636. }
  2637. } else {
  2638. // Set mip level to undef for UAV.
  2639. mipLevel = UndefValue::get(Type::getInt32Ty(addr->getContext()));
  2640. }
  2641. if (RC == DxilResourceBase::Class::SRV) {
  2642. unsigned offsetIdx = HLOperandIndex::kTexLoadOffsetOpIdx;
  2643. unsigned statusIdx = HLOperandIndex::kTexLoadStatusOpIdx;
  2644. if (RK == DxilResource::Kind::Texture2DMS ||
  2645. RK == DxilResource::Kind::Texture2DMSArray) {
  2646. offsetIdx = HLOperandIndex::kTex2DMSLoadOffsetOpIdx;
  2647. statusIdx = HLOperandIndex::kTex2DMSLoadStatusOpIdx;
  2648. mipLevel =
  2649. CI->getArgOperand(HLOperandIndex::kTex2DMSLoadSampleIdxOpIdx);
  2650. }
  2651. if (argc > offsetIdx)
  2652. offset = CI->getArgOperand(offsetIdx);
  2653. if (argc > statusIdx)
  2654. status = CI->getArgOperand(statusIdx);
  2655. } else {
  2656. const unsigned kStatusIdx = HLOperandIndex::kRWTexLoadStatusOpIdx;
  2657. if (argc > kStatusIdx)
  2658. status = CI->getArgOperand(kStatusIdx);
  2659. }
  2660. } else {
  2661. const unsigned kStatusIdx = HLOperandIndex::kBufLoadStatusOpIdx;
  2662. if (argc > kStatusIdx)
  2663. status = CI->getArgOperand(kStatusIdx);
  2664. }
  2665. }
  2666. ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK,
  2667. DxilResourceBase::Class RC, Value *hdl, Value *mip)
  2668. : handle(hdl), offset(nullptr), status(nullptr) {
  2669. DXASSERT(RK != DxilResource::Kind::RawBuffer &&
  2670. RK != DxilResource::Kind::TypedBuffer &&
  2671. RK != DxilResource::Kind::Invalid,
  2672. "invalid resource kind");
  2673. opcode = OP::OpCode::TextureLoad;
  2674. retVal = CI;
  2675. mipLevel = mip;
  2676. const unsigned kAddrIdx = HLOperandIndex::kMipLoadAddrOpIdx;
  2677. addr = CI->getArgOperand(kAddrIdx);
  2678. unsigned argc = CI->getNumArgOperands();
  2679. const unsigned kOffsetIdx = HLOperandIndex::kMipLoadOffsetOpIdx;
  2680. const unsigned kStatusIdx = HLOperandIndex::kMipLoadStatusOpIdx;
  2681. if (argc > kOffsetIdx)
  2682. offset = CI->getArgOperand(kOffsetIdx);
  2683. if (argc > kStatusIdx)
  2684. status = CI->getArgOperand(kStatusIdx);
  2685. }
  2686. void TranslateStructBufSubscript(CallInst *CI, Value *handle, Value *status,
  2687. hlsl::OP *OP, const DataLayout &DL);
  2688. // Create { v0, v1 } from { v0.lo, v0.hi, v1.lo, v1.hi }
  2689. void Make64bitResultForLoad(Type *EltTy, ArrayRef<Value *> resultElts32,
  2690. unsigned size, MutableArrayRef<Value *> resultElts,
  2691. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  2692. Type *i64Ty = Builder.getInt64Ty();
  2693. Type *doubleTy = Builder.getDoubleTy();
  2694. if (EltTy == doubleTy) {
  2695. Function *makeDouble =
  2696. hlslOP->GetOpFunc(DXIL::OpCode::MakeDouble, doubleTy);
  2697. Value *makeDoubleOpArg =
  2698. Builder.getInt32((unsigned)DXIL::OpCode::MakeDouble);
  2699. for (unsigned i = 0; i < size; i++) {
  2700. Value *lo = resultElts32[2 * i];
  2701. Value *hi = resultElts32[2 * i + 1];
  2702. Value *V = Builder.CreateCall(makeDouble, {makeDoubleOpArg, lo, hi});
  2703. resultElts[i] = V;
  2704. }
  2705. } else {
  2706. for (unsigned i = 0; i < size; i++) {
  2707. Value *lo = resultElts32[2 * i];
  2708. Value *hi = resultElts32[2 * i + 1];
  2709. lo = Builder.CreateZExt(lo, i64Ty);
  2710. hi = Builder.CreateZExt(hi, i64Ty);
  2711. hi = Builder.CreateShl(hi, 32);
  2712. resultElts[i] = Builder.CreateOr(lo, hi);
  2713. }
  2714. }
  2715. }
// Lower the load described by `helper` into a dx.op.bufferLoad or
// dx.op.textureLoad call, scalarize the ResRet into the HL call's type, and
// forward the optional status. Structured-buffer subscripts that yield a
// pointer are routed to TranslateStructBufSubscript instead.
// NOTE: the emplace_back sequence below builds the DXIL operand list
// positionally; the order must match the dx.op signature exactly.
void TranslateLoad(ResLoadHelper &helper, HLResource::Kind RK,
                   IRBuilder<> &Builder, hlsl::OP *OP, const DataLayout &DL) {
  Type *Ty = helper.retVal->getType();
  if (Ty->isPointerTy()) {
    // A pointer result means a structured-buffer subscript; handled elsewhere.
    TranslateStructBufSubscript(cast<CallInst>(helper.retVal), helper.handle,
                                helper.status, OP, DL);
    return;
  }
  OP::OpCode opcode = helper.opcode;
  Type *i32Ty = Builder.getInt32Ty();
  Type *i64Ty = Builder.getInt64Ty();
  Type *doubleTy = Builder.getDoubleTy();
  Type *EltTy = Ty->getScalarType();
  // 64-bit elements are loaded as pairs of 32-bit dwords and reassembled
  // after the call (see Make64bitResultForLoad below).
  bool is64 = EltTy == i64Ty || EltTy == doubleTy;
  if (is64) {
    EltTy = i32Ty;
  }
  Function *F = OP->GetOpFunc(opcode, EltTy);
  llvm::Constant *opArg = OP->GetU32Const((unsigned)opcode);
  llvm::Value *undefI = llvm::UndefValue::get(i32Ty);
  SmallVector<Value *, 12> loadArgs;
  loadArgs.emplace_back(opArg); // opcode
  loadArgs.emplace_back(helper.handle); // resource handle
  if (opcode == OP::OpCode::TextureLoad) {
    // set mip level
    loadArgs.emplace_back(helper.mipLevel);
  }
  if (opcode == OP::OpCode::TextureLoad) {
    // texture coord: up to 3 components, undef-padded
    unsigned coordSize = DxilResource::GetNumCoords(RK);
    bool isVectorAddr = helper.addr->getType()->isVectorTy();
    for (unsigned i = 0; i < 3; i++) {
      if (i < coordSize) {
        // Scalar addr is used directly (1D load with scalar coordinate).
        loadArgs.emplace_back(
            isVectorAddr ? Builder.CreateExtractElement(helper.addr, i) : helper.addr);
      }
      else
        loadArgs.emplace_back(undefI);
    }
  } else {
    // Buffer load: single index operand.
    if (helper.addr->getType()->isVectorTy()) {
      Value *scalarOffset =
          Builder.CreateExtractElement(helper.addr, (uint64_t)0);
      // TODO: calculate the real address based on opcode
      loadArgs.emplace_back(scalarOffset); // offset
    } else {
      // TODO: calculate the real address based on opcode
      loadArgs.emplace_back(helper.addr); // offset
    }
  }
  // offset 0: texel offsets, undef-padded to 3 components
  if (opcode == OP::OpCode::TextureLoad) {
    if (helper.offset && !isa<llvm::UndefValue>(helper.offset)) {
      unsigned offsetSize = DxilResource::GetNumOffsets(RK);
      for (unsigned i = 0; i < 3; i++) {
        if (i < offsetSize)
          loadArgs.emplace_back(Builder.CreateExtractElement(helper.offset, i));
        else
          loadArgs.emplace_back(undefI);
      }
    } else {
      loadArgs.emplace_back(undefI);
      loadArgs.emplace_back(undefI);
      loadArgs.emplace_back(undefI);
    }
  }
  // Offset 1 (buffer loads only): undef for raw/typed, 0 for structured.
  if (RK == DxilResource::Kind::RawBuffer ||
      RK == DxilResource::Kind::TypedBuffer) {
    loadArgs.emplace_back(undefI);
  } else if (RK == DxilResource::Kind::StructuredBuffer)
    loadArgs.emplace_back(
        OP->GetU32Const(0)); // For case use built-in types in structure buffer.
  Value *ResRet =
      Builder.CreateCall(F, loadArgs, OP->GetOpCodeName(opcode));
  Value *retValNew = nullptr;
  if (!is64) {
    retValNew = ScalarizeResRet(Ty, ResRet, Builder);
  } else {
    // Pair up the four loaded dwords into at most two 64-bit elements.
    unsigned size = 1;
    if (Ty->isVectorTy()) {
      size = Ty->getVectorNumElements();
    }
    DXASSERT(size <= 2, "typed buffer only allow 4 dwords");
    EltTy = Ty->getScalarType();
    Value *Elts[2];
    Make64bitResultForLoad(Ty->getScalarType(),
                           {
                               Builder.CreateExtractValue(ResRet, 0),
                               Builder.CreateExtractValue(ResRet, 1),
                               Builder.CreateExtractValue(ResRet, 2),
                               Builder.CreateExtractValue(ResRet, 3),
                           },
                           size, Elts, OP, Builder);
    retValNew = ScalarizeElements(Ty, Elts, Builder);
  }
  // replace
  helper.retVal->replaceAllUsesWith(retValNew);
  // Save new ret val.
  helper.retVal = retValNew;
  // get status
  UpdateStatus(ResRet, helper.status, Builder);
}
  2819. Value *TranslateResourceLoad(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2820. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2821. hlsl::OP *hlslOP = &helper.hlslOP;
  2822. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  2823. IRBuilder<> Builder(CI);
  2824. DXIL::ResourceClass RC = pObjHelper->GetRC(handle);
  2825. DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
  2826. ResLoadHelper loadHelper(CI, RK, RC, handle);
  2827. TranslateLoad(loadHelper, RK, Builder, hlslOP, helper.legacyDataLayout);
  2828. // CI is replaced in TranslateLoad.
  2829. return nullptr;
  2830. }
  2831. // Split { v0, v1 } to { v0.lo, v0.hi, v1.lo, v1.hi }
  2832. void Split64bitValForStore(Type *EltTy, ArrayRef<Value *> vals, unsigned size,
  2833. MutableArrayRef<Value *> vals32, hlsl::OP *hlslOP,
  2834. IRBuilder<> &Builder) {
  2835. Type *i32Ty = Builder.getInt32Ty();
  2836. Type *doubleTy = Builder.getDoubleTy();
  2837. Value *undefI32 = UndefValue::get(i32Ty);
  2838. if (EltTy == doubleTy) {
  2839. Function *dToU = hlslOP->GetOpFunc(DXIL::OpCode::SplitDouble, doubleTy);
  2840. Value *dToUOpArg = Builder.getInt32((unsigned)DXIL::OpCode::SplitDouble);
  2841. for (unsigned i = 0; i < size; i++) {
  2842. if (isa<UndefValue>(vals[i])) {
  2843. vals32[2 * i] = undefI32;
  2844. vals32[2 * i + 1] = undefI32;
  2845. } else {
  2846. Value *retVal = Builder.CreateCall(dToU, {dToUOpArg, vals[i]});
  2847. Value *lo = Builder.CreateExtractValue(retVal, 0);
  2848. Value *hi = Builder.CreateExtractValue(retVal, 1);
  2849. vals32[2 * i] = lo;
  2850. vals32[2 * i + 1] = hi;
  2851. }
  2852. }
  2853. } else {
  2854. for (unsigned i = 0; i < size; i++) {
  2855. if (isa<UndefValue>(vals[i])) {
  2856. vals32[2 * i] = undefI32;
  2857. vals32[2 * i + 1] = undefI32;
  2858. } else {
  2859. Value *lo = Builder.CreateTrunc(vals[i], i32Ty);
  2860. Value *hi = Builder.CreateLShr(vals[i], 32);
  2861. hi = Builder.CreateTrunc(hi, i32Ty);
  2862. vals32[2 * i] = lo;
  2863. vals32[2 * i + 1] = hi;
  2864. }
  2865. }
  2866. }
  2867. }
  2868. void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
  2869. Value *offset, IRBuilder<> &Builder, hlsl::OP *OP) {
  2870. Type *Ty = val->getType();
  2871. OP::OpCode opcode;
  2872. switch (RK) {
  2873. case DxilResource::Kind::RawBuffer:
  2874. case DxilResource::Kind::TypedBuffer:
  2875. opcode = OP::OpCode::BufferStore;
  2876. break;
  2877. case DxilResource::Kind::Invalid:
  2878. DXASSERT(0, "invalid resource kind");
  2879. break;
  2880. default:
  2881. opcode = OP::OpCode::TextureStore;
  2882. break;
  2883. }
  2884. Type *i32Ty = Builder.getInt32Ty();
  2885. Type *i64Ty = Builder.getInt64Ty();
  2886. Type *doubleTy = Builder.getDoubleTy();
  2887. Type *EltTy = Ty->getScalarType();
  2888. bool is64 = EltTy == i64Ty || EltTy == doubleTy;
  2889. if (is64) {
  2890. EltTy = i32Ty;
  2891. }
  2892. Function *F = OP->GetOpFunc(opcode, EltTy);
  2893. llvm::Constant *opArg = OP->GetU32Const((unsigned)opcode);
  2894. llvm::Value *undefI =
  2895. llvm::UndefValue::get(llvm::Type::getInt32Ty(Ty->getContext()));
  2896. llvm::Value *undefVal = llvm::UndefValue::get(Ty->getScalarType());
  2897. SmallVector<Value *, 13> storeArgs;
  2898. storeArgs.emplace_back(opArg); // opcode
  2899. storeArgs.emplace_back(handle); // resource handle
  2900. if (RK == DxilResource::Kind::RawBuffer ||
  2901. RK == DxilResource::Kind::TypedBuffer) {
  2902. // Offset 0
  2903. if (offset->getType()->isVectorTy()) {
  2904. Value *scalarOffset = Builder.CreateExtractElement(offset, (uint64_t)0);
  2905. storeArgs.emplace_back(scalarOffset); // offset
  2906. } else {
  2907. storeArgs.emplace_back(offset); // offset
  2908. }
  2909. // Offset 1
  2910. storeArgs.emplace_back(undefI);
  2911. } else {
  2912. // texture store
  2913. unsigned coordSize = DxilResource::GetNumCoords(RK);
  2914. // Set x first.
  2915. if (offset->getType()->isVectorTy())
  2916. storeArgs.emplace_back(Builder.CreateExtractElement(offset, (uint64_t)0));
  2917. else
  2918. storeArgs.emplace_back(offset);
  2919. for (unsigned i = 1; i < 3; i++) {
  2920. if (i < coordSize)
  2921. storeArgs.emplace_back(Builder.CreateExtractElement(offset, i));
  2922. else
  2923. storeArgs.emplace_back(undefI);
  2924. }
  2925. // TODO: support mip for texture ST
  2926. }
  2927. // values
  2928. bool isTyped = opcode == OP::OpCode::TextureStore ||
  2929. RK == DxilResource::Kind::TypedBuffer;
  2930. uint8_t mask = 0;
  2931. if (Ty->isVectorTy()) {
  2932. unsigned vecSize = Ty->getVectorNumElements();
  2933. Value *emptyVal = undefVal;
  2934. if (isTyped) {
  2935. mask = DXIL::kCompMask_All;
  2936. emptyVal = Builder.CreateExtractElement(val, (uint64_t)0);
  2937. }
  2938. for (unsigned i = 0; i < 4; i++) {
  2939. if (i < vecSize) {
  2940. storeArgs.emplace_back(Builder.CreateExtractElement(val, i));
  2941. mask |= (1<<i);
  2942. } else {
  2943. storeArgs.emplace_back(emptyVal);
  2944. }
  2945. }
  2946. } else {
  2947. if (isTyped) {
  2948. mask = DXIL::kCompMask_All;
  2949. storeArgs.emplace_back(val);
  2950. storeArgs.emplace_back(val);
  2951. storeArgs.emplace_back(val);
  2952. storeArgs.emplace_back(val);
  2953. } else {
  2954. storeArgs.emplace_back(val);
  2955. storeArgs.emplace_back(undefVal);
  2956. storeArgs.emplace_back(undefVal);
  2957. storeArgs.emplace_back(undefVal);
  2958. mask = DXIL::kCompMask_X;
  2959. }
  2960. }
  2961. if (is64) {
  2962. DXASSERT(mask == DXIL::kCompMask_All, "only typed buffer could have 64bit");
  2963. unsigned size = 1;
  2964. if (Ty->isVectorTy()) {
  2965. size = Ty->getVectorNumElements();
  2966. }
  2967. DXASSERT(size <= 2, "typed buffer only allow 4 dwords");
  2968. unsigned val0OpIdx = opcode == DXIL::OpCode::TextureStore
  2969. ? DXIL::OperandIndex::kTextureStoreVal0OpIdx
  2970. : DXIL::OperandIndex::kBufferStoreVal0OpIdx;
  2971. Value *V0 = storeArgs[val0OpIdx];
  2972. Value *V1 = storeArgs[val0OpIdx+1];
  2973. Value *vals32[4];
  2974. EltTy = Ty->getScalarType();
  2975. Split64bitValForStore(EltTy, {V0, V1}, size, vals32, OP, Builder);
  2976. // Fill the uninit vals.
  2977. if (size == 1) {
  2978. vals32[2] = vals32[0];
  2979. vals32[3] = vals32[1];
  2980. }
  2981. // Change valOp to 32 version.
  2982. for (unsigned i = 0; i < 4; i++) {
  2983. storeArgs[val0OpIdx + i] = vals32[i];
  2984. }
  2985. }
  2986. storeArgs.emplace_back(OP->GetU8Const(mask));
  2987. Builder.CreateCall(F, storeArgs);
  2988. }
  2989. Value *TranslateResourceStore(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2990. HLOperationLowerHelper &helper,
  2991. HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2992. hlsl::OP *hlslOP = &helper.hlslOP;
  2993. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  2994. IRBuilder<> Builder(CI);
  2995. DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
  2996. Value *val = CI->getArgOperand(HLOperandIndex::kStoreValOpIdx);
  2997. Value *offset = CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx);
  2998. TranslateStore(RK, handle, val, offset, Builder, hlslOP);
  2999. return nullptr;
  3000. }
  3001. }
  3002. // Atomic intrinsics.
  3003. namespace {
  3004. // Atomic intrinsics.
// Gathers the operands of an HL Interlocked* call so the atomic translation
// routines below can emit the corresponding DXIL operation.
struct AtomicHelper {
  // MOP (object-method) form: operands come from the method's arg slots.
  AtomicHelper(CallInst *CI, OP::OpCode op, Value *h);
  // IOP (free-function) form: destination is a bufIdx/baseOffset pair.
  AtomicHelper(CallInst *CI, OP::OpCode op, Value *h, Value *bufIdx,
               Value *baseOffset);
  OP::OpCode opcode;    // AtomicBinOp or AtomicCompareExchange.
  Value *handle;        // Resource handle the atomic operates on.
  Value *addr;          // Destination coordinate/index (scalar or vector).
  Value *offset;        // Offset for structured buffer; null otherwise.
  Value *value;         // New value operand.
  Value *originalValue; // Out pointer for the prior value, or null.
  Value *compareValue;  // Comparand (compare-exchange only).
};
  3017. // For MOP version of Interlocked*.
  3018. AtomicHelper::AtomicHelper(CallInst *CI, OP::OpCode op, Value *h)
  3019. : opcode(op), handle(h), offset(nullptr), originalValue(nullptr) {
  3020. addr = CI->getArgOperand(HLOperandIndex::kObjectInterlockedDestOpIndex);
  3021. if (op == OP::OpCode::AtomicCompareExchange) {
  3022. compareValue = CI->getArgOperand(
  3023. HLOperandIndex::kObjectInterlockedCmpCompareValueOpIndex);
  3024. value =
  3025. CI->getArgOperand(HLOperandIndex::kObjectInterlockedCmpValueOpIndex);
  3026. if (CI->getNumArgOperands() ==
  3027. (HLOperandIndex::kObjectInterlockedCmpOriginalValueOpIndex + 1))
  3028. originalValue = CI->getArgOperand(
  3029. HLOperandIndex::kObjectInterlockedCmpOriginalValueOpIndex);
  3030. } else {
  3031. value = CI->getArgOperand(HLOperandIndex::kObjectInterlockedValueOpIndex);
  3032. if (CI->getNumArgOperands() ==
  3033. (HLOperandIndex::kObjectInterlockedOriginalValueOpIndex + 1))
  3034. originalValue = CI->getArgOperand(
  3035. HLOperandIndex::kObjectInterlockedOriginalValueOpIndex);
  3036. }
  3037. }
  3038. // For IOP version of Interlocked*.
  3039. AtomicHelper::AtomicHelper(CallInst *CI, OP::OpCode op, Value *h, Value *bufIdx,
  3040. Value *baseOffset)
  3041. : opcode(op), handle(h), addr(bufIdx),
  3042. offset(baseOffset), originalValue(nullptr) {
  3043. if (op == OP::OpCode::AtomicCompareExchange) {
  3044. compareValue =
  3045. CI->getArgOperand(HLOperandIndex::kInterlockedCmpCompareValueOpIndex);
  3046. value = CI->getArgOperand(HLOperandIndex::kInterlockedCmpValueOpIndex);
  3047. if (CI->getNumArgOperands() ==
  3048. (HLOperandIndex::kInterlockedCmpOriginalValueOpIndex + 1))
  3049. originalValue = CI->getArgOperand(
  3050. HLOperandIndex::kInterlockedCmpOriginalValueOpIndex);
  3051. } else {
  3052. value = CI->getArgOperand(HLOperandIndex::kInterlockedValueOpIndex);
  3053. if (CI->getNumArgOperands() ==
  3054. (HLOperandIndex::kInterlockedOriginalValueOpIndex + 1))
  3055. originalValue =
  3056. CI->getArgOperand(HLOperandIndex::kInterlockedOriginalValueOpIndex);
  3057. }
  3058. }
  3059. void TranslateAtomicBinaryOperation(AtomicHelper &helper,
  3060. DXIL::AtomicBinOpCode atomicOp,
  3061. IRBuilder<> &Builder, hlsl::OP *hlslOP) {
  3062. Value *handle = helper.handle;
  3063. Value *addr = helper.addr;
  3064. Value *val = helper.value;
  3065. Type *Ty = val->getType();
  3066. Value *undefI = UndefValue::get(Type::getInt32Ty(Ty->getContext()));
  3067. Function *dxilAtomic = hlslOP->GetOpFunc(helper.opcode, Ty->getScalarType());
  3068. Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(helper.opcode));
  3069. Value *atomicOpArg = hlslOP->GetU32Const(static_cast<unsigned>(atomicOp));
  3070. Value *args[] = {opArg, handle, atomicOpArg,
  3071. undefI, undefI, undefI, // coordinates
  3072. val};
  3073. // Setup coordinates.
  3074. if (addr->getType()->isVectorTy()) {
  3075. unsigned vectorNumElements = addr->getType()->getVectorNumElements();
  3076. DXASSERT(vectorNumElements <= 3, "up to 3 elements for atomic binary op");
  3077. _Analysis_assume_(vectorNumElements <= 3);
  3078. for (unsigned i = 0; i < vectorNumElements; i++) {
  3079. Value *Elt = Builder.CreateExtractElement(addr, i);
  3080. args[DXIL::OperandIndex::kAtomicBinOpCoord0OpIdx + i] = Elt;
  3081. }
  3082. } else
  3083. args[DXIL::OperandIndex::kAtomicBinOpCoord0OpIdx] = addr;
  3084. // Set offset for structured buffer.
  3085. if (helper.offset)
  3086. args[DXIL::OperandIndex::kAtomicBinOpCoord1OpIdx] = helper.offset;
  3087. Value *origVal =
  3088. Builder.CreateCall(dxilAtomic, args, hlslOP->GetAtomicOpName(atomicOp));
  3089. if (helper.originalValue) {
  3090. Builder.CreateStore(origVal, helper.originalValue);
  3091. }
  3092. }
  3093. Value *TranslateMopAtomicBinaryOperation(CallInst *CI, IntrinsicOp IOP,
  3094. OP::OpCode opcode,
  3095. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3096. hlsl::OP *hlslOP = &helper.hlslOP;
  3097. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  3098. IRBuilder<> Builder(CI);
  3099. switch (IOP) {
  3100. case IntrinsicOp::MOP_InterlockedAdd: {
  3101. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3102. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Add, Builder,
  3103. hlslOP);
  3104. } break;
  3105. case IntrinsicOp::MOP_InterlockedAnd: {
  3106. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3107. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::And, Builder,
  3108. hlslOP);
  3109. } break;
  3110. case IntrinsicOp::MOP_InterlockedExchange: {
  3111. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3112. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Exchange,
  3113. Builder, hlslOP);
  3114. } break;
  3115. case IntrinsicOp::MOP_InterlockedMax: {
  3116. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3117. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMax, Builder,
  3118. hlslOP);
  3119. } break;
  3120. case IntrinsicOp::MOP_InterlockedMin: {
  3121. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3122. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMin, Builder,
  3123. hlslOP);
  3124. } break;
  3125. case IntrinsicOp::MOP_InterlockedUMax: {
  3126. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3127. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMax, Builder,
  3128. hlslOP);
  3129. } break;
  3130. case IntrinsicOp::MOP_InterlockedUMin: {
  3131. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3132. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMin, Builder,
  3133. hlslOP);
  3134. } break;
  3135. case IntrinsicOp::MOP_InterlockedOr: {
  3136. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3137. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Or, Builder,
  3138. hlslOP);
  3139. } break;
  3140. case IntrinsicOp::MOP_InterlockedXor: {
  3141. default:
  3142. DXASSERT(IOP == IntrinsicOp::MOP_InterlockedXor,
  3143. "invalid MOP atomic intrinsic");
  3144. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3145. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Xor, Builder,
  3146. hlslOP);
  3147. } break;
  3148. }
  3149. return nullptr;
  3150. }
  3151. void TranslateAtomicCmpXChg(AtomicHelper &helper, IRBuilder<> &Builder,
  3152. hlsl::OP *hlslOP) {
  3153. Value *handle = helper.handle;
  3154. Value *addr = helper.addr;
  3155. Value *val = helper.value;
  3156. Value *cmpVal = helper.compareValue;
  3157. Type *Ty = val->getType();
  3158. Value *undefI = UndefValue::get(Type::getInt32Ty(Ty->getContext()));
  3159. Function *dxilAtomic = hlslOP->GetOpFunc(helper.opcode, Ty->getScalarType());
  3160. Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(helper.opcode));
  3161. Value *args[] = {opArg, handle, undefI, undefI, undefI, // coordinates
  3162. cmpVal, val};
  3163. // Setup coordinates.
  3164. if (addr->getType()->isVectorTy()) {
  3165. unsigned vectorNumElements = addr->getType()->getVectorNumElements();
  3166. DXASSERT(vectorNumElements <= 3, "up to 3 elements in atomic op");
  3167. _Analysis_assume_(vectorNumElements <= 3);
  3168. for (unsigned i = 0; i < vectorNumElements; i++) {
  3169. Value *Elt = Builder.CreateExtractElement(addr, i);
  3170. args[DXIL::OperandIndex::kAtomicCmpExchangeCoord0OpIdx + i] = Elt;
  3171. }
  3172. } else
  3173. args[DXIL::OperandIndex::kAtomicCmpExchangeCoord0OpIdx] = addr;
  3174. // Set offset for structured buffer.
  3175. if (helper.offset)
  3176. args[DXIL::OperandIndex::kAtomicCmpExchangeCoord1OpIdx] = helper.offset;
  3177. Value *origVal = Builder.CreateCall(dxilAtomic, args);
  3178. if (helper.originalValue) {
  3179. Builder.CreateStore(origVal, helper.originalValue);
  3180. }
  3181. }
  3182. Value *TranslateMopAtomicCmpXChg(CallInst *CI, IntrinsicOp IOP,
  3183. OP::OpCode opcode,
  3184. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3185. hlsl::OP *hlslOP = &helper.hlslOP;
  3186. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  3187. IRBuilder<> Builder(CI);
  3188. AtomicHelper atomicHelper(CI, OP::OpCode::AtomicCompareExchange, handle);
  3189. TranslateAtomicCmpXChg(atomicHelper, Builder, hlslOP);
  3190. return nullptr;
  3191. }
  3192. void TranslateSharedMemAtomicBinOp(CallInst *CI, IntrinsicOp IOP, Value *addr) {
  3193. AtomicRMWInst::BinOp Op;
  3194. switch (IOP) {
  3195. case IntrinsicOp::IOP_InterlockedAdd:
  3196. Op = AtomicRMWInst::BinOp::Add;
  3197. break;
  3198. case IntrinsicOp::IOP_InterlockedAnd:
  3199. Op = AtomicRMWInst::BinOp::And;
  3200. break;
  3201. case IntrinsicOp::IOP_InterlockedExchange:
  3202. Op = AtomicRMWInst::BinOp::Xchg;
  3203. break;
  3204. case IntrinsicOp::IOP_InterlockedMax:
  3205. Op = AtomicRMWInst::BinOp::Max;
  3206. break;
  3207. case IntrinsicOp::IOP_InterlockedUMax:
  3208. Op = AtomicRMWInst::BinOp::UMax;
  3209. break;
  3210. case IntrinsicOp::IOP_InterlockedMin:
  3211. Op = AtomicRMWInst::BinOp::Min;
  3212. break;
  3213. case IntrinsicOp::IOP_InterlockedUMin:
  3214. Op = AtomicRMWInst::BinOp::UMin;
  3215. break;
  3216. case IntrinsicOp::IOP_InterlockedOr:
  3217. Op = AtomicRMWInst::BinOp::Or;
  3218. break;
  3219. case IntrinsicOp::IOP_InterlockedXor:
  3220. default:
  3221. DXASSERT(IOP == IntrinsicOp::IOP_InterlockedXor, "Invalid Intrinsic");
  3222. Op = AtomicRMWInst::BinOp::Xor;
  3223. break;
  3224. }
  3225. Value *val = CI->getArgOperand(HLOperandIndex::kInterlockedValueOpIndex);
  3226. IRBuilder<> Builder(CI);
  3227. Value *Result = Builder.CreateAtomicRMW(
  3228. Op, addr, val, AtomicOrdering::SequentiallyConsistent);
  3229. if (CI->getNumArgOperands() >
  3230. HLOperandIndex::kInterlockedOriginalValueOpIndex)
  3231. Builder.CreateStore(
  3232. Result,
  3233. CI->getArgOperand(HLOperandIndex::kInterlockedOriginalValueOpIndex));
  3234. }
  3235. Value *TranslateIopAtomicBinaryOperation(CallInst *CI, IntrinsicOp IOP,
  3236. DXIL::OpCode opcode,
  3237. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3238. Value *addr = CI->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex);
  3239. // Get the original addr from cast.
  3240. if (CastInst *castInst = dyn_cast<CastInst>(addr))
  3241. addr = castInst->getOperand(0);
  3242. else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(addr)) {
  3243. if (CE->getOpcode() == Instruction::AddrSpaceCast) {
  3244. addr = CE->getOperand(0);
  3245. }
  3246. }
  3247. unsigned addressSpace = addr->getType()->getPointerAddressSpace();
  3248. if (addressSpace == DXIL::kTGSMAddrSpace)
  3249. TranslateSharedMemAtomicBinOp(CI, IOP, addr);
  3250. else {
  3251. // buffer atomic translated in TranslateSubscript.
  3252. // Do nothing here.
  3253. // Mark not translated.
  3254. Translated = false;
  3255. }
  3256. return nullptr;
  3257. }
  3258. void TranslateSharedMemAtomicCmpXChg(CallInst *CI, Value *addr) {
  3259. Value *val = CI->getArgOperand(HLOperandIndex::kInterlockedCmpValueOpIndex);
  3260. Value *cmpVal =
  3261. CI->getArgOperand(HLOperandIndex::kInterlockedCmpCompareValueOpIndex);
  3262. IRBuilder<> Builder(CI);
  3263. Value *Result = Builder.CreateAtomicCmpXchg(
  3264. addr, cmpVal, val, AtomicOrdering::SequentiallyConsistent,
  3265. AtomicOrdering::SequentiallyConsistent);
  3266. if (CI->getNumArgOperands() >
  3267. HLOperandIndex::kInterlockedCmpOriginalValueOpIndex) {
  3268. Value *originVal = Builder.CreateExtractValue(Result, 0);
  3269. Builder.CreateStore(
  3270. originVal,
  3271. CI->getArgOperand(HLOperandIndex::kInterlockedCmpOriginalValueOpIndex));
  3272. }
  3273. }
  3274. Value *TranslateIopAtomicCmpXChg(CallInst *CI, IntrinsicOp IOP,
  3275. DXIL::OpCode opcode,
  3276. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3277. Value *addr = CI->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex);
  3278. // Get the original addr from cast.
  3279. if (CastInst *castInst = dyn_cast<CastInst>(addr))
  3280. addr = castInst->getOperand(0);
  3281. unsigned addressSpace = addr->getType()->getPointerAddressSpace();
  3282. if (addressSpace == DXIL::kTGSMAddrSpace)
  3283. TranslateSharedMemAtomicCmpXChg(CI, addr);
  3284. else {
  3285. // buffer atomic translated in TranslateSubscript.
  3286. // Do nothing here.
  3287. // Mark not translated.
  3288. Translated = false;
  3289. }
  3290. return nullptr;
  3291. }
  3292. }
  3293. // Process Tess Factor.
  3294. namespace {
  3295. // Clamp to [0.0f..1.0f], NaN->0.0f.
  3296. Value *CleanupTessFactorScale(Value *input, hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  3297. float fMin = 0;
  3298. float fMax = 1;
  3299. Type *f32Ty = input->getType()->getScalarType();
  3300. Value *minFactor = ConstantFP::get(f32Ty, fMin);
  3301. Value *maxFactor = ConstantFP::get(f32Ty, fMax);
  3302. Type *Ty = input->getType();
  3303. if (Ty->isVectorTy())
  3304. minFactor = SplatToVector(minFactor, input->getType(), Builder);
  3305. Value *temp = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, input, minFactor, hlslOP, Builder);
  3306. if (Ty->isVectorTy())
  3307. maxFactor = SplatToVector(maxFactor, input->getType(), Builder);
  3308. return TrivialDxilBinaryOperation(DXIL::OpCode::FMin, temp, maxFactor, hlslOP, Builder);
  3309. }
  3310. // Clamp to [1.0f..Inf], NaN->1.0f.
  3311. Value *CleanupTessFactor(Value *input, hlsl::OP *hlslOP, IRBuilder<> &Builder)
  3312. {
  3313. float fMin = 1.0;
  3314. Type *f32Ty = input->getType()->getScalarType();
  3315. Value *minFactor = ConstantFP::get(f32Ty, fMin);
  3316. minFactor = SplatToVector(minFactor, input->getType(), Builder);
  3317. return TrivialDxilBinaryOperation(DXIL::OpCode::FMax, input, minFactor, hlslOP, Builder);
  3318. }
  3319. // Do partitioning-specific clamping.
  3320. Value *ClampTessFactor(Value *input, DXIL::TessellatorPartitioning partitionMode,
  3321. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  3322. const unsigned kTESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR = 64;
  3323. const unsigned kTESSELLATOR_MAX_ODD_TESSELLATION_FACTOR = 63;
  3324. const unsigned kTESSELLATOR_MIN_EVEN_TESSELLATION_FACTOR = 2;
  3325. const unsigned kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR = 1;
  3326. const unsigned kTESSELLATOR_MAX_TESSELLATION_FACTOR = 64;
  3327. float fMin;
  3328. float fMax;
  3329. switch (partitionMode) {
  3330. case DXIL::TessellatorPartitioning::Integer:
  3331. fMin = kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR;
  3332. fMax = kTESSELLATOR_MAX_TESSELLATION_FACTOR;
  3333. break;
  3334. case DXIL::TessellatorPartitioning::Pow2:
  3335. fMin = kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR;
  3336. fMax = kTESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR;
  3337. break;
  3338. case DXIL::TessellatorPartitioning::FractionalOdd:
  3339. fMin = kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR;
  3340. fMax = kTESSELLATOR_MAX_ODD_TESSELLATION_FACTOR;
  3341. break;
  3342. case DXIL::TessellatorPartitioning::FractionalEven:
  3343. default:
  3344. DXASSERT(partitionMode == DXIL::TessellatorPartitioning::FractionalEven,
  3345. "invalid partition mode");
  3346. fMin = kTESSELLATOR_MIN_EVEN_TESSELLATION_FACTOR;
  3347. fMax = kTESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR;
  3348. break;
  3349. }
  3350. Type *f32Ty = input->getType()->getScalarType();
  3351. Value *minFactor = ConstantFP::get(f32Ty, fMin);
  3352. Value *maxFactor = ConstantFP::get(f32Ty, fMax);
  3353. Type *Ty = input->getType();
  3354. if (Ty->isVectorTy())
  3355. minFactor = SplatToVector(minFactor, input->getType(), Builder);
  3356. Value *temp = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, input, minFactor, hlslOP, Builder);
  3357. if (Ty->isVectorTy())
  3358. maxFactor = SplatToVector(maxFactor, input->getType(), Builder);
  3359. return TrivialDxilBinaryOperation(DXIL::OpCode::FMin, temp, maxFactor, hlslOP, Builder);
  3360. }
  3361. // round up for integer/pow2 partitioning
  3362. // note that this code assumes the inputs should be in the range [1, inf),
  3363. // which should be enforced by the clamp above.
  3364. Value *RoundUpTessFactor(Value *input, DXIL::TessellatorPartitioning partitionMode,
  3365. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  3366. switch (partitionMode) {
  3367. case DXIL::TessellatorPartitioning::Integer:
  3368. return TrivialDxilUnaryOperation(DXIL::OpCode::Round_pi, input, hlslOP, Builder);
  3369. case DXIL::TessellatorPartitioning::Pow2: {
  3370. const unsigned kExponentMask = 0x7f800000;
  3371. const unsigned kExponentLSB = 0x00800000;
  3372. const unsigned kMantissaMask = 0x007fffff;
  3373. Type *Ty = input->getType();
  3374. // (val = (asuint(val) & mantissamask) ?
  3375. // (asuint(val) & exponentmask) + exponentbump :
  3376. // asuint(val) & exponentmask;
  3377. Type *uintTy = Type::getInt32Ty(Ty->getContext());
  3378. if (Ty->isVectorTy())
  3379. uintTy = VectorType::get(uintTy, Ty->getVectorNumElements());
  3380. Value *uintVal = Builder.CreateCast(Instruction::CastOps::FPToUI, input, uintTy);
  3381. Value *mantMask = ConstantInt::get(uintTy->getScalarType(), kMantissaMask);
  3382. mantMask = SplatToVector(mantMask, uintTy, Builder);
  3383. Value *manVal = Builder.CreateAnd(uintVal, mantMask);
  3384. Value *expMask = ConstantInt::get(uintTy->getScalarType(), kExponentMask);
  3385. expMask = SplatToVector(expMask, uintTy, Builder);
  3386. Value *expVal = Builder.CreateAnd(uintVal, expMask);
  3387. Value *expLSB = ConstantInt::get(uintTy->getScalarType(), kExponentLSB);
  3388. expLSB = SplatToVector(expLSB, uintTy, Builder);
  3389. Value *newExpVal = Builder.CreateAdd(expVal, expLSB);
  3390. Value *manValNotZero = Builder.CreateICmpEQ(manVal, ConstantAggregateZero::get(uintTy));
  3391. Value *factors = Builder.CreateSelect(manValNotZero, newExpVal, expVal);
  3392. return Builder.CreateUIToFP(factors, Ty);
  3393. } break;
  3394. case DXIL::TessellatorPartitioning::FractionalEven:
  3395. case DXIL::TessellatorPartitioning::FractionalOdd:
  3396. return input;
  3397. default:
  3398. DXASSERT(0, "invalid partition mode");
  3399. return nullptr;
  3400. }
  3401. }
  3402. Value *TranslateProcessIsolineTessFactors(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  3403. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3404. hlsl::OP *hlslOP = &helper.hlslOP;
  3405. // Get partition mode
  3406. DXASSERT(helper.functionProps, "");
  3407. DXASSERT(helper.functionProps->shaderKind == ShaderModel::Kind::Hull, "must be hull shader");
  3408. DXIL::TessellatorPartitioning partition = helper.functionProps->ShaderProps.HS.partition;
  3409. IRBuilder<> Builder(CI);
  3410. Value *rawDetailFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRawDetailFactor);
  3411. rawDetailFactor = Builder.CreateExtractElement(rawDetailFactor, (uint64_t)0);
  3412. Value *rawDensityFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRawDensityFactor);
  3413. rawDensityFactor = Builder.CreateExtractElement(rawDensityFactor, (uint64_t)0);
  3414. Value *init = UndefValue::get(VectorType::get(helper.f32Ty, 2));
  3415. init = Builder.CreateInsertElement(init, rawDetailFactor, (uint64_t)0);
  3416. init = Builder.CreateInsertElement(init, rawDetailFactor, (uint64_t)1);
  3417. Value *clamped = ClampTessFactor(init, partition, hlslOP, Builder);
  3418. Value *rounded = RoundUpTessFactor(clamped, partition, hlslOP, Builder);
  3419. Value *roundedDetailFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedDetailFactor);
  3420. Value *temp = UndefValue::get(VectorType::get(helper.f32Ty, 1));
  3421. Value *roundedX = Builder.CreateExtractElement(rounded, (uint64_t)0);
  3422. temp = Builder.CreateInsertElement(temp, roundedX, (uint64_t)0);
  3423. Builder.CreateStore(temp, roundedDetailFactor);
  3424. Value *roundedDensityFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedDensityFactor);
  3425. Value *roundedY = Builder.CreateExtractElement(rounded, 1);
  3426. temp = Builder.CreateInsertElement(temp, roundedY, (uint64_t)0);
  3427. Builder.CreateStore(temp, roundedDensityFactor);
  3428. return nullptr;
  3429. }
  3430. // 3 inputs, 1 result
  3431. Value *ApplyTriTessFactorOp(Value *input, DXIL::OpCode opcode, hlsl::OP *hlslOP,
  3432. IRBuilder<> &Builder) {
  3433. Value *input0 = Builder.CreateExtractElement(input, (uint64_t)0);
  3434. Value *input1 = Builder.CreateExtractElement(input, 1);
  3435. Value *input2 = Builder.CreateExtractElement(input, 2);
  3436. if (opcode == DXIL::OpCode::FMax || opcode == DXIL::OpCode::FMin) {
  3437. Value *temp =
  3438. TrivialDxilBinaryOperation(opcode, input0, input1, hlslOP, Builder);
  3439. Value *combined =
  3440. TrivialDxilBinaryOperation(opcode, temp, input2, hlslOP, Builder);
  3441. return combined;
  3442. } else {
  3443. // Avg.
  3444. Value *temp = Builder.CreateFAdd(input0, input1);
  3445. Value *combined = Builder.CreateFAdd(temp, input2);
  3446. Value *rcp = ConstantFP::get(input0->getType(), 1.0 / 3.0);
  3447. combined = Builder.CreateFMul(combined, rcp);
  3448. return combined;
  3449. }
  3450. }
  3451. // 4 inputs, 1 result
  3452. Value *ApplyQuadTessFactorOp(Value *input, DXIL::OpCode opcode,
  3453. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  3454. Value *input0 = Builder.CreateExtractElement(input, (uint64_t)0);
  3455. Value *input1 = Builder.CreateExtractElement(input, 1);
  3456. Value *input2 = Builder.CreateExtractElement(input, 2);
  3457. Value *input3 = Builder.CreateExtractElement(input, 3);
  3458. if (opcode == DXIL::OpCode::FMax || opcode == DXIL::OpCode::FMin) {
  3459. Value *temp0 =
  3460. TrivialDxilBinaryOperation(opcode, input0, input1, hlslOP, Builder);
  3461. Value *temp1 =
  3462. TrivialDxilBinaryOperation(opcode, input2, input3, hlslOP, Builder);
  3463. Value *combined =
  3464. TrivialDxilBinaryOperation(opcode, temp0, temp1, hlslOP, Builder);
  3465. return combined;
  3466. } else {
  3467. // Avg.
  3468. Value *temp0 = Builder.CreateFAdd(input0, input1);
  3469. Value *temp1 = Builder.CreateFAdd(input2, input3);
  3470. Value *combined = Builder.CreateFAdd(temp0, temp1);
  3471. Value *rcp = ConstantFP::get(input0->getType(), 0.25);
  3472. combined = Builder.CreateFMul(combined, rcp);
  3473. return combined;
  3474. }
  3475. }
  3476. // 4 inputs, 2 result
  3477. Value *Apply2DQuadTessFactorOp(Value *input, DXIL::OpCode opcode,
  3478. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  3479. Value *input0 = Builder.CreateExtractElement(input, (uint64_t)0);
  3480. Value *input1 = Builder.CreateExtractElement(input, 1);
  3481. Value *input2 = Builder.CreateExtractElement(input, 2);
  3482. Value *input3 = Builder.CreateExtractElement(input, 3);
  3483. if (opcode == DXIL::OpCode::FMax || opcode == DXIL::OpCode::FMin) {
  3484. Value *temp0 =
  3485. TrivialDxilBinaryOperation(opcode, input0, input1, hlslOP, Builder);
  3486. Value *temp1 =
  3487. TrivialDxilBinaryOperation(opcode, input2, input3, hlslOP, Builder);
  3488. Value *combined = UndefValue::get(VectorType::get(input0->getType(), 2));
  3489. combined = Builder.CreateInsertElement(combined, temp0, (uint64_t)0);
  3490. combined = Builder.CreateInsertElement(combined, temp1, 1);
  3491. return combined;
  3492. } else {
  3493. // Avg.
  3494. Value *temp0 = Builder.CreateFAdd(input0, input1);
  3495. Value *temp1 = Builder.CreateFAdd(input2, input3);
  3496. Value *combined = UndefValue::get(VectorType::get(input0->getType(), 2));
  3497. combined = Builder.CreateInsertElement(combined, temp0, (uint64_t)0);
  3498. combined = Builder.CreateInsertElement(combined, temp1, 1);
  3499. Constant *rcp = ConstantFP::get(input0->getType(), 0.5);
  3500. rcp = ConstantVector::getSplat(2, rcp);
  3501. combined = Builder.CreateFMul(combined, rcp);
  3502. return combined;
  3503. }
  3504. }
// Substitute the (clamped, limited, rounded) average of the unscaled factors
// into every component where the rounded result fell below `cutoffVal`.
// *pClampedResult is updated in place with the same selection applied to the
// clamped values; the selection over the rounded values is returned.
Value *ResolveSmallValue(Value **pClampedResult, Value *rounded,
                         Value *averageUnscaled, float cutoffVal,
                         DXIL::TessellatorPartitioning partitionMode,
                         hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  Value *clampedResult = *pClampedResult;
  Value *clampedVal = clampedResult;
  Value *roundedVal = rounded;
  // Do partitioning-specific clamping.
  Value *clampedAvg =
      ClampTessFactor(averageUnscaled, partitionMode, hlslOP, Builder);
  Constant *cutoffVals =
      ConstantFP::get(Type::getFloatTy(rounded->getContext()), cutoffVal);
  // Splat the cutoff to match the average's shape when it is a vector.
  if (clampedAvg->getType()->isVectorTy())
    cutoffVals = ConstantVector::getSplat(
        clampedAvg->getType()->getVectorNumElements(), cutoffVals);
  // Limit the value.
  clampedAvg = TrivialDxilBinaryOperation(DXIL::OpCode::FMin, clampedAvg,
                                          cutoffVals, hlslOP, Builder);
  // Round up for integer/pow2 partitioning.
  Value *roundedAvg =
      RoundUpTessFactor(clampedAvg, partitionMode, hlslOP, Builder);
  // Re-splat when `rounded` has a different vector shape than the average.
  if (rounded->getType() != cutoffVals->getType())
    cutoffVals = ConstantVector::getSplat(
        rounded->getType()->getVectorNumElements(), cutoffVals);
  // If the scaled value is less than three, then take the unscaled average.
  Value *lt = Builder.CreateFCmpOLT(rounded, cutoffVals);
  if (clampedAvg->getType() != clampedVal->getType())
    clampedAvg = SplatToVector(clampedAvg, clampedVal->getType(), Builder);
  *pClampedResult = Builder.CreateSelect(lt, clampedAvg, clampedVal);
  if (roundedAvg->getType() != roundedVal->getType())
    roundedAvg = SplatToVector(roundedAvg, roundedVal->getType(), Builder);
  Value *result = Builder.CreateSelect(lt, roundedAvg, roundedVal);
  return result;
}
// Post-process the two per-axis quad tess factors: for every component of
// the clamped result below `cutoffVal`, replace both the clamped and the
// final (rounded) result with the max across the two axes, capped at the
// cutoff (raw cutoff for the clamped result, rounded cutoff for the final
// one). Both results are updated in place through the pointer parameters.
void ResolveQuadAxes(Value **pFinalResult, Value **pClampedResult,
                     float cutoffVal,
                     DXIL::TessellatorPartitioning partitionMode,
                     hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  Value *finalResult = *pFinalResult;
  Value *clampedResult = *pClampedResult;

  Value *clampR = clampedResult;
  Value *finalR = finalResult;
  Type *f32Ty = Type::getFloatTy(finalR->getContext());
  Constant *cutoffVals = ConstantFP::get(f32Ty, cutoffVal);

  // Caps for the replacement values: the raw cutoff for the clamped result,
  // the partition-rounded cutoff for the final result.
  Value *minValsX = cutoffVals;
  Value *minValsY =
      RoundUpTessFactor(cutoffVals, partitionMode, hlslOP, Builder);

  // Max across the two axes of each 2-component result.
  Value *clampRX = Builder.CreateExtractElement(clampR, (uint64_t)0);
  Value *clampRY = Builder.CreateExtractElement(clampR, 1);
  Value *maxValsX = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, clampRX,
                                               clampRY, hlslOP, Builder);

  Value *finalRX = Builder.CreateExtractElement(finalR, (uint64_t)0);
  Value *finalRY = Builder.CreateExtractElement(finalR, 1);
  Value *maxValsY = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, finalRX,
                                               finalRY, hlslOP, Builder);

  // Don't go over our threshold ("final" one is rounded).
  Value *optionX = TrivialDxilBinaryOperation(DXIL::OpCode::FMin, maxValsX,
                                              minValsX, hlslOP, Builder);
  Value *optionY = TrivialDxilBinaryOperation(DXIL::OpCode::FMin, maxValsY,
                                              minValsY, hlslOP, Builder);

  Value *clampL = SplatToVector(optionX, clampR->getType(), Builder);
  Value *finalL = SplatToVector(optionY, finalR->getType(), Builder);

  // Per-component select: components below the cutoff take the replacement.
  cutoffVals = ConstantVector::getSplat(2, cutoffVals);
  Value *lt = Builder.CreateFCmpOLT(clampedResult, cutoffVals);
  *pClampedResult = Builder.CreateSelect(lt, clampL, clampR);
  *pFinalResult = Builder.CreateSelect(lt, finalL, finalR);
}
  3557. Value *TranslateProcessTessFactors(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  3558. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3559. hlsl::OP *hlslOP = &helper.hlslOP;
  3560. // Get partition mode
  3561. DXASSERT(helper.functionProps, "");
  3562. DXASSERT(helper.functionProps->shaderKind == ShaderModel::Kind::Hull, "must be hull shader");
  3563. DXIL::TessellatorPartitioning partition = helper.functionProps->ShaderProps.HS.partition;
  3564. IRBuilder<> Builder(CI);
  3565. DXIL::OpCode tessFactorOp = DXIL::OpCode::NumOpCodes;
  3566. switch (IOP) {
  3567. case IntrinsicOp::IOP_Process2DQuadTessFactorsMax:
  3568. case IntrinsicOp::IOP_ProcessQuadTessFactorsMax:
  3569. case IntrinsicOp::IOP_ProcessTriTessFactorsMax:
  3570. tessFactorOp = DXIL::OpCode::FMax;
  3571. break;
  3572. case IntrinsicOp::IOP_Process2DQuadTessFactorsMin:
  3573. case IntrinsicOp::IOP_ProcessQuadTessFactorsMin:
  3574. case IntrinsicOp::IOP_ProcessTriTessFactorsMin:
  3575. tessFactorOp = DXIL::OpCode::FMin;
  3576. break;
  3577. default:
  3578. // Default is Avg.
  3579. break;
  3580. }
  3581. Value *rawEdgeFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRawEdgeFactor);
  3582. Value *insideScale = CI->getArgOperand(HLOperandIndex::kProcessTessFactorInsideScale);
  3583. // Clamp to [0.0f..1.0f], NaN->0.0f.
  3584. Value *scales = CleanupTessFactorScale(insideScale, hlslOP, Builder);
  3585. // Do partitioning-specific clamping.
  3586. Value *clamped = ClampTessFactor(rawEdgeFactor, partition, hlslOP, Builder);
  3587. // Round up for integer/pow2 partitioning.
  3588. Value *rounded = RoundUpTessFactor(clamped, partition, hlslOP, Builder);
  3589. // Store the output.
  3590. Value *roundedEdgeFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedEdgeFactor);
  3591. Builder.CreateStore(rounded, roundedEdgeFactor);
  3592. // Clamp to [1.0f..Inf], NaN->1.0f.
  3593. bool isQuad = false;
  3594. Value *clean = CleanupTessFactor(rawEdgeFactor, hlslOP, Builder);
  3595. Value *factors = nullptr;
  3596. switch (IOP) {
  3597. case IntrinsicOp::IOP_Process2DQuadTessFactorsAvg:
  3598. case IntrinsicOp::IOP_Process2DQuadTessFactorsMax:
  3599. case IntrinsicOp::IOP_Process2DQuadTessFactorsMin:
  3600. factors = Apply2DQuadTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
  3601. break;
  3602. case IntrinsicOp::IOP_ProcessQuadTessFactorsAvg:
  3603. case IntrinsicOp::IOP_ProcessQuadTessFactorsMax:
  3604. case IntrinsicOp::IOP_ProcessQuadTessFactorsMin:
  3605. factors = ApplyQuadTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
  3606. isQuad = true;
  3607. break;
  3608. case IntrinsicOp::IOP_ProcessTriTessFactorsAvg:
  3609. case IntrinsicOp::IOP_ProcessTriTessFactorsMax:
  3610. case IntrinsicOp::IOP_ProcessTriTessFactorsMin:
  3611. factors = ApplyTriTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
  3612. break;
  3613. default:
  3614. DXASSERT(0, "invalid opcode for ProcessTessFactor");
  3615. break;
  3616. }
  3617. Value *scaledI = nullptr;
  3618. if (scales->getType() == factors->getType())
  3619. scaledI = Builder.CreateFMul(factors, scales);
  3620. else {
  3621. Value *vecFactors = SplatToVector(factors, scales->getType(), Builder);
  3622. scaledI = Builder.CreateFMul(vecFactors, scales);
  3623. }
  3624. // Do partitioning-specific clamping.
  3625. Value *clampedI = ClampTessFactor(scaledI, partition, hlslOP, Builder);
  3626. // Round up for integer/pow2 partitioning.
  3627. Value *roundedI = RoundUpTessFactor(clampedI, partition, hlslOP, Builder);
  3628. Value *finalI = roundedI;
  3629. if (partition == DXIL::TessellatorPartitioning::FractionalOdd) {
  3630. // If not max, set to AVG.
  3631. if (tessFactorOp != DXIL::OpCode::FMax)
  3632. tessFactorOp = DXIL::OpCode::NumOpCodes;
  3633. bool b2D = false;
  3634. Value *avgFactorsI = nullptr;
  3635. switch (IOP) {
  3636. case IntrinsicOp::IOP_Process2DQuadTessFactorsAvg:
  3637. case IntrinsicOp::IOP_Process2DQuadTessFactorsMax:
  3638. case IntrinsicOp::IOP_Process2DQuadTessFactorsMin:
  3639. avgFactorsI = Apply2DQuadTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
  3640. b2D = true;
  3641. break;
  3642. case IntrinsicOp::IOP_ProcessQuadTessFactorsAvg:
  3643. case IntrinsicOp::IOP_ProcessQuadTessFactorsMax:
  3644. case IntrinsicOp::IOP_ProcessQuadTessFactorsMin:
  3645. avgFactorsI = ApplyQuadTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
  3646. break;
  3647. case IntrinsicOp::IOP_ProcessTriTessFactorsAvg:
  3648. case IntrinsicOp::IOP_ProcessTriTessFactorsMax:
  3649. case IntrinsicOp::IOP_ProcessTriTessFactorsMin:
  3650. avgFactorsI = ApplyTriTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
  3651. break;
  3652. default:
  3653. DXASSERT(0, "invalid opcode for ProcessTessFactor");
  3654. break;
  3655. }
  3656. finalI =
  3657. ResolveSmallValue(/*inout*/&clampedI, roundedI, avgFactorsI, /*cufoff*/ 3.0,
  3658. partition, hlslOP, Builder);
  3659. if (b2D)
  3660. ResolveQuadAxes(/*inout*/&finalI, /*inout*/&clampedI, /*cutoff*/3.0, partition, hlslOP, Builder);
  3661. }
  3662. Value *unroundedInsideFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorUnRoundedInsideFactor);
  3663. Type *outFactorTy = unroundedInsideFactor->getType()->getPointerElementType();
  3664. if (outFactorTy != clampedI->getType()) {
  3665. DXASSERT(isQuad, "quad only write one channel of out factor");
  3666. clampedI = Builder.CreateExtractElement(clampedI, (uint64_t)0);
  3667. // Splat clampedI to float2.
  3668. clampedI = SplatToVector(clampedI, outFactorTy, Builder);
  3669. }
  3670. Builder.CreateStore(clampedI, unroundedInsideFactor);
  3671. Value *roundedInsideFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedInsideFactor);
  3672. if (outFactorTy != finalI->getType()) {
  3673. DXASSERT(isQuad, "quad only write one channel of out factor");
  3674. finalI = Builder.CreateExtractElement(finalI, (uint64_t)0);
  3675. // Splat finalI to float2.
  3676. finalI = SplatToVector(finalI, outFactorTy, Builder);
  3677. }
  3678. Builder.CreateStore(finalI, roundedInsideFactor);
  3679. return nullptr;
  3680. }
  3681. }
  3682. // Lower table.
  3683. namespace {
  3684. Value *EmptyLower(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode,
  3685. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3686. DXASSERT(0, "unsupported intrinsic");
  3687. return nullptr;
  3688. }
  3689. Value *StreamOutputLower(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode,
  3690. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3691. // Translated in DxilGenerationPass::GenerateStreamOutputOperation.
  3692. // Do nothing here.
  3693. // Mark not translated.
  3694. Translated = false;
  3695. return nullptr;
  3696. }
// Intrinsic lowering dispatch table.
// IMPORTANT: TranslateBuiltinIntrinsic indexes this array directly with the
// HL opcode, so entries must appear in exactly the same order as the
// IntrinsicOp enumeration, with one entry per intrinsic (Num_Intrinsics
// total).  When adding an intrinsic, insert its row at the matching position.
IntrinsicLower gLowerTable[static_cast<unsigned>(IntrinsicOp::Num_Intrinsics)] = {
    {IntrinsicOp::IOP_AddUint64, TranslateAddUint64, DXIL::OpCode::UAddc},
    {IntrinsicOp::IOP_AllMemoryBarrier, TrivialBarrier, DXIL::OpCode::Barrier},
    {IntrinsicOp::IOP_AllMemoryBarrierWithGroupSync, TrivialBarrier, DXIL::OpCode::Barrier},
    {IntrinsicOp::IOP_CheckAccessFullyMapped, TrivialUnaryOperation, DXIL::OpCode::CheckAccessFullyMapped},
    {IntrinsicOp::IOP_D3DCOLORtoUBYTE4, TranslateD3DColorToUByte4, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_DeviceMemoryBarrier, TrivialBarrier, DXIL::OpCode::Barrier},
    {IntrinsicOp::IOP_DeviceMemoryBarrierWithGroupSync, TrivialBarrier, DXIL::OpCode::Barrier},
    {IntrinsicOp::IOP_EvaluateAttributeAtSample, TranslateEvalSample, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_EvaluateAttributeCentroid, TranslateEvalCentroid, DXIL::OpCode::EvalCentroid},
    {IntrinsicOp::IOP_EvaluateAttributeSnapped, TranslateEvalSnapped, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_GetAttributeAtVertex, TranslateGetAttributeAtVertex, DXIL::OpCode::AttributeAtVertex},
    {IntrinsicOp::IOP_GetRenderTargetSampleCount, TrivialNoArgOperation, DXIL::OpCode::RenderTargetGetSampleCount},
    {IntrinsicOp::IOP_GetRenderTargetSamplePosition, TranslateGetRTSamplePos, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_GroupMemoryBarrier, TrivialBarrier, DXIL::OpCode::Barrier},
    {IntrinsicOp::IOP_GroupMemoryBarrierWithGroupSync, TrivialBarrier, DXIL::OpCode::Barrier},
    {IntrinsicOp::IOP_InterlockedAdd, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_InterlockedAnd, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_InterlockedCompareExchange, TranslateIopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_InterlockedCompareStore, TranslateIopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_InterlockedExchange, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_InterlockedMax, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_InterlockedMin, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_InterlockedOr, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_InterlockedXor, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_NonUniformResourceIndex, TranslateNonUniformResourceIndex, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_Process2DQuadTessFactorsAvg, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_Process2DQuadTessFactorsMax, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_Process2DQuadTessFactorsMin, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_ProcessIsolineTessFactors, TranslateProcessIsolineTessFactors, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_ProcessQuadTessFactorsAvg, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_ProcessQuadTessFactorsMax, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_ProcessQuadTessFactorsMin, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_ProcessTriTessFactorsAvg, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_ProcessTriTessFactorsMax, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_ProcessTriTessFactorsMin, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_QuadReadAcrossDiagonal, TranslateQuadReadAcross, DXIL::OpCode::QuadOp},
    {IntrinsicOp::IOP_QuadReadAcrossX, TranslateQuadReadAcross, DXIL::OpCode::QuadOp},
    {IntrinsicOp::IOP_QuadReadAcrossY, TranslateQuadReadAcross, DXIL::OpCode::QuadOp},
    {IntrinsicOp::IOP_QuadReadLaneAt, TranslateQuadReadLaneAt, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_WaveActiveAllEqual, TranslateWaveAllEqual, DXIL::OpCode::WaveActiveAllEqual},
    {IntrinsicOp::IOP_WaveActiveAllTrue, TranslateWaveA2B, DXIL::OpCode::WaveAllTrue},
    {IntrinsicOp::IOP_WaveActiveAnyTrue, TranslateWaveA2B, DXIL::OpCode::WaveAnyTrue},
    {IntrinsicOp::IOP_WaveActiveBallot, TranslateWaveBallot, DXIL::OpCode::WaveActiveBallot},
    {IntrinsicOp::IOP_WaveActiveBitAnd, TranslateWaveA2A, DXIL::OpCode::WaveActiveBit},
    {IntrinsicOp::IOP_WaveActiveBitOr, TranslateWaveA2A, DXIL::OpCode::WaveActiveBit},
    {IntrinsicOp::IOP_WaveActiveBitXor, TranslateWaveA2A, DXIL::OpCode::WaveActiveBit},
    {IntrinsicOp::IOP_WaveActiveCountBits, TranslateWaveA2B, DXIL::OpCode::WaveAllBitCount},
    {IntrinsicOp::IOP_WaveActiveMax, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp},
    {IntrinsicOp::IOP_WaveActiveMin, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp},
    {IntrinsicOp::IOP_WaveActiveProduct, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp},
    {IntrinsicOp::IOP_WaveActiveSum, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp},
    {IntrinsicOp::IOP_WaveGetLaneCount, TranslateWaveToVal, DXIL::OpCode::WaveGetLaneCount},
    {IntrinsicOp::IOP_WaveGetLaneIndex, TranslateWaveToVal, DXIL::OpCode::WaveGetLaneIndex},
    {IntrinsicOp::IOP_WaveIsFirstLane, TranslateWaveToVal, DXIL::OpCode::WaveIsFirstLane},
    {IntrinsicOp::IOP_WavePrefixCountBits, TranslateWaveA2B, DXIL::OpCode::WavePrefixBitCount},
    {IntrinsicOp::IOP_WavePrefixProduct, TranslateWaveA2A, DXIL::OpCode::WavePrefixOp},
    {IntrinsicOp::IOP_WavePrefixSum, TranslateWaveA2A, DXIL::OpCode::WavePrefixOp},
    {IntrinsicOp::IOP_WaveReadLaneAt, TranslateWaveReadLaneAt, DXIL::OpCode::WaveReadLaneAt},
    {IntrinsicOp::IOP_WaveReadLaneFirst, TranslateWaveReadLaneFirst, DXIL::OpCode::WaveReadLaneFirst},
    {IntrinsicOp::IOP_abort, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_abs, TransalteAbs, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_acos, TrivialUnaryOperation, DXIL::OpCode::Acos},
    {IntrinsicOp::IOP_all, TranslateAll, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_any, TranslateAny, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_asdouble, TranslateAsDouble, DXIL::OpCode::MakeDouble},
    {IntrinsicOp::IOP_asfloat, TranslateBitcast, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_asin, TrivialUnaryOperation, DXIL::OpCode::Asin},
    {IntrinsicOp::IOP_asint, TranslateBitcast, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_asuint, TranslateAsUint, DXIL::OpCode::SplitDouble},
    {IntrinsicOp::IOP_atan, TrivialUnaryOperation, DXIL::OpCode::Atan},
    {IntrinsicOp::IOP_atan2, TranslateAtan2, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_ceil, TrivialUnaryOperation, DXIL::OpCode::Round_pi},
    {IntrinsicOp::IOP_clamp, TranslateClamp, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_clip, TranslateClip, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_cos, TrivialUnaryOperation, DXIL::OpCode::Cos},
    {IntrinsicOp::IOP_cosh, TrivialUnaryOperation, DXIL::OpCode::Hcos},
    {IntrinsicOp::IOP_countbits, TrivialUnaryOperation, DXIL::OpCode::Countbits},
    {IntrinsicOp::IOP_cross, TranslateCross, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_ddx, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseX},
    {IntrinsicOp::IOP_ddx_coarse, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseX},
    {IntrinsicOp::IOP_ddx_fine, TrivialUnaryOperation, DXIL::OpCode::DerivFineX},
    {IntrinsicOp::IOP_ddy, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseY},
    {IntrinsicOp::IOP_ddy_coarse, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseY},
    {IntrinsicOp::IOP_ddy_fine, TrivialUnaryOperation, DXIL::OpCode::DerivFineY},
    {IntrinsicOp::IOP_degrees, TranslateDegrees, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_determinant, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_distance, TranslateDistance, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_dot, TranslateDot, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_dst, TranslateDst, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_exp, TranslateExp, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_exp2, TrivialUnaryOperation, DXIL::OpCode::Exp},
    {IntrinsicOp::IOP_f16tof32, TranslateF16ToF32, DXIL::OpCode::LegacyF16ToF32},
    {IntrinsicOp::IOP_f32tof16, TranslateF32ToF16, DXIL::OpCode::LegacyF32ToF16},
    {IntrinsicOp::IOP_faceforward, TranslateFaceforward, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_firstbithigh, TranslateFirstbitHi, DXIL::OpCode::FirstbitSHi},
    {IntrinsicOp::IOP_firstbitlow, TranslateFirstbitLo, DXIL::OpCode::FirstbitLo},
    {IntrinsicOp::IOP_floor, TrivialUnaryOperation, DXIL::OpCode::Round_ni},
    {IntrinsicOp::IOP_fma, TrivialTrinaryOperation, DXIL::OpCode::Fma},
    {IntrinsicOp::IOP_fmod, TranslateFMod, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_frac, TrivialUnaryOperation, DXIL::OpCode::Frc},
    {IntrinsicOp::IOP_frexp, TranslateFrexp, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_fwidth, TranslateFWidth, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_isfinite, TrivialIsSpecialFloat, DXIL::OpCode::IsFinite},
    {IntrinsicOp::IOP_isinf, TrivialIsSpecialFloat, DXIL::OpCode::IsInf},
    {IntrinsicOp::IOP_isnan, TrivialIsSpecialFloat, DXIL::OpCode::IsNaN},
    {IntrinsicOp::IOP_ldexp, TranslateLdExp, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_length, TranslateLength, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_lerp, TranslateLerp, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_lit, TranslateLit, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_log, TranslateLog, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_log10, TranslateLog10, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_log2, TrivialUnaryOperation, DXIL::OpCode::Log},
    {IntrinsicOp::IOP_mad, TranslateFUITrinary, DXIL::OpCode::IMad},
    {IntrinsicOp::IOP_max, TranslateFUIBinary, DXIL::OpCode::IMax},
    {IntrinsicOp::IOP_min, TranslateFUIBinary, DXIL::OpCode::IMin},
    {IntrinsicOp::IOP_modf, TranslateModF, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_msad4, TranslateMSad4, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_mul, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_normalize, TranslateNormalize, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_pow, TranslatePow, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_radians, TranslateRadians, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_rcp, TranslateRCP, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_reflect, TranslateReflect, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_refract, TranslateRefract, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_reversebits, TrivialUnaryOperation, DXIL::OpCode::Bfrev},
    {IntrinsicOp::IOP_round, TrivialUnaryOperation, DXIL::OpCode::Round_ne},
    {IntrinsicOp::IOP_rsqrt, TrivialUnaryOperation, DXIL::OpCode::Rsqrt},
    {IntrinsicOp::IOP_saturate, TrivialUnaryOperation, DXIL::OpCode::Saturate},
    {IntrinsicOp::IOP_sign, TranslateSign, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_sin, TrivialUnaryOperation, DXIL::OpCode::Sin},
    {IntrinsicOp::IOP_sincos, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_sinh, TrivialUnaryOperation, DXIL::OpCode::Hsin},
    {IntrinsicOp::IOP_smoothstep, TranslateSmoothStep, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_source_mark, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_sqrt, TrivialUnaryOperation, DXIL::OpCode::Sqrt},
    {IntrinsicOp::IOP_step, TranslateStep, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_tan, TrivialUnaryOperation, DXIL::OpCode::Tan},
    {IntrinsicOp::IOP_tanh, TrivialUnaryOperation, DXIL::OpCode::Htan},
    {IntrinsicOp::IOP_tex1D, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_tex1Dbias, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_tex1Dgrad, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_tex1Dlod, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_tex1Dproj, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_tex2D, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_tex2Dbias, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_tex2Dgrad, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_tex2Dlod, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_tex2Dproj, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_tex3D, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_tex3Dbias, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_tex3Dgrad, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_tex3Dlod, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_tex3Dproj, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_texCUBE, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_texCUBEbias, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_texCUBEgrad, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_texCUBElod, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_texCUBEproj, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_transpose, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_trunc, TrivialUnaryOperation, DXIL::OpCode::Round_z},
    {IntrinsicOp::MOP_Append, StreamOutputLower, DXIL::OpCode::EmitStream},
    {IntrinsicOp::MOP_RestartStrip, StreamOutputLower, DXIL::OpCode::CutStream},
    {IntrinsicOp::MOP_CalculateLevelOfDetail, TranslateCalculateLOD, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_CalculateLevelOfDetailUnclamped, TranslateCalculateLOD, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_GetDimensions, TranslateGetDimensions, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_Load, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_Sample, TranslateSample, DXIL::OpCode::Sample},
    {IntrinsicOp::MOP_SampleBias, TranslateSample, DXIL::OpCode::SampleBias},
    {IntrinsicOp::MOP_SampleCmp, TranslateSample, DXIL::OpCode::SampleCmp},
    {IntrinsicOp::MOP_SampleCmpLevelZero, TranslateSample, DXIL::OpCode::SampleCmpLevelZero},
    {IntrinsicOp::MOP_SampleGrad, TranslateSample, DXIL::OpCode::SampleGrad},
    {IntrinsicOp::MOP_SampleLevel, TranslateSample, DXIL::OpCode::SampleLevel},
    {IntrinsicOp::MOP_Gather, TranslateGather, DXIL::OpCode::TextureGather},
    {IntrinsicOp::MOP_GatherAlpha, TranslateGather, DXIL::OpCode::TextureGather},
    {IntrinsicOp::MOP_GatherBlue, TranslateGather, DXIL::OpCode::TextureGather},
    {IntrinsicOp::MOP_GatherCmp, TranslateGather, DXIL::OpCode::TextureGatherCmp},
    {IntrinsicOp::MOP_GatherCmpAlpha, TranslateGather, DXIL::OpCode::TextureGatherCmp},
    {IntrinsicOp::MOP_GatherCmpBlue, TranslateGather, DXIL::OpCode::TextureGatherCmp},
    {IntrinsicOp::MOP_GatherCmpGreen, TranslateGather, DXIL::OpCode::TextureGatherCmp},
    {IntrinsicOp::MOP_GatherCmpRed, TranslateGather, DXIL::OpCode::TextureGatherCmp},
    {IntrinsicOp::MOP_GatherGreen, TranslateGather, DXIL::OpCode::TextureGather},
    {IntrinsicOp::MOP_GatherRed, TranslateGather, DXIL::OpCode::TextureGather},
    {IntrinsicOp::MOP_GetSamplePosition, TranslateGetSamplePosition, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_Load2, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_Load3, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_Load4, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_InterlockedAdd, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_InterlockedAnd, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_InterlockedCompareExchange, TranslateMopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_InterlockedCompareStore, TranslateMopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_InterlockedExchange, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_InterlockedMax, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_InterlockedMin, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_InterlockedOr, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_InterlockedXor, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_Store, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_Store2, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_Store3, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_Store4, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_DecrementCounter, GenerateUpdateCounter, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_IncrementCounter, GenerateUpdateCounter, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_Consume, EmptyLower, DXIL::OpCode::NumOpCodes},
    // Manually added part.
    { IntrinsicOp::IOP_InterlockedUMax, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes },
    { IntrinsicOp::IOP_InterlockedUMin, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes },
    { IntrinsicOp::IOP_WaveActiveUMax, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp },
    { IntrinsicOp::IOP_WaveActiveUMin, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp },
    { IntrinsicOp::IOP_WaveActiveUProduct, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp },
    { IntrinsicOp::IOP_WaveActiveUSum, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp },
    { IntrinsicOp::IOP_WavePrefixUProduct, TranslateWaveA2A, DXIL::OpCode::WavePrefixOp },
    { IntrinsicOp::IOP_WavePrefixUSum, TranslateWaveA2A, DXIL::OpCode::WavePrefixOp },
    { IntrinsicOp::IOP_uclamp, TranslateClamp, DXIL::OpCode::NumOpCodes },
    { IntrinsicOp::IOP_ufirstbithigh, TranslateFirstbitHi, DXIL::OpCode::FirstbitHi },
    { IntrinsicOp::IOP_umad, TranslateFUITrinary, DXIL::OpCode::UMad},
    { IntrinsicOp::IOP_umax, TranslateFUIBinary, DXIL::OpCode::UMax},
    { IntrinsicOp::IOP_umin, TranslateFUIBinary, DXIL::OpCode::UMin },
    { IntrinsicOp::IOP_umul, TranslateFUIBinary, DXIL::OpCode::UMul },
    { IntrinsicOp::MOP_InterlockedUMax, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes },
    { IntrinsicOp::MOP_InterlockedUMin, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes },
};
  3918. }
  3919. static void TranslateBuiltinIntrinsic(CallInst *CI,
  3920. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3921. unsigned opcode = hlsl::GetHLOpcode(CI);
  3922. const IntrinsicLower &lower = gLowerTable[opcode];
  3923. Value *Result =
  3924. lower.LowerFunc(CI, lower.IntriOpcode, lower.DxilOpcode, helper, pObjHelper, Translated);
  3925. if (Result)
  3926. CI->replaceAllUsesWith(Result);
  3927. }
  3928. // SharedMem.
  3929. namespace {
  3930. bool IsSharedMemPtr(Value *Ptr) {
  3931. return Ptr->getType()->getPointerAddressSpace() == DXIL::kTGSMAddrSpace;
  3932. }
  3933. bool IsLocalVariablePtr(Value *Ptr) {
  3934. while (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
  3935. Ptr = GEP->getPointerOperand();
  3936. }
  3937. bool isAlloca = isa<AllocaInst>(Ptr);
  3938. if (isAlloca) return true;
  3939. GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr);
  3940. if (!GV) return false;
  3941. return GV->getLinkage() == GlobalValue::LinkageTypes::InternalLinkage;
  3942. }
  3943. }
  3944. // Constant buffer.
  3945. namespace {
  3946. unsigned GetEltTypeByteSizeForConstBuf(Type *EltType, const DataLayout &DL) {
  3947. DXASSERT(EltType->isIntegerTy() || EltType->isFloatingPointTy(),
  3948. "not an element type");
  3949. // TODO: Use real size after change constant buffer into linear layout.
  3950. if (DL.getTypeSizeInBits(EltType) <= 32) {
  3951. // Constant buffer is 4 bytes align.
  3952. return 4;
  3953. } else
  3954. return 8;
  3955. }
  3956. Value *GenerateCBLoad(Value *handle, Value *offset, Type *EltTy, OP *hlslOP,
  3957. IRBuilder<> &Builder) {
  3958. Constant *OpArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoad);
  3959. // Align to 8 bytes for now.
  3960. Constant *align = hlslOP->GetU32Const(8);
  3961. Type *i1Ty = Type::getInt1Ty(EltTy->getContext());
  3962. if (EltTy != i1Ty) {
  3963. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoad, EltTy);
  3964. return Builder.CreateCall(CBLoad, {OpArg, handle, offset, align});
  3965. } else {
  3966. Type *i32Ty = Type::getInt32Ty(EltTy->getContext());
  3967. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoad, i32Ty);
  3968. Value *Result = Builder.CreateCall(CBLoad, {OpArg, handle, offset, align});
  3969. return Builder.CreateICmpEQ(Result, hlslOP->GetU32Const(0));
  3970. }
  3971. }
  3972. Value *TranslateConstBufMatLd(Type *matType, Value *handle, Value *offset,
  3973. bool colMajor, OP *OP, const DataLayout &DL,
  3974. IRBuilder<> &Builder) {
  3975. unsigned col, row;
  3976. Type *EltTy = HLMatrixLower::GetMatrixInfo(matType, col, row);
  3977. unsigned matSize = col * row;
  3978. std::vector<Value *> elts(matSize);
  3979. Value *EltByteSize = ConstantInt::get(
  3980. offset->getType(), GetEltTypeByteSizeForConstBuf(EltTy, DL));
  3981. // TODO: use real size after change constant buffer into linear layout.
  3982. Value *baseOffset = offset;
  3983. for (unsigned i = 0; i < matSize; i++) {
  3984. elts[i] = GenerateCBLoad(handle, baseOffset, EltTy, OP, Builder);
  3985. baseOffset = Builder.CreateAdd(baseOffset, EltByteSize);
  3986. }
  3987. return HLMatrixLower::BuildVector(EltTy, col * row, elts, Builder);
  3988. }
  3989. void TranslateCBGep(GetElementPtrInst *GEP, Value *handle, Value *baseOffset,
  3990. hlsl::OP *hlslOP, IRBuilder<> &Builder,
  3991. DxilFieldAnnotation *prevFieldAnnotation,
  3992. const DataLayout &DL, DxilTypeSystem &dxilTypeSys);
  3993. Value *GenerateVecEltFromGEP(Value *ldData, GetElementPtrInst *GEP,
  3994. IRBuilder<> &Builder) {
  3995. DXASSERT(GEP->getNumIndices() == 2, "must have 2 level");
  3996. Value *baseIdx = (GEP->idx_begin())->get();
  3997. Value *zeroIdx = Builder.getInt32(0);
  3998. DXASSERT_LOCALVAR(baseIdx && zeroIdx, baseIdx == zeroIdx,
  3999. "base index must be 0");
  4000. Value *idx = (GEP->idx_begin() + 1)->get();
  4001. if (ConstantInt *cidx = dyn_cast<ConstantInt>(idx)) {
  4002. return Builder.CreateExtractElement(ldData, idx);
  4003. } else {
  4004. // Dynamic indexing.
  4005. // Copy vec to array.
  4006. Type *Ty = ldData->getType();
  4007. Type *EltTy = Ty->getVectorElementType();
  4008. unsigned vecSize = Ty->getVectorNumElements();
  4009. ArrayType *AT = ArrayType::get(EltTy, vecSize);
  4010. IRBuilder<> AllocaBuilder(
  4011. GEP->getParent()->getParent()->getEntryBlock().getFirstInsertionPt());
  4012. Value *tempArray = AllocaBuilder.CreateAlloca(AT);
  4013. Value *zero = Builder.getInt32(0);
  4014. for (unsigned int i = 0; i < vecSize; i++) {
  4015. Value *Elt = Builder.CreateExtractElement(ldData, Builder.getInt32(i));
  4016. Value *Ptr =
  4017. Builder.CreateInBoundsGEP(tempArray, {zero, Builder.getInt32(i)});
  4018. Builder.CreateStore(Elt, Ptr);
  4019. }
  4020. // Load from temp array.
  4021. Value *EltGEP = Builder.CreateInBoundsGEP(tempArray, {zero, idx});
  4022. return Builder.CreateLoad(EltGEP);
  4023. }
  4024. }
// Lower one user of a cbuffer-derived pointer (HL intrinsic call, load, or
// GEP) into dxil CBufferLoad operations using raw byte-offset addressing.
// baseOffset is the byte offset of the pointed-to value from the start of the
// cbuffer; prevFieldAnnotation carries layout info for the field the pointer
// currently points into (null at the top level).  Handled users are erased.
void TranslateCBAddressUser(Instruction *user, Value *handle, Value *baseOffset,
                            hlsl::OP *hlslOP,
                            DxilFieldAnnotation *prevFieldAnnotation,
                            DxilTypeSystem &dxilTypeSys, const DataLayout &DL) {
  IRBuilder<> Builder(user);
  if (CallInst *CI = dyn_cast<CallInst>(user)) {
    HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
    unsigned opcode = GetHLOpcode(CI);
    if (group == HLOpcodeGroup::HLMatLoadStore) {
      // Matrix load: cbuffers are read-only, so only Col/RowMatLoad appear.
      HLMatLoadStoreOpcode matOp = static_cast<HLMatLoadStoreOpcode>(opcode);
      bool colMajor = matOp == HLMatLoadStoreOpcode::ColMatLoad;
      DXASSERT(matOp == HLMatLoadStoreOpcode::ColMatLoad ||
                   matOp == HLMatLoadStoreOpcode::RowMatLoad,
               "No store on cbuffer");
      Type *matType = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx)
                          ->getType()
                          ->getPointerElementType();
      Value *newLd = TranslateConstBufMatLd(matType, handle, baseOffset,
                                            colMajor, hlslOP, DL, Builder);
      CI->replaceAllUsesWith(newLd);
      CI->eraseFromParent();
    } else if (group == HLOpcodeGroup::HLSubscript) {
      // Matrix subscript/element access: compute a byte offset for every
      // selected element, load each scalar, then rebuild the result value.
      HLSubscriptOpcode subOp = static_cast<HLSubscriptOpcode>(opcode);
      Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx);
      Type *matType = basePtr->getType()->getPointerElementType();
      unsigned col, row;
      Type *EltTy = HLMatrixLower::GetMatrixInfo(matType, col, row);
      Value *EltByteSize = ConstantInt::get(
          baseOffset->getType(), GetEltTypeByteSizeForConstBuf(EltTy, DL));
      Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx);
      Type *resultType = CI->getType()->getPointerElementType();
      unsigned resultSize = 1;
      if (resultType->isVectorTy())
        resultSize = resultType->getVectorNumElements();
      DXASSERT(resultSize <= 16, "up to 4x4 elements in vector or matrix");
      _Analysis_assume_(resultSize <= 16);
      Value *idxList[16];
      switch (subOp) {
      case HLSubscriptOpcode::ColMatSubscript:
      case HLSubscriptOpcode::RowMatSubscript: {
        // Subscript form: each element index is a separate call operand.
        for (unsigned i = 0; i < resultSize; i++) {
          Value *idx =
              CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx + i);
          Value *offset = Builder.CreateMul(idx, EltByteSize);
          idxList[i] = Builder.CreateAdd(baseOffset, offset);
        }
      } break;
      case HLSubscriptOpcode::RowMatElement:
      case HLSubscriptOpcode::ColMatElement: {
        // Element form: indices come packed in one constant aggregate.
        Constant *EltIdxs = cast<Constant>(idx);
        for (unsigned i = 0; i < resultSize; i++) {
          Value *offset =
              Builder.CreateMul(EltIdxs->getAggregateElement(i), EltByteSize);
          idxList[i] = Builder.CreateAdd(baseOffset, offset);
        }
      } break;
      default:
        DXASSERT(0, "invalid operation on const buffer");
        break;
      }
      // Load the selected scalars and assemble the subscript result.
      Value *ldData = UndefValue::get(resultType);
      if (resultType->isVectorTy()) {
        for (unsigned i = 0; i < resultSize; i++) {
          Value *eltData =
              GenerateCBLoad(handle, idxList[i], EltTy, hlslOP, Builder);
          ldData = Builder.CreateInsertElement(ldData, eltData, i);
        }
      } else {
        ldData = GenerateCBLoad(handle, idxList[0], EltTy, hlslOP, Builder);
      }
      // Replace every user of the subscript pointer.  After earlier
      // flattening passes, users can only be loads, or GEPs whose users are
      // loads.  Iterators are advanced before erasing.
      for (auto U = CI->user_begin(); U != CI->user_end();) {
        Value *subsUser = *(U++);
        if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(subsUser)) {
          Value *subData = GenerateVecEltFromGEP(ldData, GEP, Builder);
          for (auto gepU = GEP->user_begin(); gepU != GEP->user_end();) {
            Value *gepUser = *(gepU++);
            // Must be load here;
            LoadInst *ldUser = cast<LoadInst>(gepUser);
            ldUser->replaceAllUsesWith(subData);
            ldUser->eraseFromParent();
          }
          GEP->eraseFromParent();
        } else {
          // Must be load here.
          LoadInst *ldUser = cast<LoadInst>(subsUser);
          ldUser->replaceAllUsesWith(ldData);
          ldUser->eraseFromParent();
        }
      }
      CI->eraseFromParent();
    } else {
      DXASSERT(0, "not implemented yet");
    }
  } else if (LoadInst *ldInst = dyn_cast<LoadInst>(user)) {
    // Scalar or vector load: one CBufferLoad per component, stepping the
    // byte offset by the element size between components.
    Type *Ty = ldInst->getType();
    Type *EltTy = Ty->getScalarType();
    DXASSERT(!Ty->isAggregateType(), "should be flat in previous pass");
    unsigned EltByteSize = GetEltTypeByteSizeForConstBuf(EltTy, DL);
    Value *newLd = GenerateCBLoad(handle, baseOffset, EltTy, hlslOP, Builder);
    if (Ty->isVectorTy()) {
      Value *result = UndefValue::get(Ty);
      result = Builder.CreateInsertElement(result, newLd, (uint64_t)0);
      // Update offset by 4 bytes.
      Value *offset =
          Builder.CreateAdd(baseOffset, hlslOP->GetU32Const(EltByteSize));
      for (unsigned i = 1; i < Ty->getVectorNumElements(); i++) {
        Value *elt = GenerateCBLoad(handle, offset, EltTy, hlslOP, Builder);
        result = Builder.CreateInsertElement(result, elt, i);
        // Update offset by 4 bytes.
        offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(EltByteSize));
      }
      newLd = result;
    }
    ldInst->replaceAllUsesWith(newLd);
    ldInst->eraseFromParent();
  } else {
    // Must be GEP here
    GetElementPtrInst *GEP = cast<GetElementPtrInst>(user);
    TranslateCBGep(GEP, handle, baseOffset, hlslOP, Builder,
                   prevFieldAnnotation, DL, dxilTypeSys);
    GEP->eraseFromParent();
  }
}
// Fold every index of a GEP on a cbuffer pointer into a running byte offset,
// then recursively translate the GEP's users with that offset.  Struct field
// offsets come from DxilTypeSystem annotations; array strides are aligned to
// 16-byte registers per cbuffer layout rules; vector components are packed.
void TranslateCBGep(GetElementPtrInst *GEP, Value *handle, Value *baseOffset,
                    hlsl::OP *hlslOP, IRBuilder<> &Builder,
                    DxilFieldAnnotation *prevFieldAnnotation,
                    const DataLayout &DL, DxilTypeSystem &dxilTypeSys) {
  SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());
  Value *offset = baseOffset;
  // update offset
  DxilFieldAnnotation *fieldAnnotation = prevFieldAnnotation;
  gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP);
  for (; GEPIt != E; GEPIt++) {
    Value *idx = GEPIt.getOperand();
    unsigned immIdx = 0;
    bool bImmIdx = false;
    if (Constant *constIdx = dyn_cast<Constant>(idx)) {
      immIdx = constIdx->getUniqueInteger().getLimitedValue();
      bImmIdx = true;
    }
    if (GEPIt->isPointerTy()) {
      // Leading (pointer) index: stride is the pointee size — a struct's
      // annotated cbuffer size, or for arrays the 16-byte-aligned element
      // size times the flattened multi-dimensional element count.
      Type *EltTy = GEPIt->getPointerElementType();
      unsigned size = 0;
      if (StructType *ST = dyn_cast<StructType>(EltTy)) {
        DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST);
        size = annotation->GetCBufferSize();
      } else {
        DXASSERT(fieldAnnotation, "must be a field");
        if (ArrayType *AT = dyn_cast<ArrayType>(EltTy)) {
          unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize(
              *fieldAnnotation, EltTy, dxilTypeSys);
          // Decide the nested array size.
          unsigned nestedArraySize = 1;
          Type *EltTy = AT->getArrayElementType();
          // support multi level of array
          while (EltTy->isArrayTy()) {
            ArrayType *EltAT = cast<ArrayType>(EltTy);
            nestedArraySize *= EltAT->getNumElements();
            EltTy = EltAT->getElementType();
          }
          // Align to 4 * 4 bytes.
          unsigned alignedSize = (EltSize + 15) & 0xfffffff0;
          size = nestedArraySize * alignedSize;
        } else {
          size = DL.getTypeAllocSize(EltTy);
        }
      }
      // Align to 4 * 4 bytes.
      size = (size + 15) & 0xfffffff0;
      if (bImmIdx) {
        // Constant index: fold size * idx at compile time.
        unsigned tempOffset = size * immIdx;
        offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(tempOffset));
      } else {
        // Dynamic index: emit mul/add.
        Value *tempOffset = Builder.CreateMul(idx, hlslOP->GetU32Const(size));
        offset = Builder.CreateAdd(offset, tempOffset);
      }
    } else if (GEPIt->isStructTy()) {
      // Struct field: the field annotation records its cbuffer byte offset,
      // and becomes the annotation for deeper levels of the GEP.
      StructType *ST = cast<StructType>(*GEPIt);
      DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST);
      fieldAnnotation = &annotation->GetFieldAnnotation(immIdx);
      unsigned structOffset = fieldAnnotation->GetCBufferOffset();
      offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(structOffset));
    } else if (GEPIt->isArrayTy()) {
      // Array element: 16-byte-aligned stride times the flattened element
      // count of any nested arrays.
      DXASSERT(fieldAnnotation != nullptr, "must a field");
      unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize(
          *fieldAnnotation, *GEPIt, dxilTypeSys);
      // Decide the nested array size.
      unsigned nestedArraySize = 1;
      Type *EltTy = GEPIt->getArrayElementType();
      // support multi level of array
      while (EltTy->isArrayTy()) {
        ArrayType *EltAT = cast<ArrayType>(EltTy);
        nestedArraySize *= EltAT->getNumElements();
        EltTy = EltAT->getElementType();
      }
      // Align to 4 * 4 bytes.
      unsigned alignedSize = (EltSize + 15) & 0xfffffff0;
      unsigned size = nestedArraySize * alignedSize;
      if (bImmIdx) {
        unsigned tempOffset = size * immIdx;
        offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(tempOffset));
      } else {
        Value *tempOffset = Builder.CreateMul(idx, hlslOP->GetU32Const(size));
        offset = Builder.CreateAdd(offset, tempOffset);
      }
    } else if (GEPIt->isVectorTy()) {
      // Vector component: components are packed, so the stride is the raw
      // element size with no register alignment.
      unsigned size = DL.getTypeAllocSize(GEPIt->getVectorElementType());
      if (bImmIdx) {
        unsigned tempOffset = size * immIdx;
        offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(tempOffset));
      } else {
        Value *tempOffset = Builder.CreateMul(idx, hlslOP->GetU32Const(size));
        offset = Builder.CreateAdd(offset, tempOffset);
      }
    } else {
      // A scalar can only appear as the final GEP type.
      gep_type_iterator temp = GEPIt;
      temp++;
      DXASSERT(temp == E, "scalar type must be the last");
    }
  }
  // Translate every user of this GEP at the final byte offset.  Iterator is
  // advanced before the user is (potentially) erased.
  for (auto U = GEP->user_begin(); U != GEP->user_end();) {
    Instruction *user = cast<Instruction>(*(U++));
    TranslateCBAddressUser(user, handle, offset, hlslOP, fieldAnnotation,
                           dxilTypeSys, DL);
  }
}
  4251. void TranslateCBOperations(Value *handle, Value *ptr, Value *offset, OP *hlslOP,
  4252. DxilTypeSystem &dxilTypeSys, const DataLayout &DL) {
  4253. auto User = ptr->user_begin();
  4254. auto UserE = ptr->user_end();
  4255. for (; User != UserE;) {
  4256. // Must be Instruction.
  4257. Instruction *I = cast<Instruction>(*(User++));
  4258. TranslateCBAddressUser(I, handle, offset, hlslOP,
  4259. /*prevFieldAnnotation*/ nullptr, dxilTypeSys, DL);
  4260. }
  4261. }
  4262. Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx,
  4263. unsigned channelOffset, Type *EltTy, OP *hlslOP,
  4264. IRBuilder<> &Builder) {
  4265. DXASSERT((channelOffset) < 4, "legacy cbuffer don't across 16 bytes register.");
  4266. Constant *OpArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoadLegacy);
  4267. Type *i1Ty = Type::getInt1Ty(EltTy->getContext());
  4268. Type *doubleTy = Type::getDoubleTy(EltTy->getContext());
  4269. Type *i64Ty = Type::getInt64Ty(EltTy->getContext());
  4270. bool isBool = EltTy == i1Ty;
  4271. bool is64 = (EltTy == doubleTy) | (EltTy == i64Ty);
  4272. bool isNormal = !isBool && !is64;
  4273. if (isNormal) {
  4274. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
  4275. Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
  4276. return Builder.CreateExtractValue(loadLegacy, channelOffset);
  4277. } else if (is64) {
  4278. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
  4279. Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
  4280. DXASSERT((channelOffset&1)==0,"channel offset must be even for double");
  4281. unsigned eltIdx = channelOffset>>1;
  4282. Value *Result = Builder.CreateExtractValue(loadLegacy, eltIdx);
  4283. return Result;
  4284. } else {
  4285. DXASSERT(isBool, "bool should be i1");
  4286. Type *i32Ty = Type::getInt32Ty(EltTy->getContext());
  4287. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, i32Ty);
  4288. Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
  4289. Value *Result = Builder.CreateExtractValue(loadLegacy, channelOffset);
  4290. return Builder.CreateICmpEQ(Result, hlslOP->GetU32Const(0));
  4291. }
  4292. }
  4293. Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx,
  4294. unsigned channelOffset, Type *EltTy,
  4295. unsigned vecSize, OP *hlslOP,
  4296. IRBuilder<> &Builder) {
  4297. Constant *OpArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoadLegacy);
  4298. Type *i1Ty = Type::getInt1Ty(EltTy->getContext());
  4299. Type *doubleTy = Type::getDoubleTy(EltTy->getContext());
  4300. Type *i64Ty = Type::getInt64Ty(EltTy->getContext());
  4301. Type *halfTy = Type::getHalfTy(EltTy->getContext());
  4302. bool isBool = EltTy == i1Ty;
  4303. bool is64 = (EltTy == doubleTy) | (EltTy == i64Ty);
  4304. bool is16 = EltTy == halfTy && !hlslOP->UseMinPrecision();
  4305. bool isNormal = !isBool && !is64 && !is16;
  4306. DXASSERT(is16 || (channelOffset + vecSize) <= 4, "legacy cbuffer don't across 16 bytes register.");
  4307. if (isNormal) {
  4308. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
  4309. Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
  4310. Value *Result = UndefValue::get(VectorType::get(EltTy, vecSize));
  4311. for (unsigned i = 0; i < vecSize; ++i) {
  4312. Value *NewElt = Builder.CreateExtractValue(loadLegacy, channelOffset+i);
  4313. Result = Builder.CreateInsertElement(Result, NewElt, i);
  4314. }
  4315. return Result;
  4316. } else if (is16) {
  4317. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
  4318. Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
  4319. Value *Result = UndefValue::get(VectorType::get(EltTy, vecSize));
  4320. // index aligned by 2 bytes not 4 bytes
  4321. channelOffset *= 2;
  4322. for (unsigned i = 0; i < vecSize; ++i) {
  4323. Value *NewElt = Builder.CreateExtractValue(loadLegacy, channelOffset + i);
  4324. Result = Builder.CreateInsertElement(Result, NewElt, i);
  4325. }
  4326. return Result;
  4327. } else if (is64) {
  4328. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
  4329. Value *loadLegacy = Builder.CreateCall(CBLoad, { OpArg, handle, legacyIdx });
  4330. Value *Result = UndefValue::get(VectorType::get(EltTy, vecSize));
  4331. unsigned smallVecSize = 2;
  4332. if (vecSize < smallVecSize)
  4333. smallVecSize = vecSize;
  4334. for (unsigned i = 0; i < smallVecSize; ++i) {
  4335. Value *NewElt = Builder.CreateExtractValue(loadLegacy, channelOffset+i);
  4336. Result = Builder.CreateInsertElement(Result, NewElt, i);
  4337. }
  4338. if (vecSize > 2) {
  4339. // Got to next cb register.
  4340. legacyIdx = Builder.CreateAdd(legacyIdx, hlslOP->GetU32Const(1));
  4341. Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
  4342. for (unsigned i = 2; i < vecSize; ++i) {
  4343. Value *NewElt =
  4344. Builder.CreateExtractValue(loadLegacy, i-2);
  4345. Result = Builder.CreateInsertElement(Result, NewElt, i);
  4346. }
  4347. }
  4348. return Result;
  4349. } else {
  4350. DXASSERT(isBool, "bool should be i1");
  4351. Type *i32Ty = Type::getInt32Ty(EltTy->getContext());
  4352. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, i32Ty);
  4353. Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
  4354. Value *Result = UndefValue::get(VectorType::get(i32Ty, vecSize));
  4355. for (unsigned i = 0; i < vecSize; ++i) {
  4356. Value *NewElt = Builder.CreateExtractValue(loadLegacy, channelOffset+i);
  4357. Result = Builder.CreateInsertElement(Result, NewElt, i);
  4358. }
  4359. return Builder.CreateICmpEQ(Result, ConstantAggregateZero::get(Result->getType()));
  4360. }
  4361. }
  4362. Value *TranslateConstBufMatLdLegacy(Type *matType, Value *handle,
  4363. Value *legacyIdx, bool colMajor, OP *OP,
  4364. const DataLayout &DL,
  4365. IRBuilder<> &Builder) {
  4366. unsigned col, row;
  4367. Type *EltTy = HLMatrixLower::GetMatrixInfo(matType, col, row);
  4368. unsigned matSize = col * row;
  4369. std::vector<Value *> elts(matSize);
  4370. unsigned EltByteSize = GetEltTypeByteSizeForConstBuf(EltTy, DL);
  4371. if (colMajor) {
  4372. unsigned colByteSize = 4 * EltByteSize;
  4373. unsigned colRegSize = (colByteSize + 15) >> 4;
  4374. for (unsigned c = 0; c < col; c++) {
  4375. Value *col = GenerateCBLoadLegacy(handle, legacyIdx, /*channelOffset*/ 0,
  4376. EltTy, row, OP, Builder);
  4377. for (unsigned r = 0; r < row; r++) {
  4378. unsigned matIdx = HLMatrixLower::GetColMajorIdx(r, c, row);
  4379. elts[matIdx] = Builder.CreateExtractElement(col, r);
  4380. }
  4381. // Update offset for a column.
  4382. legacyIdx = Builder.CreateAdd(legacyIdx, OP->GetU32Const(colRegSize));
  4383. }
  4384. } else {
  4385. unsigned rowByteSize = 4 * EltByteSize;
  4386. unsigned rowRegSize = (rowByteSize + 15) >> 4;
  4387. for (unsigned r = 0; r < row; r++) {
  4388. Value *row = GenerateCBLoadLegacy(handle, legacyIdx, /*channelOffset*/ 0,
  4389. EltTy, col, OP, Builder);
  4390. for (unsigned c = 0; c < col; c++) {
  4391. unsigned matIdx = HLMatrixLower::GetRowMajorIdx(r, c, col);
  4392. elts[matIdx] = Builder.CreateExtractElement(row, c);
  4393. }
  4394. // Update offset for a row.
  4395. legacyIdx = Builder.CreateAdd(legacyIdx, OP->GetU32Const(rowRegSize));
  4396. }
  4397. }
  4398. return HLMatrixLower::BuildVector(EltTy, col * row, elts, Builder);
  4399. }
  4400. void TranslateCBGepLegacy(GetElementPtrInst *GEP, Value *handle,
  4401. Value *legacyIdx, unsigned channelOffset,
  4402. hlsl::OP *hlslOP, IRBuilder<> &Builder,
  4403. DxilFieldAnnotation *prevFieldAnnotation,
  4404. const DataLayout &DL, DxilTypeSystem &dxilTypeSys,
  4405. HLObjectOperationLowerHelper *pObjHelper);
  4406. void TranslateResourceInCB(LoadInst *LI,
  4407. HLObjectOperationLowerHelper *pObjHelper,
  4408. GlobalVariable *CbGV) {
  4409. if (LI->user_empty()) {
  4410. LI->eraseFromParent();
  4411. return;
  4412. }
  4413. GetElementPtrInst *Ptr = cast<GetElementPtrInst>(LI->getPointerOperand());
  4414. CallInst *CI = cast<CallInst>(LI->user_back());
  4415. MDNode *MD = HLModule::GetDxilResourceAttrib(CI->getCalledFunction());
  4416. Value *ResPtr = pObjHelper->GetOrCreateResourceForCbPtr(Ptr, CbGV, MD);
  4417. // Lower Ptr to GV base Ptr.
  4418. Value *GvPtr = pObjHelper->LowerCbResourcePtr(Ptr, ResPtr);
  4419. IRBuilder<> Builder(LI);
  4420. Value *GvLd = Builder.CreateLoad(GvPtr);
  4421. LI->replaceAllUsesWith(GvLd);
  4422. LI->eraseFromParent();
  4423. }
// Legacy-layout counterpart of TranslateCBAddressUser: lower one user of a
// cbuffer pointer addressed by (16-byte register index, 32-bit channel)
// rather than a raw byte offset.  Handled users are erased.
void TranslateCBAddressUserLegacy(Instruction *user, Value *handle,
                                  Value *legacyIdx, unsigned channelOffset,
                                  hlsl::OP *hlslOP,
                                  DxilFieldAnnotation *prevFieldAnnotation,
                                  DxilTypeSystem &dxilTypeSys,
                                  const DataLayout &DL,
                                  HLObjectOperationLowerHelper *pObjHelper) {
  IRBuilder<> Builder(user);
  if (CallInst *CI = dyn_cast<CallInst>(user)) {
    HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
    unsigned opcode = GetHLOpcode(CI);
    if (group == HLOpcodeGroup::HLMatLoadStore) {
      // Matrix load; cbuffers are read-only so stores cannot appear.
      HLMatLoadStoreOpcode matOp = static_cast<HLMatLoadStoreOpcode>(opcode);
      bool colMajor = matOp == HLMatLoadStoreOpcode::ColMatLoad;
      DXASSERT(matOp == HLMatLoadStoreOpcode::ColMatLoad ||
                   matOp == HLMatLoadStoreOpcode::RowMatLoad,
               "No store on cbuffer");
      Type *matType = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx)
                          ->getType()
                          ->getPointerElementType();
      Value *newLd = TranslateConstBufMatLdLegacy(
          matType, handle, legacyIdx, colMajor, hlslOP, DL, Builder);
      CI->replaceAllUsesWith(newLd);
      CI->eraseFromParent();
    } else if (group == HLOpcodeGroup::HLSubscript) {
      HLSubscriptOpcode subOp = static_cast<HLSubscriptOpcode>(opcode);
      Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx);
      Type *matType = basePtr->getType()->getPointerElementType();
      unsigned col, row;
      Type *EltTy = HLMatrixLower::GetMatrixInfo(matType, col, row);
      Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx);
      Type *resultType = CI->getType()->getPointerElementType();
      unsigned resultSize = 1;
      if (resultType->isVectorTy())
        resultSize = resultType->getVectorNumElements();
      DXASSERT(resultSize <= 16, "up to 4x4 elements in vector or matrix");
      _Analysis_assume_(resultSize <= 16);
      Value *idxList[16];
      bool colMajor = subOp == HLSubscriptOpcode::ColMatSubscript ||
                      subOp == HLSubscriptOpcode::ColMatElement;
      // A ConstantInt / constant-aggregate index means the selected elements
      // are known at compile time.
      bool dynamicIndexing = !isa<ConstantInt>(idx) &&
                             !isa<ConstantAggregateZero>(idx) &&
                             !isa<ConstantDataSequential>(idx);
      Value *ldData = UndefValue::get(resultType);
      if (!dynamicIndexing) {
        // Constant indices: load the whole matrix once, then pick elements
        // out of the flat vector.
        Value *matLd = TranslateConstBufMatLdLegacy(
            matType, handle, legacyIdx, colMajor, hlslOP, DL, Builder);
        // The matLd is keep original layout, just use the idx calc in
        // EmitHLSLMatrixElement and EmitHLSLMatrixSubscript.
        switch (subOp) {
        case HLSubscriptOpcode::RowMatSubscript:
        case HLSubscriptOpcode::ColMatSubscript: {
          // Subscript form: each element index is a separate call operand.
          for (unsigned i = 0; i < resultSize; i++) {
            idxList[i] =
                CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx + i);
          }
        } break;
        case HLSubscriptOpcode::RowMatElement:
        case HLSubscriptOpcode::ColMatElement: {
          // Element form: indices are packed in one constant aggregate.
          Constant *EltIdxs = cast<Constant>(idx);
          for (unsigned i = 0; i < resultSize; i++) {
            idxList[i] = EltIdxs->getAggregateElement(i);
          }
        } break;
        default:
          DXASSERT(0, "invalid operation on const buffer");
          break;
        }
        if (resultType->isVectorTy()) {
          for (unsigned i = 0; i < resultSize; i++) {
            Value *eltData = Builder.CreateExtractElement(matLd, idxList[i]);
            ldData = Builder.CreateInsertElement(ldData, eltData, i);
          }
        } else {
          Value *eltData = Builder.CreateExtractElement(matLd, idxList[0]);
          ldData = eltData;
        }
      } else {
        // Must be matSub here.
        Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx);
        if (colMajor) {
          // idx is c * row + r.
          // For first col, c is 0, so idx is r.
          Value *one = Builder.getInt32(1);
          // row.x = c[0].[idx]
          // row.y = c[1].[idx]
          // row.z = c[2].[idx]
          // row.w = c[3].[idx]
          Value *Elts[4];
          ArrayType *AT = ArrayType::get(EltTy, col);
          // Allocas must go in the entry block so they are static.
          IRBuilder<> AllocaBuilder(user->getParent()
                                        ->getParent()
                                        ->getEntryBlock()
                                        .getFirstInsertionPt());
          Value *tempArray = AllocaBuilder.CreateAlloca(AT);
          Value *zero = AllocaBuilder.getInt32(0);
          Value *cbufIdx = legacyIdx;
          for (unsigned int c = 0; c < col; c++) {
            // Load one column (a register) and spill it to the temp array so
            // the dynamic row index can select from it.
            Value *ColVal =
                GenerateCBLoadLegacy(handle, cbufIdx, /*channelOffset*/ 0,
                                     EltTy, row, hlslOP, Builder);
            // Convert ColVal to array for indexing.
            for (unsigned int r = 0; r < row; r++) {
              Value *Elt =
                  Builder.CreateExtractElement(ColVal, Builder.getInt32(r));
              Value *Ptr = Builder.CreateInBoundsGEP(
                  tempArray, {zero, Builder.getInt32(r)});
              Builder.CreateStore(Elt, Ptr);
            }
            Value *Ptr = Builder.CreateInBoundsGEP(tempArray, {zero, idx});
            Elts[c] = Builder.CreateLoad(Ptr);
            // Update cbufIdx.
            cbufIdx = Builder.CreateAdd(cbufIdx, one);
          }
          if (resultType->isVectorTy()) {
            for (unsigned int c = 0; c < col; c++) {
              ldData = Builder.CreateInsertElement(ldData, Elts[c], c);
            }
          } else {
            ldData = Elts[0];
          }
        } else {
          // idx is r * col + c;
          // r = idx / col;
          Value *cCol = ConstantInt::get(idx->getType(), col);
          idx = Builder.CreateUDiv(idx, cCol);
          idx = Builder.CreateAdd(idx, legacyIdx);
          // Just return a row.
          // NOTE(review): vecSize here is `row`, but a row of an r x c matrix
          // has `col` elements — verify for non-square matrices.
          ldData = GenerateCBLoadLegacy(handle, idx, /*channelOffset*/ 0, EltTy,
                                        row, hlslOP, Builder);
        }
        if (!resultType->isVectorTy()) {
          ldData = Builder.CreateExtractElement(ldData, Builder.getInt32(0));
        }
      }
      // Replace every user of the subscript pointer (loads, or GEPs whose
      // users are loads).  Iterators are advanced before erasure.
      for (auto U = CI->user_begin(); U != CI->user_end();) {
        Value *subsUser = *(U++);
        if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(subsUser)) {
          Value *subData = GenerateVecEltFromGEP(ldData, GEP, Builder);
          for (auto gepU = GEP->user_begin(); gepU != GEP->user_end();) {
            Value *gepUser = *(gepU++);
            // Must be load here;
            LoadInst *ldUser = cast<LoadInst>(gepUser);
            ldUser->replaceAllUsesWith(subData);
            ldUser->eraseFromParent();
          }
          GEP->eraseFromParent();
        } else {
          // Must be load here.
          LoadInst *ldUser = cast<LoadInst>(subsUser);
          ldUser->replaceAllUsesWith(ldData);
          ldUser->eraseFromParent();
        }
      }
      CI->eraseFromParent();
    } else {
      DXASSERT(0, "not implemented yet");
    }
  } else if (LoadInst *ldInst = dyn_cast<LoadInst>(user)) {
    Type *Ty = ldInst->getType();
    Type *EltTy = Ty->getScalarType();
    // Resource inside cbuffer is lowered after GenerateDxilOperations.
    if (HLModule::IsHLSLObjectType(Ty)) {
      CallInst *CI = cast<CallInst>(handle);
      GlobalVariable *CbGV = cast<GlobalVariable>(
          CI->getArgOperand(HLOperandIndex::kCreateHandleResourceOpIdx));
      TranslateResourceInCB(ldInst, pObjHelper, CbGV);
      return;
    }
    DXASSERT(!Ty->isAggregateType(), "should be flat in previous pass");
    // Plain scalar/vector load at the current register+channel position.
    Value *newLd = nullptr;
    if (Ty->isVectorTy())
      newLd = GenerateCBLoadLegacy(handle, legacyIdx, channelOffset, EltTy,
                                   Ty->getVectorNumElements(), hlslOP, Builder);
    else
      newLd = GenerateCBLoadLegacy(handle, legacyIdx, channelOffset, EltTy,
                                   hlslOP, Builder);
    ldInst->replaceAllUsesWith(newLd);
    ldInst->eraseFromParent();
  } else {
    // Must be GEP here
    GetElementPtrInst *GEP = cast<GetElementPtrInst>(user);
    TranslateCBGepLegacy(GEP, handle, legacyIdx, channelOffset, hlslOP, Builder,
                         prevFieldAnnotation, DL, dxilTypeSys, pObjHelper);
    GEP->eraseFromParent();
  }
}
// Fold a GEP on a legacy-layout cbuffer pointer into an updated
// (register index, channel) pair, then translate the GEP's users.  Array
// strides are rounded up to whole 16-byte registers (so an array element
// always starts at channel x); struct fields adjust the channel and carry
// the register over when it overflows past w.
void TranslateCBGepLegacy(GetElementPtrInst *GEP, Value *handle,
                          Value *legacyIndex, unsigned channel,
                          hlsl::OP *hlslOP, IRBuilder<> &Builder,
                          DxilFieldAnnotation *prevFieldAnnotation,
                          const DataLayout &DL, DxilTypeSystem &dxilTypeSys,
                          HLObjectOperationLowerHelper *pObjHelper) {
  SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());
  // update offset
  DxilFieldAnnotation *fieldAnnotation = prevFieldAnnotation;
  gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP);
  for (; GEPIt != E; GEPIt++) {
    Value *idx = GEPIt.getOperand();
    unsigned immIdx = 0;
    bool bImmIdx = false;
    if (Constant *constIdx = dyn_cast<Constant>(idx)) {
      immIdx = constIdx->getUniqueInteger().getLimitedValue();
      bImmIdx = true;
    }
    if (GEPIt->isPointerTy()) {
      // Leading (pointer) index: stride is the pointee size — a struct's
      // annotated cbuffer size, or for arrays the 16-byte-aligned element
      // size times the flattened multi-dimensional element count.
      Type *EltTy = GEPIt->getPointerElementType();
      unsigned size = 0;
      if (StructType *ST = dyn_cast<StructType>(EltTy)) {
        DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST);
        size = annotation->GetCBufferSize();
      } else {
        DXASSERT(fieldAnnotation, "must be a field");
        if (ArrayType *AT = dyn_cast<ArrayType>(EltTy)) {
          unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize(
              *fieldAnnotation, EltTy, dxilTypeSys);
          // Decide the nested array size.
          unsigned nestedArraySize = 1;
          Type *EltTy = AT->getArrayElementType();
          // support multi level of array
          while (EltTy->isArrayTy()) {
            ArrayType *EltAT = cast<ArrayType>(EltTy);
            nestedArraySize *= EltAT->getNumElements();
            EltTy = EltAT->getElementType();
          }
          // Align to 4 * 4 bytes.
          unsigned alignedSize = (EltSize + 15) & 0xfffffff0;
          size = nestedArraySize * alignedSize;
        } else {
          size = DL.getTypeAllocSize(EltTy);
        }
      }
      // Skip 0 idx.
      if (bImmIdx && immIdx == 0)
        continue;
      // Align to 4 * 4 bytes.
      size = (size + 15) & 0xfffffff0;
      // Take this as array idxing.
      if (bImmIdx) {
        // Constant index: fold the register increment at compile time
        // (byte offset divided by 16).
        unsigned tempOffset = size * immIdx;
        unsigned idxInc = tempOffset >> 4;
        legacyIndex = Builder.CreateAdd(legacyIndex, hlslOP->GetU32Const(idxInc));
      } else {
        // Dynamic index: emit mul + shift-right-4 to get the register count.
        Value *tempOffset = Builder.CreateMul(idx, hlslOP->GetU32Const(size));
        Value *idxInc = Builder.CreateLShr(tempOffset, 4);
        legacyIndex = Builder.CreateAdd(legacyIndex, idxInc);
      }
      // Array always start from x channel.
      channel = 0;
    } else if (GEPIt->isStructTy()) {
      // Struct field: the annotated byte offset is converted to 32-bit
      // channels (>>2); overflow past channel w rolls into the next register.
      StructType *ST = cast<StructType>(*GEPIt);
      DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST);
      fieldAnnotation = &annotation->GetFieldAnnotation(immIdx);
      unsigned structOffset = fieldAnnotation->GetCBufferOffset() >>2;
      channel += structOffset;
      unsigned idxInc = channel >> 2;
      channel = channel & 3;
      if (idxInc)
        legacyIndex = Builder.CreateAdd(legacyIndex, hlslOP->GetU32Const(idxInc));
    } else if (GEPIt->isArrayTy()) {
      // Array element: 16-byte-aligned stride times the flattened element
      // count of any nested arrays; result lands on a register boundary.
      DXASSERT(fieldAnnotation != nullptr, "must a field");
      unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize(
          *fieldAnnotation, *GEPIt, dxilTypeSys);
      // Decide the nested array size.
      unsigned nestedArraySize = 1;
      Type *EltTy = GEPIt->getArrayElementType();
      // support multi level of array
      while (EltTy->isArrayTy()) {
        ArrayType *EltAT = cast<ArrayType>(EltTy);
        nestedArraySize *= EltAT->getNumElements();
        EltTy = EltAT->getElementType();
      }
      // Align to 4 * 4 bytes.
      unsigned alignedSize = (EltSize + 15) & 0xfffffff0;
      unsigned size = nestedArraySize * alignedSize;
      if (bImmIdx) {
        unsigned tempOffset = size * immIdx;
        unsigned idxInc = tempOffset >> 4;
        legacyIndex = Builder.CreateAdd(legacyIndex, hlslOP->GetU32Const(idxInc));
      } else {
        Value *tempOffset = Builder.CreateMul(idx, hlslOP->GetU32Const(size));
        Value *idxInc = Builder.CreateLShr(tempOffset, 4);
        legacyIndex = Builder.CreateAdd(legacyIndex, idxInc);
      }
      // Array always start from x channel.
      channel = 0;
    } else if (GEPIt->isVectorTy()) {
      unsigned size = DL.getTypeAllocSize(GEPIt->getVectorElementType());
      // Indexing on vector.
      if (bImmIdx) {
        // Constant component: advance the channel; stepping exactly past w
        // moves to the next register.
        unsigned tempOffset = size * immIdx;
        unsigned channelInc = tempOffset >> 2;
        DXASSERT((channel + channelInc)<=4, "vector should not cross cb register");
        channel += channelInc;
        if (channel == 4) {
          // Get to another row.
          // Update index and channel.
          channel = 0;
          legacyIndex = Builder.CreateAdd(legacyIndex, Builder.getInt32(1));
        }
      } else {
        // Dynamic component: load the full register, spill it to a local
        // array, and redirect the GEP to index that array instead.
        Type *EltTy = GEPIt->getVectorElementType();
        // Load the whole register.
        Value *newLd = GenerateCBLoadLegacy(handle, legacyIndex,
                                            /*channelOffset*/ 0, EltTy,
                                            /*vecSize*/ 4, hlslOP, Builder);
        // Copy to array.
        IRBuilder<> AllocaBuilder(GEP->getParent()->getParent()->getEntryBlock().getFirstInsertionPt());
        Value *tempArray = AllocaBuilder.CreateAlloca(ArrayType::get(EltTy, 4));
        Value *zeroIdx = hlslOP->GetU32Const(0);
        for (unsigned i = 0; i < 4; i++) {
          Value *Elt = Builder.CreateExtractElement(newLd, i);
          Value *EltGEP = Builder.CreateInBoundsGEP(tempArray, {zeroIdx, hlslOP->GetU32Const(i)});
          Builder.CreateStore(Elt, EltGEP);
        }
        // Make sure this is the end of GEP.
        gep_type_iterator temp = GEPIt;
        temp++;
        DXASSERT(temp == E, "scalar type must be the last");
        // Replace the GEP with array GEP.
        Value *ArrayGEP = Builder.CreateInBoundsGEP(tempArray, {zeroIdx, idx});
        GEP->replaceAllUsesWith(ArrayGEP);
        // Caller erases the now-unused original GEP.
        return;
      }
    } else {
      // A scalar can only appear as the final GEP type.
      gep_type_iterator temp = GEPIt;
      temp++;
      DXASSERT(temp == E, "scalar type must be the last");
    }
  }
  // Translate every user at the final register/channel position.  Iterator
  // is advanced before the user is (potentially) erased.
  for (auto U = GEP->user_begin(); U != GEP->user_end();) {
    Instruction *user = cast<Instruction>(*(U++));
    TranslateCBAddressUserLegacy(user, handle, legacyIndex, channel, hlslOP, fieldAnnotation,
                                 dxilTypeSys, DL, pObjHelper);
  }
}
  4760. void TranslateCBOperationsLegacy(Value *handle, Value *ptr, OP *hlslOP,
  4761. DxilTypeSystem &dxilTypeSys,
  4762. const DataLayout &DL,
  4763. HLObjectOperationLowerHelper *pObjHelper) {
  4764. auto User = ptr->user_begin();
  4765. auto UserE = ptr->user_end();
  4766. Value *zeroIdx = hlslOP->GetU32Const(0);
  4767. for (; User != UserE;) {
  4768. // Must be Instruction.
  4769. Instruction *I = cast<Instruction>(*(User++));
  4770. TranslateCBAddressUserLegacy(
  4771. I, handle, zeroIdx, /*channelOffset*/ 0, hlslOP,
  4772. /*prevFieldAnnotation*/ nullptr, dxilTypeSys, DL, pObjHelper);
  4773. }
  4774. }
  4775. }
  4776. // Structured buffer.
  4777. namespace {
  4778. // Calculate offset.
  4779. Value *GEPIdxToOffset(GetElementPtrInst *GEP, IRBuilder<> &Builder,
  4780. hlsl::OP *OP, const DataLayout &DL) {
  4781. SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());
  4782. Value *addr = nullptr;
  4783. // update offset
  4784. if (GEP->hasAllConstantIndices()) {
  4785. unsigned gepOffset =
  4786. DL.getIndexedOffset(GEP->getPointerOperandType(), Indices);
  4787. addr = OP->GetU32Const(gepOffset);
  4788. } else {
  4789. Value *offset = OP->GetU32Const(0);
  4790. gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP);
  4791. for (; GEPIt != E; GEPIt++) {
  4792. Value *idx = GEPIt.getOperand();
  4793. unsigned immIdx = 0;
  4794. if (llvm::Constant *constIdx = dyn_cast<llvm::Constant>(idx)) {
  4795. immIdx = constIdx->getUniqueInteger().getLimitedValue();
  4796. if (immIdx == 0) {
  4797. continue;
  4798. }
  4799. }
  4800. if (GEPIt->isPointerTy()) {
  4801. unsigned size = DL.getTypeAllocSize(GEPIt->getPointerElementType());
  4802. if (immIdx) {
  4803. unsigned tempOffset = size * immIdx;
  4804. offset = Builder.CreateAdd(offset, OP->GetU32Const(tempOffset));
  4805. } else {
  4806. Value *tempOffset = Builder.CreateMul(idx, OP->GetU32Const(size));
  4807. offset = Builder.CreateAdd(offset, tempOffset);
  4808. }
  4809. } else if (GEPIt->isStructTy()) {
  4810. unsigned structOffset = 0;
  4811. for (unsigned i = 0; i < immIdx; i++) {
  4812. structOffset += DL.getTypeAllocSize(GEPIt->getStructElementType(i));
  4813. }
  4814. offset = Builder.CreateAdd(offset, OP->GetU32Const(structOffset));
  4815. } else if (GEPIt->isArrayTy()) {
  4816. unsigned size = DL.getTypeAllocSize(GEPIt->getArrayElementType());
  4817. if (immIdx) {
  4818. unsigned tempOffset = size * immIdx;
  4819. offset = Builder.CreateAdd(offset, OP->GetU32Const(tempOffset));
  4820. } else {
  4821. Value *tempOffset = Builder.CreateMul(idx, OP->GetU32Const(size));
  4822. offset = Builder.CreateAdd(offset, tempOffset);
  4823. }
  4824. } else if (GEPIt->isVectorTy()) {
  4825. unsigned size = DL.getTypeAllocSize(GEPIt->getVectorElementType());
  4826. if (immIdx) {
  4827. unsigned tempOffset = size * immIdx;
  4828. offset = Builder.CreateAdd(offset, OP->GetU32Const(tempOffset));
  4829. } else {
  4830. Value *tempOffset = Builder.CreateMul(idx, OP->GetU32Const(size));
  4831. offset = Builder.CreateAdd(offset, tempOffset);
  4832. }
  4833. } else {
  4834. gep_type_iterator temp = GEPIt;
  4835. temp++;
  4836. DXASSERT(temp == E, "scalar type must be the last");
  4837. }
  4838. };
  4839. addr = offset;
  4840. }
  4841. // TODO: x4 for byte address
  4842. return addr;
  4843. }
// Emit dx.op.bufferLoad reading resultElts.size() elements of EltTy from the
// buffer at (bufIdx, offset).
// For 32-bit (and smaller) element types a single load fills up to 4 result
// elements. For 64-bit types (i64/double) each element is fetched as two i32
// halves, so more than two elements need a second load 16 bytes further; the
// halves are fused back into 64-bit values by Make64bitResultForLoad.
// status, when non-null, receives the check-access-fully-mapped value of the
// last load issued (see UpdateStatus).
void GenerateStructBufLd(Value *handle, Value *bufIdx, Value *offset,
                         Value *status, Type *EltTy,
                         MutableArrayRef<Value *> resultElts, hlsl::OP *OP,
                         IRBuilder<> &Builder) {
  OP::OpCode opcode = OP::OpCode::BufferLoad;
  DXASSERT(resultElts.size() <= 4,
           "buffer load cannot load more than 4 values");
  Value *Args[] = {OP->GetU32Const((unsigned)opcode), handle, bufIdx, offset};
  Type *i64Ty = Builder.getInt64Ty();
  Type *doubleTy = Builder.getDoubleTy();
  bool is64 = EltTy == i64Ty || EltTy == doubleTy;
  if (!is64) {
    Function *dxilF = OP->GetOpFunc(opcode, EltTy);
    Value *Ld = Builder.CreateCall(dxilF, Args, OP::GetOpCodeName(opcode));
    for (unsigned i = 0; i < resultElts.size(); i++) {
      resultElts[i] = Builder.CreateExtractValue(Ld, i);
    }
    // status
    UpdateStatus(Ld, status, Builder);
    return;
  } else {
    // 64 bit: overload the load on i32 and reassemble value pairs.
    Function *dxilF = OP->GetOpFunc(opcode, Builder.getInt32Ty());
    Value *Ld = Builder.CreateCall(dxilF, Args, OP::GetOpCodeName(opcode));
    Value *resultElts32[8];
    unsigned size = resultElts.size();
    unsigned eltBase = 0;
    for (unsigned i = 0; i < size; i++) {
      if (i == 2) {
        // First load covered elements 0..1 (i32 lanes 0..3); issue a second
        // load 4*4 bytes further for elements 2..3 and rebase extraction.
        Args[DXIL::OperandIndex::kBufferLoadCoord1OpIdx] =
            Builder.CreateAdd(offset, Builder.getInt32(4 * 4));
        Ld = Builder.CreateCall(dxilF, Args, OP::GetOpCodeName(opcode));
        eltBase = 4;
      }
      // Element i occupies i32 lanes 2*i and 2*i+1 of the current load.
      unsigned resBase = 2 * i;
      resultElts32[resBase] = Builder.CreateExtractValue(Ld, resBase - eltBase);
      resultElts32[resBase + 1] =
          Builder.CreateExtractValue(Ld, resBase + 1 - eltBase);
    }
    Make64bitResultForLoad(EltTy, resultElts32, size, resultElts, OP, Builder);
    // status
    UpdateStatus(Ld, status, Builder);
    return;
  }
}
// Emit dx.op.bufferStore writing up to 4 values of EltTy at (bufIdx, offset).
// vals must hold exactly 4 entries (undef for unwritten lanes); mask is the
// component write mask. For 64-bit element types each value is split into
// two i32 halves by Split64bitValForStore: lanes 0..1 go out in one store
// with maskLo, and lanes 2..3 (when present) in a second store 16 bytes
// further with maskHi. Only contiguous-from-x masks (1/3/7/15) are supported
// in the 64-bit path.
void GenerateStructBufSt(Value *handle, Value *bufIdx, Value *offset,
                         Type *EltTy, hlsl::OP *OP, IRBuilder<> &Builder,
                         ArrayRef<Value *> vals, uint8_t mask) {
  OP::OpCode opcode = OP::OpCode::BufferStore;
  DXASSERT(vals.size() == 4, "buffer store need 4 values");
  Type *i64Ty = Builder.getInt64Ty();
  Type *doubleTy = Builder.getDoubleTy();
  bool is64 = EltTy == i64Ty || EltTy == doubleTy;
  if (!is64) {
    Value *Args[] = {OP->GetU32Const((unsigned)opcode),
                     handle,
                     bufIdx,
                     offset,
                     vals[0],
                     vals[1],
                     vals[2],
                     vals[3],
                     OP->GetU8Const(mask)};
    Function *dxilF = OP->GetOpFunc(opcode, EltTy);
    Builder.CreateCall(dxilF, Args);
  } else {
    // 64 bit: store i32 halves. Each 64-bit lane expands to two i32 lanes,
    // so the 4-bit input mask maps to (maskLo, maskHi) over 8 i32 lanes.
    Type *i32Ty = Builder.getInt32Ty();
    Function *dxilF = OP->GetOpFunc(opcode, i32Ty);
    Value *undefI32 = UndefValue::get(i32Ty);
    Value *vals32[8] = {undefI32, undefI32, undefI32, undefI32,
                        undefI32, undefI32, undefI32, undefI32};
    unsigned maskLo = 0;
    unsigned maskHi = 0;
    unsigned size = 0;
    switch (mask) {
    case 1:
      maskLo = 3;
      size = 1;
      break;
    case 3:
      maskLo = 15;
      size = 2;
      break;
    case 7:
      maskLo = 15;
      maskHi = 3;
      size = 3;
      break;
    case 15:
      maskLo = 15;
      maskHi = 15;
      size = 4;
      break;
    default:
      DXASSERT(0, "invalid mask");
    }
    Split64bitValForStore(EltTy, vals, size, vals32, OP, Builder);
    Value *Args[] = {OP->GetU32Const((unsigned)opcode),
                     handle,
                     bufIdx,
                     offset,
                     vals32[0],
                     vals32[1],
                     vals32[2],
                     vals32[3],
                     OP->GetU8Const(maskLo)};
    Builder.CreateCall(dxilF, Args);
    if (maskHi) {
      // Update offset 4 by 4 bytes for the high half of the value pairs.
      offset = Builder.CreateAdd(offset, Builder.getInt32(4 * 4));
      Value *Args[] = {OP->GetU32Const((unsigned)opcode),
                       handle,
                       bufIdx,
                       offset,
                       vals32[4],
                       vals32[5],
                       vals32[6],
                       vals32[7],
                       OP->GetU8Const(maskHi)};
      Builder.CreateCall(dxilF, Args);
    }
  }
}
// Load an entire matrix from a structured buffer as a flat vector of
// col*row elements, starting at baseOffset (treated as 0 when null).
// Elements are fetched 4 at a time with dx.op.bufferLoad; a leading partial
// group of (matSize % 4) elements is loaded first so the remaining loads
// are all full groups of 4.
// NOTE(review): offsets advance 4 bytes per element, so this assumes 32-bit
// matrix elements — confirm 64-bit matrices are handled elsewhere.
// NOTE(review): colMajor is unused here; elements are returned in buffer
// memory order for either orientation — verify callers expect that.
Value *TranslateStructBufMatLd(Type *matType, IRBuilder<> &Builder,
                               Value *handle, hlsl::OP *OP, Value *status,
                               Value *bufIdx, Value *baseOffset,
                               bool colMajor) {
  unsigned col, row;
  Type *EltTy = HLMatrixLower::GetMatrixInfo(matType, col, row);
  Value *offset = baseOffset;
  if (baseOffset == nullptr)
    offset = OP->GetU32Const(0);
  unsigned matSize = col * row;
  std::vector<Value *> elts(matSize);
  unsigned rest = (matSize % 4);
  if (rest) {
    // Partial leading group: load 4 lanes, keep only the first `rest`.
    Value *ResultElts[4];
    GenerateStructBufLd(handle, bufIdx, offset, status, EltTy, ResultElts, OP, Builder);
    for (unsigned i = 0; i < rest; i++)
      elts[i] = ResultElts[i];
    offset = Builder.CreateAdd(offset, OP->GetU32Const(4 * rest));
  }
  // Remaining elements come in full groups of 4.
  for (unsigned i = rest; i < matSize; i += 4) {
    Value *ResultElts[4];
    GenerateStructBufLd(handle, bufIdx, offset, status, EltTy, ResultElts, OP, Builder);
    elts[i] = ResultElts[0];
    elts[i + 1] = ResultElts[1];
    elts[i + 2] = ResultElts[2];
    elts[i + 3] = ResultElts[3];
    // Update offset by 4*4bytes.
    offset = Builder.CreateAdd(offset, OP->GetU32Const(4 * 4));
  }
  return HLMatrixLower::BuildVector(EltTy, col * row, elts, Builder);
}
// Store an entire matrix value to a structured buffer starting at baseOffset
// (treated as 0 when null), 4 elements per dx.op.bufferStore with a mask
// covering only the defined lanes of the trailing partial group.
// NOTE(review): the incoming `val` vector appears to be column-major
// ordered; when colMajor is false the loop below reshuffles it into
// row-major memory order — confirm against the HL matrix representation.
// NOTE(review): offsets advance 4*4 bytes per group, so this assumes 32-bit
// matrix elements (see the 64-bit TODO at the call site).
void TranslateStructBufMatSt(Type *matType, IRBuilder<> &Builder, Value *handle,
                             hlsl::OP *OP, Value *bufIdx, Value *baseOffset,
                             Value *val, bool colMajor) {
  unsigned col, row;
  Type *EltTy = HLMatrixLower::GetMatrixInfo(matType, col, row);
  Value *offset = baseOffset;
  if (baseOffset == nullptr)
    offset = OP->GetU32Const(0);
  unsigned matSize = col * row;
  Value *undefElt = UndefValue::get(EltTy);
  // Round the element list up to a multiple of 4; extra slots stay undef and
  // are excluded from the store mask.
  unsigned storeSize = matSize;
  if (matSize % 4) {
    storeSize = matSize + 4 - (matSize & 3);
  }
  std::vector<Value *> elts(storeSize, undefElt);
  if (colMajor) {
    for (unsigned i = 0; i < matSize; i++)
      elts[i] = Builder.CreateExtractElement(val, i);
  } else {
    // Transpose: memory slot (r,c) in row-major order takes the value from
    // the column-major position (c,r) of the register vector.
    for (unsigned r = 0; r < row; r++)
      for (unsigned c = 0; c < col; c++) {
        unsigned rowMajorIdx = r * col + c;
        unsigned colMajorIdx = c * row + r;
        elts[rowMajorIdx] = Builder.CreateExtractElement(val, colMajorIdx);
      }
  }
  for (unsigned i = 0; i < matSize; i += 4) {
    // Mask in only the lanes that hold real matrix elements.
    uint8_t mask = 0;
    for (unsigned j = 0; j < 4 && (i+j) < matSize; j++) {
      if (elts[i+j] != undefElt)
        mask |= (1<<j);
    }
    GenerateStructBufSt(handle, bufIdx, offset, EltTy, OP, Builder,
                        {elts[i], elts[i + 1], elts[i + 2], elts[i + 3]}, mask);
    // Update offset by 4*4bytes.
    offset = Builder.CreateAdd(offset, OP->GetU32Const(4 * 4));
  }
}
  5037. void TranslateStructBufMatLdSt(CallInst *CI, Value *handle, hlsl::OP *OP,
  5038. Value *status, Value *bufIdx,
  5039. Value *baseOffset) {
  5040. IRBuilder<> Builder(CI);
  5041. HLOpcodeGroup group = hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction());
  5042. unsigned opcode = GetHLOpcode(CI);
  5043. DXASSERT_LOCALVAR(group, group == HLOpcodeGroup::HLMatLoadStore,
  5044. "only translate matrix loadStore here.");
  5045. HLMatLoadStoreOpcode matOp = static_cast<HLMatLoadStoreOpcode>(opcode);
  5046. switch (matOp) {
  5047. case HLMatLoadStoreOpcode::ColMatLoad: {
  5048. Value *ptr = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx);
  5049. Value *NewLd = TranslateStructBufMatLd(
  5050. ptr->getType()->getPointerElementType(), Builder, handle, OP, status,
  5051. bufIdx, baseOffset, /*colMajor*/ true);
  5052. CI->replaceAllUsesWith(NewLd);
  5053. } break;
  5054. case HLMatLoadStoreOpcode::RowMatLoad: {
  5055. Value *ptr = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx);
  5056. Value *NewLd = TranslateStructBufMatLd(
  5057. ptr->getType()->getPointerElementType(), Builder, handle, OP, status,
  5058. bufIdx, baseOffset, /*colMajor*/ false);
  5059. CI->replaceAllUsesWith(NewLd);
  5060. } break;
  5061. case HLMatLoadStoreOpcode::ColMatStore: {
  5062. Value *ptr = CI->getArgOperand(HLOperandIndex::kMatStoreDstPtrOpIdx);
  5063. Value *val = CI->getArgOperand(HLOperandIndex::kMatStoreValOpIdx);
  5064. TranslateStructBufMatSt(ptr->getType()->getPointerElementType(), Builder,
  5065. handle, OP, bufIdx, baseOffset, val,
  5066. /*colMajor*/ true);
  5067. } break;
  5068. case HLMatLoadStoreOpcode::RowMatStore: {
  5069. Value *ptr = CI->getArgOperand(HLOperandIndex::kMatStoreDstPtrOpIdx);
  5070. Value *val = CI->getArgOperand(HLOperandIndex::kMatStoreValOpIdx);
  5071. TranslateStructBufMatSt(ptr->getType()->getPointerElementType(), Builder,
  5072. handle, OP, bufIdx, baseOffset, val,
  5073. /*colMajor*/ false);
  5074. } break;
  5075. }
  5076. CI->eraseFromParent();
  5077. }
  5078. void TranslateStructBufSubscriptUser(Instruction *user, Value *handle,
  5079. Value *bufIdx, Value *baseOffset,
  5080. Value *status, hlsl::OP *OP, const DataLayout &DL);
// Lower an HL matrix subscript (m[i] / m._m00 style element access) on a
// structured-buffer element. Computes one byte offset per selected
// component, then lowers every user of the subscript:
//  * single-component results recurse into TranslateStructBufSubscriptUser,
//  * GEPs on the result are folded to a component offset and recursed,
//  * stores become one masked bufferStore per component,
//  * loads become one bufferLoad per component, reassembled into a vector.
// The HL call is erased at the end.
void TranslateStructBufMatSubscript(CallInst *CI, Value *handle,
                                    hlsl::OP *hlslOP, Value *bufIdx,
                                    Value *baseOffset, Value *status,
                                    const DataLayout &DL) {
  Value *zeroIdx = hlslOP->GetU32Const(0);
  if (baseOffset == nullptr)
    baseOffset = zeroIdx;
  unsigned opcode = GetHLOpcode(CI);
  IRBuilder<> subBuilder(CI);
  HLSubscriptOpcode subOp = static_cast<HLSubscriptOpcode>(opcode);
  Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx);
  Type *matType = basePtr->getType()->getPointerElementType();
  unsigned col, row;
  Type *EltTy = HLMatrixLower::GetMatrixInfo(matType, col, row);
  Value *EltByteSize = ConstantInt::get(
      baseOffset->getType(), GetEltTypeByteSizeForConstBuf(EltTy, DL));
  Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx);
  // Result is a scalar or a vector of up to 16 components.
  Type *resultType = CI->getType()->getPointerElementType();
  unsigned resultSize = 1;
  if (resultType->isVectorTy())
    resultSize = resultType->getVectorNumElements();
  DXASSERT(resultSize <= 16, "up to 4x4 elements in vector or matrix");
  _Analysis_assume_(resultSize <= 16);
  // idxList[i] = byte offset of the i-th selected component.
  std::vector<Value *> idxList(resultSize);
  switch (subOp) {
  case HLSubscriptOpcode::ColMatSubscript:
  case HLSubscriptOpcode::RowMatSubscript: {
    // Dynamic subscript: one index operand per selected component.
    for (unsigned i = 0; i < resultSize; i++) {
      Value *offset =
          CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx + i);
      offset = subBuilder.CreateMul(offset, EltByteSize);
      idxList[i] = subBuilder.CreateAdd(baseOffset, offset);
    }
  } break;
  case HLSubscriptOpcode::RowMatElement:
  case HLSubscriptOpcode::ColMatElement: {
    // Element access: component indices are a constant aggregate.
    Constant *EltIdxs = cast<Constant>(idx);
    for (unsigned i = 0; i < resultSize; i++) {
      Value *offset =
          subBuilder.CreateMul(EltIdxs->getAggregateElement(i), EltByteSize);
      idxList[i] = subBuilder.CreateAdd(baseOffset, offset);
    }
  } break;
  default:
    DXASSERT(0, "invalid operation on const buffer");
    break;
  }
  Value *undefElt = UndefValue::get(EltTy);
  for (auto U = CI->user_begin(); U != CI->user_end();) {
    Value *subsUser = *(U++);
    if (resultSize == 1) {
      TranslateStructBufSubscriptUser(cast<Instruction>(subsUser), handle,
                                      bufIdx, idxList[0], status, hlslOP, DL);
      continue;
    }
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(subsUser)) {
      // GEP selects one component: map it to that component's offset and
      // lower each GEP user against it.
      Value *GEPOffset =
          HLMatrixLower::LowerGEPOnMatIndexListToIndex(GEP, idxList);
      for (auto gepU = GEP->user_begin(); gepU != GEP->user_end();) {
        Instruction *gepUserInst = cast<Instruction>(*(gepU++));
        TranslateStructBufSubscriptUser(gepUserInst, handle, bufIdx, GEPOffset,
                                        status, hlslOP, DL);
      }
      GEP->eraseFromParent();
    } else if (StoreInst *stUser = dyn_cast<StoreInst>(subsUser)) {
      // Store: one x-masked bufferStore per selected component.
      IRBuilder<> stBuilder(stUser);
      Value *Val = stUser->getValueOperand();
      if (Val->getType()->isVectorTy()) {
        for (unsigned i = 0; i < resultSize; i++) {
          Value *EltVal = stBuilder.CreateExtractElement(Val, i);
          uint8_t mask = DXIL::kCompMask_X;
          GenerateStructBufSt(handle, bufIdx, idxList[i], EltTy, hlslOP,
                              stBuilder, {EltVal, undefElt, undefElt, undefElt},
                              mask);
        }
      } else {
        uint8_t mask = DXIL::kCompMask_X;
        GenerateStructBufSt(handle, bufIdx, idxList[0], EltTy, hlslOP,
                            stBuilder, {Val, undefElt, undefElt, undefElt},
                            mask);
      }
      stUser->eraseFromParent();
    } else {
      // Must be load here.
      LoadInst *ldUser = cast<LoadInst>(subsUser);
      IRBuilder<> ldBuilder(ldUser);
      // Load: one bufferLoad per component, inserted into the result vector.
      Value *ldData = UndefValue::get(resultType);
      if (resultType->isVectorTy()) {
        for (unsigned i = 0; i < resultSize; i++) {
          Value *ResultElt;
          GenerateStructBufLd(handle, bufIdx, idxList[i],
                              /*status*/ nullptr, EltTy, ResultElt, hlslOP,
                              ldBuilder);
          ldData = ldBuilder.CreateInsertElement(ldData, ResultElt, i);
        }
      } else {
        GenerateStructBufLd(handle, bufIdx, idxList[0], /*status*/ nullptr,
                            EltTy, ldData, hlslOP, ldBuilder);
      }
      ldUser->replaceAllUsesWith(ldData);
      ldUser->eraseFromParent();
    }
  }
  CI->eraseFromParent();
}
  5186. void TranslateStructBufSubscriptUser(Instruction *user, Value *handle,
  5187. Value *bufIdx, Value *baseOffset,
  5188. Value *status, hlsl::OP *OP, const DataLayout &DL) {
  5189. IRBuilder<> Builder(user);
  5190. if (CallInst *userCall = dyn_cast<CallInst>(user)) {
  5191. HLOpcodeGroup group =
  5192. hlsl::GetHLOpcodeGroupByName(userCall->getCalledFunction());
  5193. unsigned opcode = GetHLOpcode(userCall);
  5194. // For case element type of structure buffer is not structure type.
  5195. if (baseOffset == nullptr)
  5196. baseOffset = OP->GetU32Const(0);
  5197. if (group == HLOpcodeGroup::HLIntrinsic) {
  5198. IntrinsicOp IOP = static_cast<IntrinsicOp>(opcode);
  5199. switch (IOP) {
  5200. case IntrinsicOp::MOP_Load: {
  5201. if (userCall->getType()->isPointerTy()) {
  5202. // Struct will return pointers which like []
  5203. } else {
  5204. // Use builtin types on structuredBuffer.
  5205. }
  5206. DXASSERT(0, "not implement yet");
  5207. } break;
  5208. case IntrinsicOp::IOP_InterlockedAdd: {
  5209. AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
  5210. baseOffset);
  5211. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Add,
  5212. Builder, OP);
  5213. } break;
  5214. case IntrinsicOp::IOP_InterlockedAnd: {
  5215. AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
  5216. baseOffset);
  5217. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::And,
  5218. Builder, OP);
  5219. } break;
  5220. case IntrinsicOp::IOP_InterlockedExchange: {
  5221. AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
  5222. baseOffset);
  5223. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Exchange,
  5224. Builder, OP);
  5225. } break;
  5226. case IntrinsicOp::IOP_InterlockedMax: {
  5227. AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
  5228. baseOffset);
  5229. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMax,
  5230. Builder, OP);
  5231. } break;
  5232. case IntrinsicOp::IOP_InterlockedMin: {
  5233. AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
  5234. baseOffset);
  5235. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMin,
  5236. Builder, OP);
  5237. } break;
  5238. case IntrinsicOp::IOP_InterlockedUMax: {
  5239. AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
  5240. baseOffset);
  5241. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMax,
  5242. Builder, OP);
  5243. } break;
  5244. case IntrinsicOp::IOP_InterlockedUMin: {
  5245. AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
  5246. baseOffset);
  5247. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMin,
  5248. Builder, OP);
  5249. } break;
  5250. case IntrinsicOp::IOP_InterlockedOr: {
  5251. AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
  5252. baseOffset);
  5253. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Or,
  5254. Builder, OP);
  5255. } break;
  5256. case IntrinsicOp::IOP_InterlockedXor: {
  5257. AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
  5258. baseOffset);
  5259. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Xor,
  5260. Builder, OP);
  5261. } break;
  5262. case IntrinsicOp::IOP_InterlockedCompareStore:
  5263. case IntrinsicOp::IOP_InterlockedCompareExchange: {
  5264. AtomicHelper helper(userCall, DXIL::OpCode::AtomicCompareExchange,
  5265. handle, bufIdx, baseOffset);
  5266. TranslateAtomicCmpXChg(helper, Builder, OP);
  5267. } break;
  5268. default:
  5269. DXASSERT(0, "invalid opcode");
  5270. break;
  5271. }
  5272. userCall->eraseFromParent();
  5273. } else if (group == HLOpcodeGroup::HLMatLoadStore)
  5274. // TODO: support 64 bit.
  5275. TranslateStructBufMatLdSt(userCall, handle, OP, status, bufIdx,
  5276. baseOffset);
  5277. else if (group == HLOpcodeGroup::HLSubscript) {
  5278. TranslateStructBufMatSubscript(userCall, handle, OP, bufIdx, baseOffset, status, DL);
  5279. }
  5280. } else if (isa<LoadInst>(user) || isa<StoreInst>(user)) {
  5281. LoadInst *ldInst = dyn_cast<LoadInst>(user);
  5282. StoreInst *stInst = dyn_cast<StoreInst>(user);
  5283. Type *Ty = isa<LoadInst>(user) ? ldInst->getType()
  5284. : stInst->getValueOperand()->getType();
  5285. Type *pOverloadTy = Ty->getScalarType();
  5286. Value *offset = baseOffset;
  5287. if (baseOffset == nullptr)
  5288. offset = OP->GetU32Const(0);
  5289. unsigned arraySize = 1;
  5290. Value *eltSize = nullptr;
  5291. if (pOverloadTy->isArrayTy()) {
  5292. arraySize = pOverloadTy->getArrayNumElements();
  5293. eltSize = OP->GetU32Const(
  5294. DL.getTypeAllocSize(pOverloadTy->getArrayElementType()));
  5295. pOverloadTy = pOverloadTy->getArrayElementType()->getScalarType();
  5296. }
  5297. if (ldInst) {
  5298. auto LdElement = [&](Value *offset, IRBuilder<> &Builder) -> Value * {
  5299. Value *ResultElts[4];
  5300. GenerateStructBufLd(handle, bufIdx, offset, status, pOverloadTy,
  5301. ResultElts, OP, Builder);
  5302. return ScalarizeElements(Ty, ResultElts, Builder);
  5303. };
  5304. Value *newLd = LdElement(offset, Builder);
  5305. if (arraySize > 1) {
  5306. newLd =
  5307. Builder.CreateInsertValue(UndefValue::get(Ty), newLd, (uint64_t)0);
  5308. for (unsigned i = 1; i < arraySize; i++) {
  5309. offset = Builder.CreateAdd(offset, eltSize);
  5310. Value *eltLd = LdElement(offset, Builder);
  5311. newLd = Builder.CreateInsertValue(newLd, eltLd, i);
  5312. }
  5313. }
  5314. ldInst->replaceAllUsesWith(newLd);
  5315. } else {
  5316. Value *val = stInst->getValueOperand();
  5317. auto StElement = [&](Value *offset, Value *val, IRBuilder<> &Builder) {
  5318. Value *undefVal = llvm::UndefValue::get(pOverloadTy);
  5319. Value *vals[] = {undefVal, undefVal, undefVal, undefVal};
  5320. uint8_t mask = 0;
  5321. if (Ty->isVectorTy()) {
  5322. unsigned vectorNumElements = Ty->getVectorNumElements();
  5323. DXASSERT(vectorNumElements <= 4, "up to 4 elements in vector");
  5324. _Analysis_assume_(vectorNumElements <= 4);
  5325. for (unsigned i = 0; i < vectorNumElements; i++) {
  5326. vals[i] = Builder.CreateExtractElement(val, i);
  5327. mask |= (1<<i);
  5328. }
  5329. } else {
  5330. vals[0] = val;
  5331. mask = DXIL::kCompMask_X;
  5332. }
  5333. GenerateStructBufSt(handle, bufIdx, offset, pOverloadTy, OP, Builder,
  5334. vals, mask);
  5335. };
  5336. if (arraySize > 1)
  5337. val = Builder.CreateExtractValue(val, 0);
  5338. StElement(offset, val, Builder);
  5339. if (arraySize > 1) {
  5340. val = stInst->getValueOperand();
  5341. for (unsigned i = 1; i < arraySize; i++) {
  5342. offset = Builder.CreateAdd(offset, eltSize);
  5343. Value *eltVal = Builder.CreateExtractValue(val, i);
  5344. StElement(offset, eltVal, Builder);
  5345. }
  5346. }
  5347. }
  5348. user->eraseFromParent();
  5349. } else {
  5350. // should only used by GEP
  5351. GetElementPtrInst *GEP = cast<GetElementPtrInst>(user);
  5352. Type *Ty = GEP->getType()->getPointerElementType();
  5353. Value *offset = GEPIdxToOffset(GEP, Builder, OP, DL);
  5354. DXASSERT_LOCALVAR(Ty, offset->getType() == Type::getInt32Ty(Ty->getContext()),
  5355. "else bitness is wrong");
  5356. if (baseOffset)
  5357. offset = Builder.CreateAdd(offset, baseOffset);
  5358. for (auto U = GEP->user_begin(); U != GEP->user_end();) {
  5359. Value *GEPUser = *(U++);
  5360. TranslateStructBufSubscriptUser(cast<Instruction>(GEPUser), handle,
  5361. bufIdx, offset, status, OP, DL);
  5362. }
  5363. // delete the inst
  5364. GEP->eraseFromParent();
  5365. }
  5366. }
  5367. void TranslateStructBufSubscript(CallInst *CI, Value *handle, Value *status,
  5368. hlsl::OP *OP, const DataLayout &DL) {
  5369. Value *bufIdx = CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx);
  5370. for (auto U = CI->user_begin(); U != CI->user_end();) {
  5371. Value *user = *(U++);
  5372. TranslateStructBufSubscriptUser(cast<Instruction>(user), handle, bufIdx,
  5373. /*baseOffset*/ nullptr, status, OP, DL);
  5374. }
  5375. }
  5376. }
  5377. // HLSubscript.
  5378. namespace {
  5379. Value *TranslateTypedBufLoad(CallInst *CI, DXIL::ResourceKind RK,
  5380. DXIL::ResourceClass RC, Value *handle,
  5381. LoadInst *ldInst, IRBuilder<> &Builder,
  5382. hlsl::OP *hlslOP, const DataLayout &DL) {
  5383. ResLoadHelper ldHelper(CI, RK, RC, handle, /*bForSubscript*/ true);
  5384. // Default sampleIdx for 2DMS textures.
  5385. if (RK == DxilResource::Kind::Texture2DMS ||
  5386. RK == DxilResource::Kind::Texture2DMSArray)
  5387. ldHelper.mipLevel = hlslOP->GetU32Const(0);
  5388. // use ldInst as retVal
  5389. ldHelper.retVal = ldInst;
  5390. TranslateLoad(ldHelper, RK, Builder, hlslOP, DL);
  5391. // delete the ld
  5392. ldInst->eraseFromParent();
  5393. return ldHelper.retVal;
  5394. }
// Insert EltVal into VecVal at lane EltIdx.
// For a constant index this is a single insertelement. For a dynamic index,
// the block is split at InsertPt and a switch over all vectorSize lanes is
// generated: each case block performs the insertelement for its lane, and a
// phi in the continuation block merges the results (the default edge leaves
// the vector unchanged). Returns the updated vector value; note InsertPt
// ends up in the continuation block after a dynamic-index update.
Value *UpdateVectorElt(Value *VecVal, Value *EltVal, Value *EltIdx,
                       unsigned vectorSize, Instruction *InsertPt) {
  IRBuilder<> Builder(InsertPt);
  if (ConstantInt *CEltIdx = dyn_cast<ConstantInt>(EltIdx)) {
    VecVal =
        Builder.CreateInsertElement(VecVal, EltVal, CEltIdx->getLimitedValue());
  } else {
    BasicBlock *BB = InsertPt->getParent();
    BasicBlock *EndBB = BB->splitBasicBlock(InsertPt);
    TerminatorInst *TI = BB->getTerminator();
    IRBuilder<> SwitchBuilder(TI);
    LLVMContext &Ctx = InsertPt->getContext();
    // Replace the unconditional branch created by splitBasicBlock with a
    // switch on the lane index; EndBB doubles as the no-op default target.
    SwitchInst *Switch = SwitchBuilder.CreateSwitch(EltIdx, EndBB, vectorSize);
    TI->eraseFromParent();
    Function *F = EndBB->getParent();
    IRBuilder<> endSwitchBuilder(EndBB->begin());
    Type *Ty = VecVal->getType();
    // One incoming value per case block plus the default edge from BB.
    PHINode *VecPhi = endSwitchBuilder.CreatePHI(Ty, vectorSize + 1);
    for (unsigned i = 0; i < vectorSize; i++) {
      BasicBlock *CaseBB = BasicBlock::Create(Ctx, "case", F, EndBB);
      Switch->addCase(SwitchBuilder.getInt32(i), CaseBB);
      IRBuilder<> CaseBuilder(CaseBB);
      Value *CaseVal = CaseBuilder.CreateInsertElement(VecVal, EltVal, i);
      VecPhi->addIncoming(CaseVal, CaseBB);
      CaseBuilder.CreateBr(EndBB);
    }
    // Out-of-range index: vector flows through unmodified.
    VecPhi->addIncoming(VecVal, BB);
    VecVal = VecPhi;
  }
  return VecVal;
}
  5426. void TranslateDefaultSubscript(CallInst *CI, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  5427. auto U = CI->user_begin();
  5428. Value *ptr = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx);
  5429. hlsl::OP *hlslOP = &helper.hlslOP;
  5430. // Resource ptr.
  5431. Value *handle = ptr;
  5432. DXIL::ResourceClass RC = pObjHelper->GetRC(handle);
  5433. DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
  5434. Type *Ty = CI->getType()->getPointerElementType();
  5435. for (auto It = CI->user_begin(); It != CI->user_end(); ) {
  5436. User *user = *(It++);
  5437. Instruction *I = cast<Instruction>(user);
  5438. IRBuilder<> Builder(I);
  5439. if (LoadInst *ldInst = dyn_cast<LoadInst>(user)) {
  5440. TranslateTypedBufLoad(CI, RK, RC, handle, ldInst, Builder, hlslOP, helper.legacyDataLayout);
  5441. } else if (StoreInst *stInst = dyn_cast<StoreInst>(user)) {
  5442. Value *val = stInst->getValueOperand();
  5443. TranslateStore(RK, handle, val,
  5444. CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx),
  5445. Builder, hlslOP);
  5446. // delete the st
  5447. stInst->eraseFromParent();
  5448. } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(user)) {
  5449. // Must be vector type here.
  5450. unsigned vectorSize = Ty->getVectorNumElements();
  5451. DXASSERT(GEP->getNumIndices() == 2, "");
  5452. Use *GEPIdx = GEP->idx_begin();
  5453. GEPIdx++;
  5454. Value *EltIdx = *GEPIdx;
  5455. for (auto GEPIt = GEP->user_begin(); GEPIt != GEP->user_end();) {
  5456. User *GEPUser = *(GEPIt++);
  5457. if (StoreInst *SI = dyn_cast<StoreInst>(GEPUser)) {
  5458. IRBuilder<> StBuilder(SI);
  5459. // Generate Ld.
  5460. LoadInst *tmpLd = StBuilder.CreateLoad(CI);
  5461. Value *ldVal = TranslateTypedBufLoad(CI, RK, RC, handle, tmpLd, StBuilder,
  5462. hlslOP, helper.legacyDataLayout);
  5463. // Update vector.
  5464. ldVal = UpdateVectorElt(ldVal, SI->getValueOperand(), EltIdx,
  5465. vectorSize, SI);
  5466. // Generate St.
  5467. // Reset insert point, UpdateVectorElt may move SI to different block.
  5468. StBuilder.SetInsertPoint(SI);
  5469. TranslateStore(RK, handle, ldVal,
  5470. CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx),
  5471. StBuilder, hlslOP);
  5472. SI->eraseFromParent();
  5473. continue;
  5474. }
  5475. if (!isa<CallInst>(GEPUser)) {
  5476. // Invalid operations.
  5477. Translated = false;
  5478. CI->getContext().emitError(GEP, "Invalid operation on typed buffer");
  5479. return;
  5480. }
  5481. CallInst *userCall = cast<CallInst>(GEPUser);
  5482. HLOpcodeGroup group =
  5483. hlsl::GetHLOpcodeGroupByName(userCall->getCalledFunction());
  5484. if (group != HLOpcodeGroup::HLIntrinsic) {
  5485. // Invalid operations.
  5486. Translated = false;
  5487. CI->getContext().emitError(userCall,
  5488. "Invalid operation on typed buffer");
  5489. return;
  5490. }
  5491. unsigned opcode = hlsl::GetHLOpcode(userCall);
  5492. IntrinsicOp IOP = static_cast<IntrinsicOp>(opcode);
  5493. switch (IOP) {
  5494. case IntrinsicOp::IOP_InterlockedAdd:
  5495. case IntrinsicOp::IOP_InterlockedAnd:
  5496. case IntrinsicOp::IOP_InterlockedExchange:
  5497. case IntrinsicOp::IOP_InterlockedMax:
  5498. case IntrinsicOp::IOP_InterlockedMin:
  5499. case IntrinsicOp::IOP_InterlockedUMax:
  5500. case IntrinsicOp::IOP_InterlockedUMin:
  5501. case IntrinsicOp::IOP_InterlockedOr:
  5502. case IntrinsicOp::IOP_InterlockedXor:
  5503. case IntrinsicOp::IOP_InterlockedCompareStore:
  5504. case IntrinsicOp::IOP_InterlockedCompareExchange: {
  5505. // Invalid operations.
  5506. Translated = false;
  5507. CI->getContext().emitError(
  5508. userCall, "Atomic operation on typed buffer is not supported");
  5509. return;
  5510. } break;
  5511. default:
  5512. // Invalid operations.
  5513. Translated = false;
  5514. CI->getContext().emitError(userCall,
  5515. "Invalid operation on typed buffer");
  5516. return;
  5517. break;
  5518. }
  5519. }
  5520. GEP->eraseFromParent();
  5521. } else {
  5522. CallInst *userCall = cast<CallInst>(user);
  5523. HLOpcodeGroup group =
  5524. hlsl::GetHLOpcodeGroupByName(userCall->getCalledFunction());
  5525. unsigned opcode = hlsl::GetHLOpcode(userCall);
  5526. if (group == HLOpcodeGroup::HLIntrinsic) {
  5527. IntrinsicOp IOP = static_cast<IntrinsicOp>(opcode);
  5528. if (RC == DXIL::ResourceClass::SRV) {
  5529. // Invalid operations.
  5530. Translated = false;
  5531. switch (IOP) {
  5532. case IntrinsicOp::IOP_InterlockedAdd:
  5533. case IntrinsicOp::IOP_InterlockedAnd:
  5534. case IntrinsicOp::IOP_InterlockedExchange:
  5535. case IntrinsicOp::IOP_InterlockedMax:
  5536. case IntrinsicOp::IOP_InterlockedMin:
  5537. case IntrinsicOp::IOP_InterlockedUMax:
  5538. case IntrinsicOp::IOP_InterlockedUMin:
  5539. case IntrinsicOp::IOP_InterlockedOr:
  5540. case IntrinsicOp::IOP_InterlockedXor:
  5541. case IntrinsicOp::IOP_InterlockedCompareStore:
  5542. case IntrinsicOp::IOP_InterlockedCompareExchange: {
  5543. CI->getContext().emitError(
  5544. userCall, "Atomic operation targets must be groupshared on UAV");
  5545. return;
  5546. } break;
  5547. default:
  5548. CI->getContext().emitError(userCall,
  5549. "Invalid operation on typed buffer");
  5550. return;
  5551. break;
  5552. }
  5553. }
  5554. switch (IOP) {
  5555. case IntrinsicOp::IOP_InterlockedAdd: {
  5556. ResLoadHelper helper(CI, RK, RC, handle);
  5557. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
  5558. helper.addr, /*offset*/ nullptr);
  5559. TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::Add,
  5560. Builder, hlslOP);
  5561. } break;
  5562. case IntrinsicOp::IOP_InterlockedAnd: {
  5563. ResLoadHelper helper(CI, RK, RC, handle);
  5564. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
  5565. helper.addr, /*offset*/ nullptr);
  5566. TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::And,
  5567. Builder, hlslOP);
  5568. } break;
  5569. case IntrinsicOp::IOP_InterlockedExchange: {
  5570. ResLoadHelper helper(CI, RK, RC, handle);
  5571. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
  5572. helper.addr, /*offset*/ nullptr);
  5573. TranslateAtomicBinaryOperation(
  5574. atomHelper, DXIL::AtomicBinOpCode::Exchange, Builder, hlslOP);
  5575. } break;
  5576. case IntrinsicOp::IOP_InterlockedMax: {
  5577. ResLoadHelper helper(CI, RK, RC, handle);
  5578. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
  5579. helper.addr, /*offset*/ nullptr);
  5580. TranslateAtomicBinaryOperation(
  5581. atomHelper, DXIL::AtomicBinOpCode::IMax, Builder, hlslOP);
  5582. } break;
  5583. case IntrinsicOp::IOP_InterlockedMin: {
  5584. ResLoadHelper helper(CI, RK, RC, handle);
  5585. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
  5586. helper.addr, /*offset*/ nullptr);
  5587. TranslateAtomicBinaryOperation(
  5588. atomHelper, DXIL::AtomicBinOpCode::IMin, Builder, hlslOP);
  5589. } break;
  5590. case IntrinsicOp::IOP_InterlockedUMax: {
  5591. ResLoadHelper helper(CI, RK, RC, handle);
  5592. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
  5593. helper.addr, /*offset*/ nullptr);
  5594. TranslateAtomicBinaryOperation(
  5595. atomHelper, DXIL::AtomicBinOpCode::UMax, Builder, hlslOP);
  5596. } break;
  5597. case IntrinsicOp::IOP_InterlockedUMin: {
  5598. ResLoadHelper helper(CI, RK, RC, handle);
  5599. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
  5600. helper.addr, /*offset*/ nullptr);
  5601. TranslateAtomicBinaryOperation(
  5602. atomHelper, DXIL::AtomicBinOpCode::UMin, Builder, hlslOP);
  5603. } break;
  5604. case IntrinsicOp::IOP_InterlockedOr: {
  5605. ResLoadHelper helper(CI, RK, RC, handle);
  5606. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
  5607. helper.addr, /*offset*/ nullptr);
  5608. TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::Or,
  5609. Builder, hlslOP);
  5610. } break;
  5611. case IntrinsicOp::IOP_InterlockedXor: {
  5612. ResLoadHelper helper(CI, RK, RC, handle);
  5613. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
  5614. helper.addr, /*offset*/ nullptr);
  5615. TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::Xor,
  5616. Builder, hlslOP);
  5617. } break;
  5618. case IntrinsicOp::IOP_InterlockedCompareStore:
  5619. case IntrinsicOp::IOP_InterlockedCompareExchange: {
  5620. ResLoadHelper helper(CI, RK, RC, handle);
  5621. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicCompareExchange,
  5622. handle, helper.addr, /*offset*/ nullptr);
  5623. TranslateAtomicCmpXChg(atomHelper, Builder, hlslOP);
  5624. } break;
  5625. default:
  5626. DXASSERT(0, "invalid opcode");
  5627. break;
  5628. }
  5629. } else {
  5630. DXASSERT(0, "invalid group");
  5631. }
  5632. userCall->eraseFromParent();
  5633. }
  5634. }
  5635. }
// Lowers a high-level subscript call (HLSL operator[]) into DXIL operations.
//
// CI         - the HLSubscript call being lowered.
// opcode     - subscript flavor: cbuffer, double subscript (e.g. tex.mips[m][c]),
//              or default (resource / matrix indexing).
// helper     - shared lowering state (hlslOP builder, type system, data layouts).
// pObjHelper - maps resource handles to their kind / resource type.
// Translated - set to true when CI's uses were fully replaced/erased (caller
//              then deletes CI), false when a later pass must handle it.
void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode,
                          HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  // A subscript nobody reads or writes needs no lowering; report success so
  // the caller erases the dead call.
  if (CI->user_empty()) {
    Translated = true;
    return;
  }
  hlsl::OP *hlslOP = &helper.hlslOP;

  Value *ptr = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx);
  if (opcode == HLSubscriptOpcode::CBufferSubscript) {
    // Merge GEP chains on CI's users first so cbuffer offsets are computed
    // from a single flattened GEP.
    HLModule::MergeGepUse(CI);
    // Resource ptr.
    Value *handle = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx);
    if (helper.bLegacyCBufferLoad)
      TranslateCBOperationsLegacy(handle, CI, hlslOP, helper.dxilTypeSys,
                                  helper.legacyDataLayout, pObjHelper);
    else {
      TranslateCBOperations(handle, CI, /*offset*/ hlslOP->GetU32Const(0),
                            hlslOP, helper.dxilTypeSys,
                            CI->getModule()->getDataLayout());
    }
    Translated = true;
    return;
  } else if (opcode == HLSubscriptOpcode::DoubleSubscript) {
    // Resource ptr.
    Value *handle = ptr;
    DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
    Value *coord = CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx);
    Value *mipLevel =
        CI->getArgOperand(HLOperandIndex::kDoubleSubscriptMipLevelOpIdx);

    auto U = CI->user_begin();
    DXASSERT(CI->hasOneUse(), "subscript should only has one use");
    // TODO: support store.
    // Only loads through the double subscript are handled here; the single
    // user is assumed to be that load instruction.
    Instruction *ldInst = cast<Instruction>(*U);

    ResLoadHelper ldHelper(ldInst, handle, coord, mipLevel);
    IRBuilder<> Builder(CI);
    TranslateLoad(ldHelper, RK, Builder, hlslOP, helper.legacyDataLayout);
    ldInst->eraseFromParent();
    Translated = true;
    return;
  } else {
    Type *HandleTy = hlslOP->GetHandleType();
    if (ptr->getType() == HandleTy) {
      // Resource ptr.
      Value *handle = ptr;
      DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
      // Unknown resource kind: leave CI for a later attempt/diagnostic.
      if (RK == DxilResource::Kind::Invalid) {
        Translated = false;
        return;
      }
      Translated = true;
      Type *ObjTy = pObjHelper->GetResourceType(handle);
      Type *RetTy = ObjTy->getStructElementType(0);
      if (RK == DxilResource::Kind::StructuredBuffer) {
        TranslateStructBufSubscript(CI, handle, /*status*/ nullptr, hlslOP,
                                    helper.legacyDataLayout);
      } else if (RetTy->isAggregateType() &&
                 RK == DxilResource::Kind::TypedBuffer) {
        // Typed buffers holding aggregates are lowered through the same path
        // as structured buffers, then patched below to drop the byte offset
        // (typed buffers have no second coordinate).
        TranslateStructBufSubscript(CI, handle, /*status*/ nullptr, hlslOP,
                                    helper.legacyDataLayout);

        // Clear offset for typed buf.
        for (auto User : handle->users()) {
          // NOTE: this CI intentionally shadows the outer CI; it is the
          // already-lowered DXIL call using the handle.
          CallInst *CI = cast<CallInst>(User);
          // Skip not lowered HL functions.
          if (hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction()) != HLOpcodeGroup::NotHL)
            continue;
          // Replace the coord1/offset operand of each buffer access with
          // undef, as required for typed-buffer DXIL ops.
          switch (hlslOP->GetDxilOpFuncCallInst(CI)) {
          case DXIL::OpCode::BufferLoad: {
            CI->setArgOperand(DXIL::OperandIndex::kBufferLoadCoord1OpIdx,
                              UndefValue::get(helper.i32Ty));
          } break;
          case DXIL::OpCode::BufferStore: {
            CI->setArgOperand(DXIL::OperandIndex::kBufferStoreCoord1OpIdx,
                              UndefValue::get(helper.i32Ty));
          } break;
          case DXIL::OpCode::AtomicBinOp: {
            CI->setArgOperand(DXIL::OperandIndex::kAtomicBinOpCoord1OpIdx,
                              UndefValue::get(helper.i32Ty));
          } break;
          case DXIL::OpCode::AtomicCompareExchange: {
            CI->setArgOperand(DXIL::OperandIndex::kAtomicCmpExchangeCoord1OpIdx,
                              UndefValue::get(helper.i32Ty));
          } break;
          default:
            DXASSERT(0, "Invalid operation on resource handle");
            break;
          }
        }
      } else {
        TranslateDefaultSubscript(CI, helper, pObjHelper, Translated);
      }
      return;
    }
  }

  // Not a resource handle: this is a matrix subscript.
  Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx);
  if (IsLocalVariablePtr(basePtr) || IsSharedMemPtr(basePtr)) {
    // Translate matrix into vector of array for share memory or local
    // variable should be done in HLMatrixLowerPass
    DXASSERT_NOMSG(0);
    Translated = true;
    return;
  }

  // Other case should be take care in TranslateStructBufSubscript or
  // TranslateCBOperations.
  Translated = false;
  return;
}
  5742. }
  5743. void TranslateSubscriptOperation(Function *F, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper) {
  5744. for (auto U = F->user_begin(); U != F->user_end();) {
  5745. Value *user = *(U++);
  5746. if (!isa<Instruction>(user))
  5747. continue;
  5748. // must be call inst
  5749. CallInst *CI = cast<CallInst>(user);
  5750. unsigned opcode = GetHLOpcode(CI);
  5751. bool Translated = true;
  5752. TranslateHLSubscript(
  5753. CI, static_cast<HLSubscriptOpcode>(opcode), helper, pObjHelper, Translated);
  5754. if (Translated) {
  5755. // delete the call
  5756. DXASSERT(CI->use_empty(),
  5757. "else TranslateHLSubscript didn't replace/erase uses");
  5758. CI->eraseFromParent();
  5759. }
  5760. }
  5761. }
  5762. void TranslateHLBuiltinOperation(Function *F, HLOperationLowerHelper &helper,
  5763. hlsl::HLOpcodeGroup group, HLObjectOperationLowerHelper *pObjHelper) {
  5764. if (group == HLOpcodeGroup::HLIntrinsic) {
  5765. // map to dxil operations
  5766. for (auto U = F->user_begin(); U != F->user_end();) {
  5767. Value *User = *(U++);
  5768. if (!isa<Instruction>(User))
  5769. continue;
  5770. // must be call inst
  5771. CallInst *CI = cast<CallInst>(User);
  5772. // Keep the instruction to lower by other function.
  5773. bool Translated = true;
  5774. TranslateBuiltinIntrinsic(CI, helper, pObjHelper, Translated);
  5775. if (Translated) {
  5776. // delete the call
  5777. DXASSERT(CI->use_empty(),
  5778. "else TranslateBuiltinIntrinsic didn't replace/erase uses");
  5779. CI->eraseFromParent();
  5780. }
  5781. }
  5782. } else {
  5783. if (group == HLOpcodeGroup::HLMatLoadStore) {
  5784. // Both ld/st use arg1 for the pointer.
  5785. Type *PtrTy =
  5786. F->getFunctionType()->getParamType(HLOperandIndex::kMatLoadPtrOpIdx);
  5787. if (PtrTy->getPointerAddressSpace() == DXIL::kTGSMAddrSpace ||
  5788. // TODO: use DeviceAddressSpace for SRV/UAV and CBufferAddressSpace
  5789. // for CBuffer.
  5790. PtrTy->getPointerAddressSpace() == DXIL::kDefaultAddrSpace) {
  5791. // Translate matrix into vector of array for share memory or local
  5792. // variable should be done in HLMatrixLowerPass.
  5793. if (!F->user_empty())
  5794. F->getContext().emitError("Fail to lower matrix load/store.");
  5795. }
  5796. } else if (group == HLOpcodeGroup::HLSubscript) {
  5797. TranslateSubscriptOperation(F, helper, pObjHelper);
  5798. }
  5799. // map to math function or llvm ir
  5800. }
  5801. }
  5802. typedef std::unordered_map<llvm::Instruction *, llvm::Value *> HandleMap;
  5803. static void TranslateHLExtension(Function *F,
  5804. HLSLExtensionsCodegenHelper *helper,
  5805. OP& hlslOp) {
  5806. // Find all calls to the function F.
  5807. // Store the calls in a vector for now to be replaced the loop below.
  5808. // We use a two step "find then replace" to avoid removing uses while
  5809. // iterating.
  5810. SmallVector<CallInst *, 8> CallsToReplace;
  5811. for (User *U : F->users()) {
  5812. if (CallInst *CI = dyn_cast<CallInst>(U)) {
  5813. CallsToReplace.push_back(CI);
  5814. }
  5815. }
  5816. // Get the lowering strategy to use for this intrinsic.
  5817. llvm::StringRef LowerStrategy = GetHLLowerStrategy(F);
  5818. ExtensionLowering lower(LowerStrategy, helper, hlslOp);
  5819. // Replace all calls that were successfully translated.
  5820. for (CallInst *CI : CallsToReplace) {
  5821. Value *Result = lower.Translate(CI);
  5822. if (Result && Result != CI) {
  5823. CI->replaceAllUsesWith(Result);
  5824. CI->eraseFromParent();
  5825. }
  5826. }
  5827. }
  5828. namespace hlsl {
  5829. void TranslateBuiltinOperations(
  5830. HLModule &HLM, HLSLExtensionsCodegenHelper *extCodegenHelper,
  5831. std::unordered_set<LoadInst *> &UpdateCounterSet,
  5832. std::unordered_set<Value *> &NonUniformSet) {
  5833. HLOperationLowerHelper helper(HLM);
  5834. HLObjectOperationLowerHelper objHelper = {HLM, UpdateCounterSet,
  5835. NonUniformSet};
  5836. Module *M = HLM.GetModule();
  5837. // generate dxil operation
  5838. for (iplist<Function>::iterator F : M->getFunctionList()) {
  5839. if (!F->isDeclaration()) {
  5840. continue;
  5841. }
  5842. if (F->user_empty())
  5843. continue;
  5844. hlsl::HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(F);
  5845. if (group == HLOpcodeGroup::NotHL) {
  5846. // Nothing to do.
  5847. continue;
  5848. }
  5849. if (group == HLOpcodeGroup::HLExtIntrinsic) {
  5850. TranslateHLExtension(F, extCodegenHelper, helper.hlslOP);
  5851. continue;
  5852. }
  5853. if (group == HLOpcodeGroup::HLCreateHandle) {
  5854. // Will lower in later pass.
  5855. continue;
  5856. }
  5857. TranslateHLBuiltinOperation(F, helper, group, &objHelper);
  5858. }
  5859. }
  5860. }