HLOperationLower.cpp 322 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770
  1. ///////////////////////////////////////////////////////////////////////////////
  2. // //
  3. // HLOperationLower.cpp //
  4. // Copyright (C) Microsoft Corporation. All rights reserved. //
  5. // This file is distributed under the University of Illinois Open Source //
  6. // License. See LICENSE.TXT for details. //
  7. // //
  8. // Lower functions to lower HL operations to DXIL operations. //
  9. // //
  10. ///////////////////////////////////////////////////////////////////////////////
  11. #define _USE_MATH_DEFINES
  12. #include <array>
  13. #include <cmath>
  14. #include <unordered_set>
  15. #include <functional>
  16. #include "dxc/DXIL/DxilModule.h"
  17. #include "dxc/DXIL/DxilOperations.h"
  18. #include "dxc/HLSL/HLMatrixLowerHelper.h"
  19. #include "dxc/HLSL/HLMatrixType.h"
  20. #include "dxc/HLSL/HLModule.h"
  21. #include "dxc/DXIL/DxilUtil.h"
  22. #include "dxc/HLSL/HLOperationLower.h"
  23. #include "dxc/HLSL/HLOperationLowerExtension.h"
  24. #include "dxc/HLSL/HLOperations.h"
  25. #include "dxc/HlslIntrinsicOp.h"
  26. #include "dxc/HLSL/DxilConvergent.h"
  27. #include "dxc/DXIL/DxilResourceProperties.h"
  28. #include "llvm/IR/GetElementPtrTypeIterator.h"
  29. #include "llvm/IR/IRBuilder.h"
  30. #include "llvm/IR/Instructions.h"
  31. #include "llvm/IR/Module.h"
  32. #include "llvm/ADT/APSInt.h"
  33. using namespace llvm;
  34. using namespace hlsl;
  35. struct HLOperationLowerHelper {
  36. OP &hlslOP;
  37. Type *voidTy;
  38. Type *f32Ty;
  39. Type *i32Ty;
  40. llvm::Type *i1Ty;
  41. Type *i8Ty;
  42. DxilTypeSystem &dxilTypeSys;
  43. DxilFunctionProps *functionProps;
  44. bool bLegacyCBufferLoad;
  45. DataLayout dataLayout;
  46. HLOperationLowerHelper(HLModule &HLM);
  47. };
  48. HLOperationLowerHelper::HLOperationLowerHelper(HLModule &HLM)
  49. : hlslOP(*HLM.GetOP()), dxilTypeSys(HLM.GetTypeSystem()),
  50. dataLayout(DataLayout(HLM.GetHLOptions().bUseMinPrecision
  51. ? hlsl::DXIL::kLegacyLayoutString
  52. : hlsl::DXIL::kNewLayoutString)) {
  53. llvm::LLVMContext &Ctx = HLM.GetCtx();
  54. voidTy = Type::getVoidTy(Ctx);
  55. f32Ty = Type::getFloatTy(Ctx);
  56. i32Ty = Type::getInt32Ty(Ctx);
  57. i1Ty = Type::getInt1Ty(Ctx);
  58. i8Ty = Type::getInt8Ty(Ctx);
  59. Function *EntryFunc = HLM.GetEntryFunction();
  60. functionProps = nullptr;
  61. if (HLM.HasDxilFunctionProps(EntryFunc))
  62. functionProps = &HLM.GetDxilFunctionProps(EntryFunc);
  63. bLegacyCBufferLoad = HLM.GetHLOptions().bLegacyCBufferLoad;
  64. }
// Helper for lowering object (resource) intrinsics: resolves handle values
// back to resource class/kind/type, records UAV counter usage, and
// materializes substitute global resource variables for resources declared
// inside cbuffers.
struct HLObjectOperationLowerHelper {
private:
  // For object intrinsics.
  HLModule &HLM;
  // Resource attributes resolved from an annotate-handle call.
  struct ResAttribute {
    DXIL::ResourceClass RC;
    DXIL::ResourceKind RK;
    Type *ResourceType;
  };
  // Cache: handle value -> resolved resource attributes.
  std::unordered_map<Value *, ResAttribute> HandleMetaMap;
  // Caller-owned set of resource loads whose UAV counter is used.
  std::unordered_set<LoadInst *> &UpdateCounterSet;
  // Map from pointer of cbuffer to pointer of resource.
  // For cbuffer like this:
  //   cbuffer A {
  //     Texture2D T;
  //   };
  // A global resource Texture2D T2 will be created for Texture2D T.
  // CBPtrToResourceMap[T] will return T2.
  std::unordered_map<Value *, Value *> CBPtrToResourceMap;

public:
  HLObjectOperationLowerHelper(HLModule &HLM,
                               std::unordered_set<LoadInst *> &UpdateCounter)
      : HLM(HLM), UpdateCounterSet(UpdateCounter) {}

  // Resource class (SRV/UAV/CBuffer/Sampler) for the given handle.
  DXIL::ResourceClass GetRC(Value *Handle) {
    ResAttribute &Res = FindCreateHandleResourceBase(Handle);
    return Res.RC;
  }
  // Resource kind (Texture2D, StructuredBuffer, ...) for the given handle.
  DXIL::ResourceKind GetRK(Value *Handle) {
    ResAttribute &Res = FindCreateHandleResourceBase(Handle);
    return Res.RK;
  }
  // HLSL resource type for the given handle.
  Type *GetResourceType(Value *Handle) {
    ResAttribute &Res = FindCreateHandleResourceBase(Handle);
    return Res.ResourceType;
  }

  // Marks the resource behind `handle` (which must be an annotate-handle
  // call on a UAV) as a structured buffer with counter, and records the
  // underlying resource load(s) in UpdateCounterSet.
  void MarkHasCounter(Value *handle, Type *i8Ty) {
    CallInst *CIHandle = cast<CallInst>(handle);
    DXASSERT(hlsl::GetHLOpcodeGroup(CIHandle->getCalledFunction()) ==
                 HLOpcodeGroup::HLAnnotateHandle,
             "else invalid handle");
    // Mark has counter for the input handle.
    Value *counterHandle =
        CIHandle->getArgOperand(HLOperandIndex::kAnnotateHandleHandleOpIdx);
    // Change kind into StructurBufferWithCounter.
    CIHandle->setArgOperand(
        HLOperandIndex::kAnnotateHandleResourceKindOpIdx,
        ConstantInt::get(
            i8Ty,
            (unsigned)DXIL::ResourceKind::StructuredBufferWithCounter));
    // Counters are only valid on UAVs.
    DXIL::ResourceClass RC = GetRC(handle);
    DXASSERT_LOCALVAR(RC, RC == DXIL::ResourceClass::UAV,
                      "must UAV for counter");
    // Walk through select/phi chains to reach the create-handle call(s).
    std::unordered_set<Value *> resSet;
    MarkHasCounterOnCreateHandle(counterHandle, resSet);
  }

  // Returns the substitute global resource for a resource declared inside a
  // cbuffer, creating it on first use. All elements of a resource array
  // share one substitute (indices are normalized in the cache key).
  Value *GetOrCreateResourceForCbPtr(GetElementPtrInst *CbPtr,
                                     GlobalVariable *CbGV,
                                     DxilResourceProperties &RP) {
    // Change array idx to 0 to make sure all array ptr share same key.
    Value *Key = UniformCbPtr(CbPtr, CbGV);
    if (CBPtrToResourceMap.count(Key))
      return CBPtrToResourceMap[Key];
    Value *Resource = CreateResourceForCbPtr(CbPtr, CbGV, RP);
    CBPtrToResourceMap[Key] = Resource;
    return Resource;
  }

  // Rewrites a GEP into a cbuffer-resident resource as a GEP on the
  // substitute resource pointer, flattening multi-dimensional array
  // indexing into one linear index.
  Value *LowerCbResourcePtr(GetElementPtrInst *CbPtr, Value *ResPtr) {
    // Simple case.
    if (ResPtr->getType() == CbPtr->getType())
      return ResPtr;
    // Array case.
    DXASSERT_NOMSG(ResPtr->getType()->getPointerElementType()->isArrayTy());
    IRBuilder<> Builder(CbPtr);
    gep_type_iterator GEPIt = gep_type_begin(CbPtr), E = gep_type_end(CbPtr);
    Value *arrayIdx = GEPIt.getOperand();
    // Only calc array idx and size.
    // Ignore struct type part.
    for (; GEPIt != E; ++GEPIt) {
      if (GEPIt->isArrayTy()) {
        // Linearize: idx = idx * dim + currentIdx.
        arrayIdx = Builder.CreateMul(
            arrayIdx, Builder.getInt32(GEPIt->getArrayNumElements()));
        arrayIdx = Builder.CreateAdd(arrayIdx, GEPIt.getOperand());
      }
    }
    return Builder.CreateGEP(ResPtr, {Builder.getInt32(0), arrayIdx});
  }

  // Decodes the resource properties constant carried by an annotate-handle
  // call (class, kind and packed properties operands).
  DxilResourceProperties GetResPropsFromAnnotateHandle(CallInst *Anno) {
    DXIL::ResourceClass RC =
        (DXIL::ResourceClass)cast<ConstantInt>(
            Anno->getArgOperand(
                HLOperandIndex::kAnnotateHandleResourceClassOpIdx))
            ->getLimitedValue();
    DXIL::ResourceKind RK =
        (DXIL::ResourceKind)cast<ConstantInt>(
            Anno->getArgOperand(
                HLOperandIndex::kAnnotateHandleResourceKindOpIdx))
            ->getLimitedValue();
    Constant *Props = cast<Constant>(Anno->getArgOperand(
        HLOperandIndex::kAnnotateHandleResourcePropertiesOpIdx));
    DxilResourceProperties RP =
        resource_helper::loadFromConstant(*Props, RC, RK);
    return RP;
  }

private:
  // Resolves (and caches) resource attributes for a handle. Emits an error
  // and leaves an Invalid entry when the handle is not produced by an
  // annotate-handle call.
  ResAttribute &FindCreateHandleResourceBase(Value *Handle) {
    if (HandleMetaMap.count(Handle))
      return HandleMetaMap[Handle];
    // Add invalid first to avoid dead loop.
    HandleMetaMap[Handle] = {
        DXIL::ResourceClass::Invalid, DXIL::ResourceKind::Invalid,
        StructType::get(Type::getVoidTy(HLM.GetCtx()), nullptr)};
    if (CallInst *CI = dyn_cast<CallInst>(Handle)) {
      hlsl::HLOpcodeGroup group =
          hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction());
      if (group == HLOpcodeGroup::HLAnnotateHandle) {
        // Class, kind and resource type are operands of the annotation.
        ConstantInt *RC = cast<ConstantInt>(CI->getArgOperand(
            HLOperandIndex::kAnnotateHandleResourceClassOpIdx));
        ConstantInt *RK = cast<ConstantInt>(CI->getArgOperand(
            HLOperandIndex::kAnnotateHandleResourceKindOpIdx));
        Type *ResTy =
            CI->getArgOperand(HLOperandIndex::kAnnotateHandleResourceTypeOpIdx)
                ->getType();
        ResAttribute Attrib = {(DXIL::ResourceClass)RC->getLimitedValue(),
                               (DXIL::ResourceKind)RK->getLimitedValue(),
                               ResTy};
        HandleMetaMap[Handle] = Attrib;
        return HandleMetaMap[Handle];
      }
    }
    Handle->getContext().emitError("cannot map resource to handle");
    return HandleMetaMap[Handle];
  }

  // Walks through select/phi chains to find a call producing `handle`;
  // resSet guards against cycles. Returns null when none is found.
  CallInst *FindCreateHandle(Value *handle,
                             std::unordered_set<Value *> &resSet) {
    // Already checked.
    if (resSet.count(handle))
      return nullptr;
    resSet.insert(handle);
    if (CallInst *CI = dyn_cast<CallInst>(handle))
      return CI;
    if (SelectInst *Sel = dyn_cast<SelectInst>(handle)) {
      if (CallInst *CI = FindCreateHandle(Sel->getTrueValue(), resSet))
        return CI;
      if (CallInst *CI = FindCreateHandle(Sel->getFalseValue(), resSet))
        return CI;
      return nullptr;
    }
    if (PHINode *Phi = dyn_cast<PHINode>(handle)) {
      for (unsigned i = 0; i < Phi->getNumOperands(); i++) {
        if (CallInst *CI = FindCreateHandle(Phi->getOperand(i), resSet))
          return CI;
      }
      return nullptr;
    }
    return nullptr;
  }

  // Records the resource load feeding every create-handle call reachable
  // from `handle` (through select/phi) into UpdateCounterSet; resSet guards
  // against cycles. Emits an error when the resource is not a plain load.
  void MarkHasCounterOnCreateHandle(Value *handle,
                                    std::unordered_set<Value *> &resSet) {
    // Already checked.
    if (resSet.count(handle))
      return;
    resSet.insert(handle);
    if (CallInst *CI = dyn_cast<CallInst>(handle)) {
      Value *Res =
          CI->getArgOperand(HLOperandIndex::kCreateHandleResourceOpIdx);
      LoadInst *LdRes = dyn_cast<LoadInst>(Res);
      if (!LdRes) {
        dxilutil::EmitErrorOnInstruction(CI, "cannot map resource to handle.");
        return;
      }
      UpdateCounterSet.insert(LdRes);
      return;
    }
    if (SelectInst *Sel = dyn_cast<SelectInst>(handle)) {
      MarkHasCounterOnCreateHandle(Sel->getTrueValue(), resSet);
      MarkHasCounterOnCreateHandle(Sel->getFalseValue(), resSet);
    }
    if (PHINode *Phi = dyn_cast<PHINode>(handle)) {
      for (unsigned i = 0; i < Phi->getNumOperands(); i++) {
        MarkHasCounterOnCreateHandle(Phi->getOperand(i), resSet);
      }
    }
  }

  // Produces a canonical cache key for a cbuffer GEP by replacing every
  // dynamic index with 0, so all elements of an array map to the same key.
  Value *UniformCbPtr(GetElementPtrInst *CbPtr, GlobalVariable *CbGV) {
    gep_type_iterator GEPIt = gep_type_begin(CbPtr), E = gep_type_end(CbPtr);
    std::vector<Value *> idxList(CbPtr->idx_begin(), CbPtr->idx_end());
    unsigned i = 0;
    IRBuilder<> Builder(HLM.GetCtx());
    Value *zero = Builder.getInt32(0);
    for (; GEPIt != E; ++GEPIt, ++i) {
      ConstantInt *ImmIdx = dyn_cast<ConstantInt>(GEPIt.getOperand());
      if (!ImmIdx) {
        // Remove dynamic indexing to avoid crash.
        idxList[i] = zero;
      }
    }
    // Constant-folded GEP expression acts as the map key.
    Value *Key = Builder.CreateInBoundsGEP(CbGV, idxList);
    return Key;
  }

  // Creates the substitute resource global for a resource located inside a
  // cbuffer, naming it after the access path (field names / array indices).
  Value *CreateResourceForCbPtr(GetElementPtrInst *CbPtr, GlobalVariable *CbGV,
                                DxilResourceProperties &RP) {
    Type *CbTy = CbPtr->getPointerOperandType();
    DXASSERT_LOCALVAR(CbTy, CbTy == CbGV->getType(),
                      "else arg not point to var");
    gep_type_iterator GEPIt = gep_type_begin(CbPtr), E = gep_type_end(CbPtr);
    unsigned i = 0;
    IRBuilder<> Builder(HLM.GetCtx());
    unsigned arraySize = 1;
    DxilTypeSystem &typeSys = HLM.GetTypeSystem();
    std::string Name;
    for (; GEPIt != E; ++GEPIt, ++i) {
      if (GEPIt->isArrayTy()) {
        arraySize *= GEPIt->getArrayNumElements();
        if (!Name.empty())
          Name += ".";
        // Constant indices become part of the name; dynamic ones are skipped.
        if (ConstantInt *ImmIdx = dyn_cast<ConstantInt>(GEPIt.getOperand())) {
          unsigned idx = ImmIdx->getLimitedValue();
          Name += std::to_string(idx);
        }
      } else if (GEPIt->isStructTy()) {
        // Use the annotated field name for readability.
        DxilStructAnnotation *typeAnnot =
            typeSys.GetStructAnnotation(cast<StructType>(*GEPIt));
        DXASSERT_NOMSG(typeAnnot);
        unsigned idx = cast<ConstantInt>(GEPIt.getOperand())->getLimitedValue();
        DXASSERT_NOMSG(typeAnnot->GetNumFields() > idx);
        DxilFieldAnnotation &fieldAnnot = typeAnnot->GetFieldAnnotation(idx);
        if (!Name.empty())
          Name += ".";
        Name += fieldAnnot.GetFieldName();
      }
    }
    Type *Ty = CbPtr->getResultElementType();
    // Not support resource array in cbuffer.
    unsigned ResBinding = HLM.GetBindingForResourceInCB(CbPtr, CbGV, RP.Class);
    return CreateResourceGV(Ty, Name, RP, ResBinding);
  }

  // Creates (or reuses) the module-level global for the resource and
  // registers it with the module's resource tables at the given binding.
  Value *CreateResourceGV(Type *Ty, StringRef Name, DxilResourceProperties &RP,
                          unsigned ResBinding) {
    Module &M = *HLM.GetModule();
    Constant *GV = M.getOrInsertGlobal(Name, Ty);
    // Create resource and set GV as globalSym.
    DxilResourceBase *Res = HLM.AddResourceWithGlobalVariableAndProps(GV, RP);
    DXASSERT(Res, "fail to create resource for global variable in cbuffer");
    Res->SetLowerBound(ResBinding);
    return GV;
  }
};
// Common signature of every intrinsic lowering function below.
// `Translated` is an out-flag a lowering routine may clear — presumably to
// signal the call was not translated; confirm semantics at call sites.
using IntrinsicLowerFuncTy = Value *(CallInst *CI, IntrinsicOp IOP,
                                     DXIL::OpCode opcode,
                                     HLOperationLowerHelper &helper,
                                     HLObjectOperationLowerHelper *pObjHelper,
                                     bool &Translated);
// Table entry mapping an HLSL intrinsic to its lowering routine.
struct IntrinsicLower {
  // Intrinsic opcode.
  IntrinsicOp IntriOpcode;
  // Lower function.
  IntrinsicLowerFuncTy &LowerFunc;
  // DXIL opcode if can direct map.
  DXIL::OpCode DxilOpcode;
};
  320. // IOP intrinsics.
  321. namespace {
  322. Value *TrivialDxilOperation(Function *dxilFunc, OP::OpCode opcode, ArrayRef<Value *> refArgs,
  323. Type *Ty, Type *RetTy, OP *hlslOP,
  324. IRBuilder<> &Builder) {
  325. unsigned argNum = refArgs.size();
  326. std::vector<Value *> args = refArgs;
  327. if (Ty->isVectorTy()) {
  328. Value *retVal = llvm::UndefValue::get(RetTy);
  329. unsigned vecSize = Ty->getVectorNumElements();
  330. for (unsigned i = 0; i < vecSize; i++) {
  331. // Update vector args, skip known opcode arg.
  332. for (unsigned argIdx = HLOperandIndex::kUnaryOpSrc0Idx; argIdx < argNum;
  333. argIdx++) {
  334. if (refArgs[argIdx]->getType()->isVectorTy()) {
  335. Value *arg = refArgs[argIdx];
  336. args[argIdx] = Builder.CreateExtractElement(arg, i);
  337. }
  338. }
  339. Value *EltOP =
  340. Builder.CreateCall(dxilFunc, args, hlslOP->GetOpCodeName(opcode));
  341. retVal = Builder.CreateInsertElement(retVal, EltOP, i);
  342. }
  343. return retVal;
  344. } else {
  345. if (!RetTy->isVoidTy()) {
  346. Value *retVal =
  347. Builder.CreateCall(dxilFunc, args, hlslOP->GetOpCodeName(opcode));
  348. return retVal;
  349. } else {
  350. // Cannot add name to void.
  351. return Builder.CreateCall(dxilFunc, args);
  352. }
  353. }
  354. }
  355. // Generates a DXIL operation over an overloaded type (Ty), returning a
  356. // RetTy value; when Ty is a vector, it will replicate per-element operations
  357. // into RetTy to rebuild it.
  358. Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef<Value *> refArgs,
  359. Type *Ty, Type *RetTy, OP *hlslOP,
  360. IRBuilder<> &Builder) {
  361. Type *EltTy = Ty->getScalarType();
  362. Function *dxilFunc = hlslOP->GetOpFunc(opcode, EltTy);
  363. return TrivialDxilOperation(dxilFunc, opcode, refArgs, Ty, RetTy, hlslOP, Builder);
  364. }
  365. Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef<Value *> refArgs,
  366. Type *Ty, Instruction *Inst, OP *hlslOP) {
  367. DXASSERT(refArgs.size() > 0, "else opcode isn't in signature");
  368. DXASSERT(refArgs[0] == nullptr,
  369. "else caller has already filled the value in");
  370. IRBuilder<> B(Inst);
  371. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  372. const_cast<llvm::Value **>(refArgs.data())[0] =
  373. opArg; // actually stack memory from caller
  374. return TrivialDxilOperation(opcode, refArgs, Ty, Inst->getType(), hlslOP, B);
  375. }
  376. Value *TrivialDxilUnaryOperationRet(OP::OpCode opcode, Value *src, Type *RetTy,
  377. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  378. Type *Ty = src->getType();
  379. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  380. Value *args[] = {opArg, src};
  381. return TrivialDxilOperation(opcode, args, Ty, RetTy, hlslOP, Builder);
  382. }
  383. Value *TrivialDxilUnaryOperation(OP::OpCode opcode, Value *src,
  384. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  385. return TrivialDxilUnaryOperationRet(opcode, src, src->getType(), hlslOP,
  386. Builder);
  387. }
  388. Value *TrivialDxilBinaryOperation(OP::OpCode opcode, Value *src0, Value *src1,
  389. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  390. Type *Ty = src0->getType();
  391. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  392. Value *args[] = {opArg, src0, src1};
  393. return TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder);
  394. }
  395. Value *TrivialDxilTrinaryOperation(OP::OpCode opcode, Value *src0, Value *src1,
  396. Value *src2, hlsl::OP *hlslOP,
  397. IRBuilder<> &Builder) {
  398. Type *Ty = src0->getType();
  399. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  400. Value *args[] = {opArg, src0, src1, src2};
  401. return TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder);
  402. }
  403. Value *TrivialUnaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  404. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  405. Value *src0 = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  406. IRBuilder<> Builder(CI);
  407. hlsl::OP *hlslOP = &helper.hlslOP;
  408. Value *retVal = TrivialDxilUnaryOperationRet(opcode, src0, CI->getType(), hlslOP, Builder);
  409. return retVal;
  410. }
  411. Value *TrivialBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  412. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  413. hlsl::OP *hlslOP = &helper.hlslOP;
  414. Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  415. Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  416. IRBuilder<> Builder(CI);
  417. Value *binOp =
  418. TrivialDxilBinaryOperation(opcode, src0, src1, hlslOP, Builder);
  419. return binOp;
  420. }
  421. Value *TrivialTrinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  422. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  423. hlsl::OP *hlslOP = &helper.hlslOP;
  424. Value *src0 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  425. Value *src1 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  426. Value *src2 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  427. IRBuilder<> Builder(CI);
  428. Value *triOp =
  429. TrivialDxilTrinaryOperation(opcode, src0, src1, src2, hlslOP, Builder);
  430. return triOp;
  431. }
  432. Value *TrivialIsSpecialFloat(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  433. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  434. hlsl::OP *hlslOP = &helper.hlslOP;
  435. Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  436. IRBuilder<> Builder(CI);
  437. Type *Ty = src->getType();
  438. Type *RetTy = Type::getInt1Ty(CI->getContext());
  439. if (Ty->isVectorTy())
  440. RetTy = VectorType::get(RetTy, Ty->getVectorNumElements());
  441. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  442. Value *args[] = {opArg, src};
  443. return TrivialDxilOperation(opcode, args, Ty, RetTy, hlslOP, Builder);
  444. }
  445. bool IsResourceGEP(GetElementPtrInst *I) {
  446. Type *Ty = I->getType()->getPointerElementType();
  447. Ty = dxilutil::GetArrayEltTy(Ty);
  448. // Only mark on GEP which point to resource.
  449. return dxilutil::IsHLSLResourceType(Ty);
  450. }
  451. Value *TranslateNonUniformResourceIndex(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  452. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  453. Value *V = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  454. CI->replaceAllUsesWith(V);
  455. for (User *U : V->users()) {
  456. if (GetElementPtrInst *I = dyn_cast<GetElementPtrInst>(U)) {
  457. // Only mark on GEP which point to resource.
  458. if (IsResourceGEP(I))
  459. DxilMDHelper::MarkNonUniform(I);
  460. } else if (CastInst *castI = dyn_cast<CastInst>(U)) {
  461. for (User *castU : castI->users()) {
  462. if (GetElementPtrInst *I = dyn_cast<GetElementPtrInst>(castU)) {
  463. // Only mark on GEP which point to resource.
  464. if (IsResourceGEP(I))
  465. DxilMDHelper::MarkNonUniform(I);
  466. }
  467. }
  468. }
  469. }
  470. return nullptr;
  471. }
  472. Value *TrivialBarrier(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  473. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  474. hlsl::OP *OP = &helper.hlslOP;
  475. Function *dxilFunc = OP->GetOpFunc(OP::OpCode::Barrier, CI->getType());
  476. Constant *opArg = OP->GetU32Const((unsigned)OP::OpCode::Barrier);
  477. unsigned uglobal = static_cast<unsigned>(DXIL::BarrierMode::UAVFenceGlobal);
  478. unsigned g = static_cast<unsigned>(DXIL::BarrierMode::TGSMFence);
  479. unsigned t = static_cast<unsigned>(DXIL::BarrierMode::SyncThreadGroup);
  480. // unsigned ut = static_cast<unsigned>(DXIL::BarrierMode::UAVFenceThreadGroup);
  481. unsigned barrierMode = 0;
  482. switch (IOP) {
  483. case IntrinsicOp::IOP_AllMemoryBarrier:
  484. barrierMode = uglobal | g;
  485. break;
  486. case IntrinsicOp::IOP_AllMemoryBarrierWithGroupSync:
  487. barrierMode = uglobal | g | t;
  488. break;
  489. case IntrinsicOp::IOP_GroupMemoryBarrier:
  490. barrierMode = g;
  491. break;
  492. case IntrinsicOp::IOP_GroupMemoryBarrierWithGroupSync:
  493. barrierMode = g | t;
  494. break;
  495. case IntrinsicOp::IOP_DeviceMemoryBarrier:
  496. barrierMode = uglobal;
  497. break;
  498. case IntrinsicOp::IOP_DeviceMemoryBarrierWithGroupSync:
  499. barrierMode = uglobal | t;
  500. break;
  501. default:
  502. DXASSERT(0, "invalid opcode for barrier");
  503. break;
  504. }
  505. Value *src0 = OP->GetU32Const(static_cast<unsigned>(barrierMode));
  506. Value *args[] = {opArg, src0};
  507. IRBuilder<> Builder(CI);
  508. Builder.CreateCall(dxilFunc, args);
  509. return nullptr;
  510. }
  511. Value *TranslateD3DColorToUByte4(CallInst *CI, IntrinsicOp IOP,
  512. OP::OpCode opcode,
  513. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  514. IRBuilder<> Builder(CI);
  515. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  516. Type *Ty = val->getType();
  517. // Use the same scaling factor used by FXC (i.e., 255.001953)
  518. // Excerpt from stackoverflow discussion:
  519. // "Built-in rounding, necessary because of truncation. 0.001953 * 256 = 0.5"
  520. Constant *toByteConst = ConstantFP::get(Ty->getScalarType(), 255.001953);
  521. if (Ty->isVectorTy()) {
  522. static constexpr int supportedVecElemCount = 4;
  523. if (Ty->getVectorNumElements() == supportedVecElemCount) {
  524. toByteConst = ConstantVector::getSplat(supportedVecElemCount, toByteConst);
  525. // Swizzle the input val -> val.zyxw
  526. std::vector<int> mask { 2, 1, 0, 3 };
  527. val = Builder.CreateShuffleVector(val, val, mask);
  528. } else {
  529. dxilutil::EmitErrorOnInstruction(CI, "Unsupported input type for intrinsic D3DColorToUByte4.");
  530. return UndefValue::get(CI->getType());
  531. }
  532. }
  533. Value *byte4 = Builder.CreateFMul(toByteConst, val);
  534. return Builder.CreateCast(Instruction::CastOps::FPToSI, byte4, CI->getType());
  535. }
  536. // Returns true if pow can be implemented using Fxc's mul-only code gen pattern.
  537. // Fxc uses the below rules when choosing mul-only code gen pattern to implement pow function.
  538. // Rule 1: Applicable only to power values in the range [INT32_MIN, INT32_MAX]
  539. // Rule 2: The maximum number of mul ops needed shouldn't exceed (2n+1) or (n+1) based on whether the power
  540. // is a positive or a negative value. Here "n" is the number of scalar elements in power.
  541. // Rule 3: Power must be an exact value.
  542. // +----------+---------------------+------------------+
  543. // | BaseType | IsExponentPositive | MaxMulOpsAllowed |
  544. // +----------+---------------------+------------------+
  545. // | float4x4 | True | 33 |
  546. // | float4x4 | False | 17 |
  547. // | float4x2 | True | 17 |
  548. // | float4x2 | False | 9 |
  549. // | float2x4 | True | 17 |
  550. // | float2x4 | False | 9 |
  551. // | float4 | True | 9 |
  552. // | float4 | False | 5 |
  553. // | float2 | True | 5 |
  554. // | float2 | False | 3 |
  555. // | float | True | 3 |
  556. // | float | False | 2 |
  557. // +----------+---------------------+------------------+
  558. bool CanUseFxcMulOnlyPatternForPow(IRBuilder<>& Builder, Value *x, Value *pow, int32_t& powI) {
  559. // Applicable only when power is a literal.
  560. if (!isa<ConstantDataVector>(pow) && !isa<ConstantFP>(pow)) {
  561. return false;
  562. }
  563. // Only apply this code gen on splat values.
  564. if (ConstantDataVector *cdv = dyn_cast<ConstantDataVector>(pow)) {
  565. if (!hlsl::dxilutil::IsSplat(cdv)) {
  566. return false;
  567. }
  568. }
  569. APFloat powAPF = isa<ConstantDataVector>(pow) ?
  570. cast<ConstantDataVector>(pow)->getElementAsAPFloat(0) : // should be a splat value
  571. cast<ConstantFP>(pow)->getValueAPF();
  572. APSInt powAPS(32, false);
  573. bool isExact = false;
  574. // Try converting float value of power to integer and also check if the float value is exact.
  575. APFloat::opStatus status = powAPF.convertToInteger(powAPS, APFloat::rmTowardZero, &isExact);
  576. if (status == APFloat::opStatus::opOK && isExact) {
  577. powI = powAPS.getExtValue();
  578. uint32_t powU = abs(powI);
  579. int setBitCount = 0;
  580. int maxBitSetPos = -1;
  581. for (int i = 0; i < 32; i++) {
  582. if ((powU >> i) & 1) {
  583. setBitCount++;
  584. maxBitSetPos = i;
  585. }
  586. }
  587. DXASSERT(maxBitSetPos <= 30, "msb should always be zero.");
  588. unsigned numElem = isa<ConstantDataVector>(pow) ? x->getType()->getVectorNumElements() : 1;
  589. int mulOpThreshold = powI < 0 ? numElem + 1 : 2 * numElem + 1;
  590. int mulOpNeeded = maxBitSetPos + setBitCount - 1;
  591. return mulOpNeeded <= mulOpThreshold;
  592. }
  593. return false;
  594. }
  595. Value *TranslatePowUsingFxcMulOnlyPattern(IRBuilder<>& Builder, Value *x, const int32_t y) {
  596. uint32_t absY = abs(y);
  597. // If y is zero then always return 1.
  598. if (absY == 0) {
  599. return ConstantFP::get(x->getType(), 1);
  600. }
  601. int lastSetPos = -1;
  602. Value *result = nullptr;
  603. Value *mul = nullptr;
  604. for (int i = 0; i < 32; i++) {
  605. if ((absY >> i) & 1) {
  606. for (int j = i; j > lastSetPos; j--) {
  607. if (!mul) {
  608. mul = x;
  609. }
  610. else {
  611. mul = Builder.CreateFMul(mul, mul);
  612. }
  613. }
  614. result = (result == nullptr) ? mul : Builder.CreateFMul(result, mul);
  615. lastSetPos = i;
  616. }
  617. }
  618. // Compute reciprocal for negative power values.
  619. if (y < 0) {
  620. Value* constOne = ConstantFP::get(x->getType(), 1);
  621. result = Builder.CreateFDiv(constOne, result);
  622. }
  623. return result;
  624. }
  625. Value *TranslatePowImpl(hlsl::OP *hlslOP, IRBuilder<>& Builder, Value *x, Value *y, bool isFXCCompatMode = false) {
  626. // As applicable implement pow using only mul ops as done by Fxc.
  627. int32_t p = 0;
  628. if (CanUseFxcMulOnlyPatternForPow(Builder, x, y, p)) {
  629. if (isFXCCompatMode) {
  630. return TranslatePowUsingFxcMulOnlyPattern(Builder, x, p);
  631. } else if (p == 2) {
  632. // Only take care 2 for it will not affect register pressure.
  633. return Builder.CreateFMul(x, x);
  634. }
  635. }
  636. // Default to log-mul-exp pattern if previous scenarios don't apply.
  637. // t = log(x);
  638. Value *logX =
  639. TrivialDxilUnaryOperation(DXIL::OpCode::Log, x, hlslOP, Builder);
  640. // t = y * t;
  641. Value *mulY = Builder.CreateFMul(logX, y);
  642. // pow = exp(t);
  643. return TrivialDxilUnaryOperation(DXIL::OpCode::Exp, mulY, hlslOP, Builder);
  644. }
  645. Value *TranslateAddUint64(CallInst *CI, IntrinsicOp IOP,
  646. OP::OpCode opcode,
  647. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  648. hlsl::OP *hlslOP = &helper.hlslOP;
  649. IRBuilder<> Builder(CI);
  650. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  651. Type *Ty = val->getType();
  652. VectorType *VT = dyn_cast<VectorType>(Ty);
  653. if (!VT) {
  654. dxilutil::EmitErrorOnInstruction(
  655. CI, "AddUint64 can only be applied to uint2 and uint4 operands.");
  656. return UndefValue::get(Ty);
  657. }
  658. unsigned size = VT->getNumElements();
  659. if (size != 2 && size != 4) {
  660. dxilutil::EmitErrorOnInstruction(
  661. CI, "AddUint64 can only be applied to uint2 and uint4 operands.");
  662. return UndefValue::get(Ty);
  663. }
  664. Value *op0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  665. Value *op1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  666. Value *RetVal = UndefValue::get(Ty);
  667. Function *AddC = hlslOP->GetOpFunc(DXIL::OpCode::UAddc, helper.i32Ty);
  668. Value *opArg = Builder.getInt32(static_cast<unsigned>(DXIL::OpCode::UAddc));
  669. for (unsigned i=0; i<size; i+=2) {
  670. Value *low0 = Builder.CreateExtractElement(op0, i);
  671. Value *low1 = Builder.CreateExtractElement(op1, i);
  672. Value *lowWithC = Builder.CreateCall(AddC, { opArg, low0, low1});
  673. Value *low = Builder.CreateExtractValue(lowWithC, 0);
  674. RetVal = Builder.CreateInsertElement(RetVal, low, i);
  675. Value *carry = Builder.CreateExtractValue(lowWithC, 1);
  676. // Ext i1 to i32
  677. carry = Builder.CreateZExt(carry, helper.i32Ty);
  678. Value *hi0 = Builder.CreateExtractElement(op0, i+1);
  679. Value *hi1 = Builder.CreateExtractElement(op1, i+1);
  680. Value *hi = Builder.CreateAdd(hi0, hi1);
  681. hi = Builder.CreateAdd(hi, carry);
  682. RetVal = Builder.CreateInsertElement(RetVal, hi, i+1);
  683. }
  684. return RetVal;
  685. }
  686. bool IsValidLoadInput(Value *V) {
  687. // Must be load input.
  688. // TODO: report this error on front-end
  689. if (!V || !isa<CallInst>(V)) {
  690. return false;
  691. }
  692. CallInst *CI = cast<CallInst>(V);
  693. // Must be immediate.
  694. ConstantInt *opArg =
  695. cast<ConstantInt>(CI->getArgOperand(DXIL::OperandIndex::kOpcodeIdx));
  696. DXIL::OpCode op = static_cast<DXIL::OpCode>(opArg->getLimitedValue());
  697. if (op != DXIL::OpCode::LoadInput) {
  698. return false;
  699. }
  700. return true;
  701. }
// Tunnel through insert/extract element and shuffle to find original source
// of scalar value, or specified element (vecIdx) of vector value.
// Also looks through convergent markers. Returns undef when the element
// cannot be traced to a single source.
Value *FindScalarSource(Value *src, unsigned vecIdx = 0) {
  Type *srcTy = src->getType()->getScalarType();
  while (src && !isa<UndefValue>(src)) {
    if (src->getType()->isVectorTy()) {
      if (InsertElementInst *IE = dyn_cast<InsertElementInst>(src)) {
        // If this insert wrote the element we want, follow the inserted
        // scalar (operand 1); otherwise keep looking in the base vector
        // (operand 0).
        unsigned curIdx = (unsigned)cast<ConstantInt>(IE->getOperand(2))
                              ->getUniqueInteger()
                              .getLimitedValue();
        src = IE->getOperand((curIdx == vecIdx) ? 1 : 0);
      } else if (ShuffleVectorInst *SV = dyn_cast<ShuffleVectorInst>(src)) {
        // Map the element through the shuffle mask; a negative mask entry
        // means the lane is undef.
        int newIdx = SV->getMaskValue(vecIdx);
        if (newIdx < 0)
          return UndefValue::get(srcTy);
        vecIdx = (unsigned)newIdx;
        src = SV->getOperand(0);
        unsigned numElt = src->getType()->getVectorNumElements();
        if (numElt <= vecIdx) {
          // Index falls into the second shuffle operand.
          vecIdx -= numElt;
          src = SV->getOperand(1);
        }
      } else {
        return UndefValue::get(srcTy); // Didn't find it.
      }
    } else {
      if (ExtractElementInst *EE = dyn_cast<ExtractElementInst>(src)) {
        // Switch to tracing the extracted element of the source vector.
        vecIdx = (unsigned)cast<ConstantInt>(EE->getIndexOperand())
                     ->getUniqueInteger()
                     .getLimitedValue();
        src = EE->getVectorOperand();
      } else if (hlsl::IsConvergentMarker(src)) {
        // Convergent markers are transparent for source tracking.
        src = hlsl::GetConvergentSource(src);
      } else {
        break; // Found it.
      }
    }
  }
  return src;
}
// Finds corresponding inputs, calls translation for each, and returns
// resulting vector or scalar.
// Uses functor that takes (inputElemID, rowIdx, colIdx), and returns
// translation for one input scalar.
Value *TranslateEvalHelper(CallInst *CI, Value *val, IRBuilder<> &Builder,
    std::function<Value*(Value*, Value*, Value*)> fnTranslateScalarInput) {
  Type *Ty = CI->getType();
  // Start from undef and fill lanes one at a time (vector case) or replace
  // wholesale (scalar case). On error the undef value is returned so the
  // caller still receives a value of the correct type.
  Value *result = UndefValue::get(Ty);
  if (Ty->isVectorTy()) {
    for (unsigned i = 0; i < Ty->getVectorNumElements(); ++i) {
      // Trace lane i of 'val' back to the scalar load-input call it came from.
      Value *InputEl = FindScalarSource(val, i);
      if (!IsValidLoadInput(InputEl)) {
        dxilutil::EmitErrorOnInstruction(CI, "attribute evaluation can only be done "
                                             "on values taken directly from inputs.");
        return result;
      }
      CallInst *loadInput = cast<CallInst>(InputEl);
      // Reuse the load's signature-element id and row/col coordinates as the
      // coordinates for the evaluation op.
      Value *inputElemID = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx);
      Value *rowIdx = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx);
      Value *colIdx = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx);
      Value *Elt = fnTranslateScalarInput(inputElemID, rowIdx, colIdx);
      result = Builder.CreateInsertElement(result, Elt, i);
    }
  }
  else {
    // Scalar case: identical to the per-lane logic above for a single value.
    Value *InputEl = FindScalarSource(val);
    if (!IsValidLoadInput(InputEl)) {
      dxilutil::EmitErrorOnInstruction(CI, "attribute evaluation can only be done "
                                           "on values taken directly from inputs.");
      return result;
    }
    CallInst *loadInput = cast<CallInst>(InputEl);
    Value *inputElemID = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx);
    Value *rowIdx = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx);
    Value *colIdx = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx);
    result = fnTranslateScalarInput(inputElemID, rowIdx, colIdx);
  }
  return result;
}
  779. Value *TranslateEvalSample(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  780. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  781. hlsl::OP *hlslOP = &helper.hlslOP;
  782. Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  783. Value *sampleIdx = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  784. IRBuilder<> Builder(CI);
  785. OP::OpCode opcode = OP::OpCode::EvalSampleIndex;
  786. Value *opArg = hlslOP->GetU32Const((unsigned)opcode);
  787. Function *evalFunc = hlslOP->GetOpFunc(opcode, CI->getType()->getScalarType());
  788. return TranslateEvalHelper(CI, val, Builder,
  789. [&](Value *inputElemID, Value *rowIdx, Value *colIdx) -> Value* {
  790. return Builder.CreateCall(evalFunc, { opArg, inputElemID, rowIdx, colIdx, sampleIdx });
  791. }
  792. );
  793. }
  794. Value *TranslateEvalSnapped(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  795. HLOperationLowerHelper &helper,
  796. HLObjectOperationLowerHelper *pObjHelper,
  797. bool &Translated) {
  798. hlsl::OP *hlslOP = &helper.hlslOP;
  799. Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  800. Value *offset = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  801. IRBuilder<> Builder(CI);
  802. Value *offsetX = Builder.CreateExtractElement(offset, (uint64_t)0);
  803. Value *offsetY = Builder.CreateExtractElement(offset, 1);
  804. OP::OpCode opcode = OP::OpCode::EvalSnapped;
  805. Value *opArg = hlslOP->GetU32Const((unsigned)opcode);
  806. Function *evalFunc = hlslOP->GetOpFunc(opcode, CI->getType()->getScalarType());
  807. return TranslateEvalHelper(CI, val, Builder,
  808. [&](Value *inputElemID, Value *rowIdx, Value *colIdx) -> Value* {
  809. return Builder.CreateCall(evalFunc, { opArg, inputElemID, rowIdx, colIdx, offsetX, offsetY });
  810. }
  811. );
  812. }
  813. Value *TranslateEvalCentroid(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  814. HLOperationLowerHelper &helper,
  815. HLObjectOperationLowerHelper *pObjHelper,
  816. bool &Translated) {
  817. hlsl::OP *hlslOP = &helper.hlslOP;
  818. Value *val = CI->getArgOperand(DXIL::OperandIndex::kUnarySrc0OpIdx);
  819. IRBuilder<> Builder(CI);
  820. OP::OpCode opcode = OP::OpCode::EvalCentroid;
  821. Value *opArg = hlslOP->GetU32Const((unsigned)opcode);
  822. Function *evalFunc = hlslOP->GetOpFunc(opcode, CI->getType()->getScalarType());
  823. return TranslateEvalHelper(CI, val, Builder,
  824. [&](Value *inputElemID, Value *rowIdx, Value *colIdx) -> Value* {
  825. return Builder.CreateCall(evalFunc, { opArg, inputElemID, rowIdx, colIdx });
  826. }
  827. );
  828. }
  829. Value *TranslateGetAttributeAtVertex(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  830. HLOperationLowerHelper &helper,
  831. HLObjectOperationLowerHelper *pObjHelper,
  832. bool &Translated) {
  833. DXASSERT(op == OP::OpCode::AttributeAtVertex, "Wrong opcode to translate");
  834. hlsl::OP *hlslOP = &helper.hlslOP;
  835. IRBuilder<> Builder(CI);
  836. Value *val = CI->getArgOperand(DXIL::OperandIndex::kBinarySrc0OpIdx);
  837. Value *vertexIdx = CI->getArgOperand(DXIL::OperandIndex::kBinarySrc1OpIdx);
  838. Value *vertexI8Idx = Builder.CreateTrunc(vertexIdx, Type::getInt8Ty(CI->getContext()));
  839. Value *opArg = hlslOP->GetU32Const((unsigned)op);
  840. Function *evalFunc = hlslOP->GetOpFunc(op, val->getType()->getScalarType());
  841. return TranslateEvalHelper(CI, val, Builder,
  842. [&](Value *inputElemID, Value *rowIdx, Value *colIdx) -> Value* {
  843. return Builder.CreateCall(evalFunc, { opArg, inputElemID, rowIdx, colIdx, vertexI8Idx });
  844. }
  845. );
  846. }
  847. Value *TrivialNoArgOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  848. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  849. hlsl::OP *hlslOP = &helper.hlslOP;
  850. Type *Ty = Type::getVoidTy(CI->getContext());
  851. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  852. Value *args[] = {opArg};
  853. IRBuilder<> Builder(CI);
  854. Value *dxilOp = TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder);
  855. return dxilOp;
  856. }
  857. Value *TrivialNoArgWithRetOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  858. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  859. hlsl::OP *hlslOP = &helper.hlslOP;
  860. Type *Ty = CI->getType();
  861. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  862. Value *args[] = {opArg};
  863. IRBuilder<> Builder(CI);
  864. Value *dxilOp = TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder);
  865. return dxilOp;
  866. }
  867. Value *TranslateGetRTSamplePos(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  868. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  869. hlsl::OP *hlslOP = &helper.hlslOP;
  870. OP::OpCode opcode = OP::OpCode::RenderTargetGetSamplePosition;
  871. IRBuilder<> Builder(CI);
  872. Type *Ty = Type::getVoidTy(CI->getContext());
  873. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  874. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  875. Value *args[] = {opArg, val};
  876. Value *samplePos =
  877. TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder);
  878. Value *result = UndefValue::get(CI->getType());
  879. Value *samplePosX = Builder.CreateExtractValue(samplePos, 0);
  880. Value *samplePosY = Builder.CreateExtractValue(samplePos, 1);
  881. result = Builder.CreateInsertElement(result, samplePosX, (uint64_t)0);
  882. result = Builder.CreateInsertElement(result, samplePosY, 1);
  883. return result;
  884. }
  885. // val QuadReadLaneAt(val, uint);
  886. Value *TranslateQuadReadLaneAt(CallInst *CI, IntrinsicOp IOP,
  887. OP::OpCode opcode,
  888. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  889. hlsl::OP *hlslOP = &helper.hlslOP;
  890. Value *refArgs[] = {nullptr, CI->getOperand(1), CI->getOperand(2)};
  891. return TrivialDxilOperation(DXIL::OpCode::QuadReadLaneAt, refArgs,
  892. CI->getOperand(1)->getType(), CI, hlslOP);
  893. }
// Wave intrinsics of the form fn(val,QuadOpKind)->val
Value *TranslateQuadReadAcross(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  // Map the HLSL intrinsic to the i8 quad-op kind operand of QuadOp.
  DXIL::QuadOpKind opKind;
  switch (IOP) {
  case IntrinsicOp::IOP_QuadReadAcrossX: opKind = DXIL::QuadOpKind::ReadAcrossX; break;
  case IntrinsicOp::IOP_QuadReadAcrossY: opKind = DXIL::QuadOpKind::ReadAcrossY; break;
  // Intentional fallthrough: anything unexpected is asserted to be the
  // diagonal op, then handled by the case below.
  default: DXASSERT_NOMSG(IOP == IntrinsicOp::IOP_QuadReadAcrossDiagonal);
  case IntrinsicOp::IOP_QuadReadAcrossDiagonal: opKind = DXIL::QuadOpKind::ReadAcrossDiagonal; break;
  }
  Constant *OpArg = hlslOP->GetI8Const((unsigned)opKind);
  // First slot is the opcode operand, supplied by TrivialDxilOperation;
  // the op is overloaded on the value operand's type.
  Value *refArgs[] = {nullptr, CI->getOperand(1), OpArg};
  return TrivialDxilOperation(DXIL::OpCode::QuadOp, refArgs,
                              CI->getOperand(1)->getType(), CI, hlslOP);
}
  910. // WaveAllEqual(val<n>)->bool<n>
  911. Value *TranslateWaveAllEqual(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  912. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  913. hlsl::OP *hlslOP = &helper.hlslOP;
  914. Value *src = CI->getArgOperand(HLOperandIndex::kWaveAllEqualValueOpIdx);
  915. IRBuilder<> Builder(CI);
  916. Type *Ty = src->getType();
  917. Type *RetTy = Type::getInt1Ty(CI->getContext());
  918. if (Ty->isVectorTy())
  919. RetTy = VectorType::get(RetTy, Ty->getVectorNumElements());
  920. Constant *opArg = hlslOP->GetU32Const((unsigned)DXIL::OpCode::WaveActiveAllEqual);
  921. Value *args[] = {opArg, src};
  922. return TrivialDxilOperation(DXIL::OpCode::WaveActiveAllEqual, args, Ty, RetTy,
  923. hlslOP, Builder);
  924. }
// WaveMatch(val<n>)->uint4
Value *TranslateWaveMatch(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opc,
                          HLOperationLowerHelper &Helper,
                          HLObjectOperationLowerHelper *ObjHelper,
                          bool &Translated) {
  hlsl::OP *Op = &Helper.hlslOP;
  IRBuilder<> Builder(CI);
  // Generate a dx.op.waveMatch call for each scalar in the input, and perform
  // a bitwise AND between each result to derive the final bitmask in the case
  // of vector inputs.
  // (1) Collect the list of all scalar inputs (e.g. decompose vectors)
  SmallVector<Value *, 4> ScalarInputs;
  Value *Val = CI->getArgOperand(1);
  Type *ValTy = Val->getType();
  // The waveMatch op function is overloaded on the input's element type.
  Type *EltTy = ValTy->getScalarType();
  if (ValTy->isVectorTy()) {
    for (uint64_t i = 0, e = ValTy->getVectorNumElements(); i != e; ++i) {
      Value *Elt = Builder.CreateExtractElement(Val, i);
      ScalarInputs.push_back(Elt);
    }
  } else {
    ScalarInputs.push_back(Val);
  }
  Value *Res = nullptr;
  Constant *OpcArg = Op->GetU32Const((unsigned)DXIL::OpCode::WaveMatch);
  Value *Fn = Op->GetOpFunc(OP::OpCode::WaveMatch, EltTy);
  // (2) For each scalar, emit a call to dx.op.waveMatch. If this is not the
  // first scalar, then AND the result with the accumulator.
  for (unsigned i = 0, e = ScalarInputs.size(); i != e; ++i) {
    Value *Args[] = { OpcArg, ScalarInputs[i] };
    Value *Call = Builder.CreateCall(Fn, Args);
    if (Res) {
      // Generate bitwise AND of the components
      // (the waveMatch result is an aggregate with four members).
      for (unsigned j = 0; j != 4; ++j) {
        Value *ResVal = Builder.CreateExtractValue(Res, j);
        Value *CallVal = Builder.CreateExtractValue(Call, j);
        Value *And = Builder.CreateAnd(ResVal, CallVal);
        Res = Builder.CreateInsertValue(Res, And, j);
      }
    } else {
      // First scalar: the call result seeds the accumulator.
      Res = Call;
    }
  }
  // (3) Convert the final aggregate into a vector to make the types match
  // the HL call's uint4 return type.
  Value *ResVec = UndefValue::get(CI->getType());
  for (unsigned i = 0; i != 4; ++i) {
    Value *Elt = Builder.CreateExtractValue(Res, i);
    ResVec = Builder.CreateInsertElement(ResVec, Elt, i);
  }
  return ResVec;
}
  976. // Wave intrinsics of the form fn(valA)->valB, where no overloading takes place
  977. Value *TranslateWaveA2B(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  978. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  979. hlsl::OP *hlslOP = &helper.hlslOP;
  980. Value *refArgs[] = {nullptr, CI->getOperand(1)};
  981. return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP);
  982. }
// Wave ballot intrinsic.
Value *TranslateWaveBallot(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  // The high-level operation is uint4 ballot(i1).
  // The DXIL operation is struct.u4 ballot(i1).
  // To avoid updating users with more than a simple replace, we translate into
  // a call into struct.u4, then reassemble the vector.
  // Scalarization and constant propagation take care of cleanup.
  IRBuilder<> B(CI);
  // Make the DXIL call itself. The op has no type overload, so the op
  // function is retrieved with the void type.
  hlsl::OP *hlslOP = &helper.hlslOP;
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  Value *refArgs[] = { opArg, CI->getOperand(1) };
  Function *dxilFunc = hlslOP->GetOpFunc(opcode, Type::getVoidTy(CI->getContext()));
  Value *dxilVal = B.CreateCall(dxilFunc, refArgs, hlslOP->GetOpCodeName(opcode));
  // Assign from the call results into a vector.
  Type *ResTy = CI->getType();
  DXASSERT_NOMSG(ResTy->isVectorTy() && ResTy->getVectorNumElements() == 4);
  DXASSERT_NOMSG(dxilVal->getType()->isStructTy() &&
                 dxilVal->getType()->getNumContainedTypes() == 4);
  // 'x' component is the first vector element, highest bits.
  Value *ResVal = llvm::UndefValue::get(ResTy);
  for (unsigned Idx = 0; Idx < 4; ++Idx) {
    ResVal = B.CreateInsertElement(
        ResVal, B.CreateExtractValue(dxilVal, ArrayRef<unsigned>(Idx)), Idx);
  }
  return ResVal;
}
  1011. static bool WaveIntrinsicNeedsSign(OP::OpCode opcode) {
  1012. return opcode == OP::OpCode::WaveActiveOp ||
  1013. opcode == OP::OpCode::WavePrefixOp;
  1014. }
  1015. static unsigned WaveIntrinsicToSignedOpKind(IntrinsicOp IOP) {
  1016. if (IOP == IntrinsicOp::IOP_WaveActiveUMax ||
  1017. IOP == IntrinsicOp::IOP_WaveActiveUMin ||
  1018. IOP == IntrinsicOp::IOP_WaveActiveUSum ||
  1019. IOP == IntrinsicOp::IOP_WaveActiveUProduct ||
  1020. IOP == IntrinsicOp::IOP_WaveMultiPrefixUProduct ||
  1021. IOP == IntrinsicOp::IOP_WaveMultiPrefixUSum ||
  1022. IOP == IntrinsicOp::IOP_WavePrefixUSum ||
  1023. IOP == IntrinsicOp::IOP_WavePrefixUProduct)
  1024. return (unsigned)DXIL::SignedOpKind::Unsigned;
  1025. return (unsigned)DXIL::SignedOpKind::Signed;
  1026. }
  1027. static unsigned WaveIntrinsicToOpKind(IntrinsicOp IOP) {
  1028. switch (IOP) {
  1029. // Bit operations.
  1030. case IntrinsicOp::IOP_WaveActiveBitOr:
  1031. return (unsigned)DXIL::WaveBitOpKind::Or;
  1032. case IntrinsicOp::IOP_WaveActiveBitAnd:
  1033. return (unsigned)DXIL::WaveBitOpKind::And;
  1034. case IntrinsicOp::IOP_WaveActiveBitXor:
  1035. return (unsigned)DXIL::WaveBitOpKind::Xor;
  1036. // Prefix operations.
  1037. case IntrinsicOp::IOP_WavePrefixSum:
  1038. case IntrinsicOp::IOP_WavePrefixUSum:
  1039. return (unsigned)DXIL::WaveOpKind::Sum;
  1040. case IntrinsicOp::IOP_WavePrefixProduct:
  1041. case IntrinsicOp::IOP_WavePrefixUProduct:
  1042. return (unsigned)DXIL::WaveOpKind::Product;
  1043. // Numeric operations.
  1044. case IntrinsicOp::IOP_WaveActiveMax:
  1045. case IntrinsicOp::IOP_WaveActiveUMax:
  1046. return (unsigned)DXIL::WaveOpKind::Max;
  1047. case IntrinsicOp::IOP_WaveActiveMin:
  1048. case IntrinsicOp::IOP_WaveActiveUMin:
  1049. return (unsigned)DXIL::WaveOpKind::Min;
  1050. case IntrinsicOp::IOP_WaveActiveSum:
  1051. case IntrinsicOp::IOP_WaveActiveUSum:
  1052. return (unsigned)DXIL::WaveOpKind::Sum;
  1053. case IntrinsicOp::IOP_WaveActiveProduct:
  1054. case IntrinsicOp::IOP_WaveActiveUProduct:
  1055. // MultiPrefix operations
  1056. case IntrinsicOp::IOP_WaveMultiPrefixBitAnd:
  1057. return (unsigned)DXIL::WaveMultiPrefixOpKind::And;
  1058. case IntrinsicOp::IOP_WaveMultiPrefixBitOr:
  1059. return (unsigned)DXIL::WaveMultiPrefixOpKind::Or;
  1060. case IntrinsicOp::IOP_WaveMultiPrefixBitXor:
  1061. return (unsigned)DXIL::WaveMultiPrefixOpKind::Xor;
  1062. case IntrinsicOp::IOP_WaveMultiPrefixProduct:
  1063. case IntrinsicOp::IOP_WaveMultiPrefixUProduct:
  1064. return (unsigned)DXIL::WaveMultiPrefixOpKind::Product;
  1065. case IntrinsicOp::IOP_WaveMultiPrefixSum:
  1066. case IntrinsicOp::IOP_WaveMultiPrefixUSum:
  1067. return (unsigned)DXIL::WaveMultiPrefixOpKind::Sum;
  1068. default:
  1069. DXASSERT(IOP == IntrinsicOp::IOP_WaveActiveProduct ||
  1070. IOP == IntrinsicOp::IOP_WaveActiveUProduct,
  1071. "else caller passed incorrect value");
  1072. return (unsigned)DXIL::WaveOpKind::Product;
  1073. }
  1074. }
  1075. // Wave intrinsics of the form fn(valA)->valA
  1076. Value *TranslateWaveA2A(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1077. HLOperationLowerHelper &helper,
  1078. HLObjectOperationLowerHelper *pObjHelper,
  1079. bool &Translated) {
  1080. hlsl::OP *hlslOP = &helper.hlslOP;
  1081. Constant *kindValInt = hlslOP->GetI8Const(WaveIntrinsicToOpKind(IOP));
  1082. Constant *signValInt = hlslOP->GetI8Const(WaveIntrinsicToSignedOpKind(IOP));
  1083. Value *refArgs[] = {nullptr, CI->getOperand(1), kindValInt, signValInt};
  1084. unsigned refArgCount = _countof(refArgs);
  1085. if (!WaveIntrinsicNeedsSign(opcode))
  1086. refArgCount--;
  1087. return TrivialDxilOperation(opcode,
  1088. llvm::ArrayRef<Value *>(refArgs, refArgCount),
  1089. CI->getOperand(1)->getType(), CI, hlslOP);
  1090. }
  1091. // WaveMultiPrefixOP(val<n>, mask) -> val<n>
  1092. Value *TranslateWaveMultiPrefix(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opc,
  1093. HLOperationLowerHelper &Helper,
  1094. HLObjectOperationLowerHelper *ObjHelper,
  1095. bool &Translated) {
  1096. hlsl::OP *Op = &Helper.hlslOP;
  1097. Constant *KindValInt = Op->GetI8Const(WaveIntrinsicToOpKind(IOP));
  1098. Constant *SignValInt = Op->GetI8Const(WaveIntrinsicToSignedOpKind(IOP));
  1099. // Decompose mask into scalars
  1100. IRBuilder<> Builder(CI);
  1101. Value *Mask = CI->getArgOperand(2);
  1102. Value *Mask0 = Builder.CreateExtractElement(Mask, (uint64_t)0);
  1103. Value *Mask1 = Builder.CreateExtractElement(Mask, (uint64_t)1);
  1104. Value *Mask2 = Builder.CreateExtractElement(Mask, (uint64_t)2);
  1105. Value *Mask3 = Builder.CreateExtractElement(Mask, (uint64_t)3);
  1106. Value *Args[] = { nullptr, CI->getOperand(1),
  1107. Mask0, Mask1, Mask2, Mask3, KindValInt, SignValInt };
  1108. return TrivialDxilOperation(Opc, Args, CI->getOperand(1)->getType(), CI, Op);
  1109. }
  1110. // WaveMultiPrefixBitCount(i1, mask) -> i32
  1111. Value *TranslateWaveMultiPrefixBitCount(CallInst *CI, IntrinsicOp IOP,
  1112. OP::OpCode Opc,
  1113. HLOperationLowerHelper &Helper,
  1114. HLObjectOperationLowerHelper *ObjHelper,
  1115. bool &Translated) {
  1116. hlsl::OP *Op = &Helper.hlslOP;
  1117. // Decompose mask into scalars
  1118. IRBuilder<> Builder(CI);
  1119. Value *Mask = CI->getArgOperand(2);
  1120. Value *Mask0 = Builder.CreateExtractElement(Mask, (uint64_t)0);
  1121. Value *Mask1 = Builder.CreateExtractElement(Mask, (uint64_t)1);
  1122. Value *Mask2 = Builder.CreateExtractElement(Mask, (uint64_t)2);
  1123. Value *Mask3 = Builder.CreateExtractElement(Mask, (uint64_t)3);
  1124. Value *Args[] = { nullptr, CI->getOperand(1), Mask0, Mask1, Mask2, Mask3 };
  1125. return TrivialDxilOperation(Opc, Args, Helper.voidTy, CI, Op);
  1126. }
  1127. // Wave intrinsics of the form fn()->val
  1128. Value *TranslateWaveToVal(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1129. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1130. hlsl::OP *hlslOP = &helper.hlslOP;
  1131. Value *refArgs[] = {nullptr};
  1132. return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP);
  1133. }
  1134. // Wave intrinsics of the form fn(val,lane)->val
  1135. Value *TranslateWaveReadLaneAt(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1136. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1137. hlsl::OP *hlslOP = &helper.hlslOP;
  1138. Value *refArgs[] = {nullptr, CI->getOperand(1), CI->getOperand(2)};
  1139. return TrivialDxilOperation(DXIL::OpCode::WaveReadLaneAt, refArgs,
  1140. CI->getOperand(1)->getType(), CI, hlslOP);
  1141. }
  1142. // Wave intrinsics of the form fn(val)->val
  1143. Value *TranslateWaveReadLaneFirst(CallInst *CI, IntrinsicOp IOP,
  1144. OP::OpCode opcode,
  1145. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1146. hlsl::OP *hlslOP = &helper.hlslOP;
  1147. Value *refArgs[] = {nullptr, CI->getOperand(1)};
  1148. return TrivialDxilOperation(DXIL::OpCode::WaveReadLaneFirst, refArgs,
  1149. CI->getOperand(1)->getType(), CI, hlslOP);
  1150. }
  1151. Value *TranslateAbs(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1152. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1153. hlsl::OP *hlslOP = &helper.hlslOP;
  1154. Type *pOverloadTy = CI->getType()->getScalarType();
  1155. if (pOverloadTy->isFloatingPointTy()) {
  1156. Value *refArgs[] = {nullptr, CI->getOperand(1)};
  1157. return TrivialDxilOperation(DXIL::OpCode::FAbs, refArgs, CI->getType(), CI,
  1158. hlslOP);
  1159. } else {
  1160. Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1161. IRBuilder<> Builder(CI);
  1162. Value *neg = Builder.CreateNeg(src);
  1163. return TrivialDxilBinaryOperation(DXIL::OpCode::IMax, src, neg, hlslOP,
  1164. Builder);
  1165. }
  1166. }
// uabs(x): absolute value of an unsigned value is the value itself, so the
// call is replaced by its own argument.
Value *TranslateUAbs(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  return CI->getOperand(HLOperandIndex::kUnaryOpSrc0Idx); // No-op
}
  1171. Value *GenerateCmpNEZero(Value *val, IRBuilder<> Builder) {
  1172. Type *Ty = val->getType();
  1173. Type *EltTy = Ty->getScalarType();
  1174. Constant *zero = nullptr;
  1175. if (EltTy->isFloatingPointTy())
  1176. zero = ConstantFP::get(EltTy, 0);
  1177. else
  1178. zero = ConstantInt::get(EltTy, 0);
  1179. if (Ty != EltTy) {
  1180. zero = ConstantVector::getSplat(Ty->getVectorNumElements(), zero);
  1181. }
  1182. if (EltTy->isFloatingPointTy())
  1183. return Builder.CreateFCmpUNE(val, zero);
  1184. else
  1185. return Builder.CreateICmpNE(val, zero);
  1186. }
  1187. Value *TranslateAllForValue(Value *val, IRBuilder<> &Builder) {
  1188. Value *cond = GenerateCmpNEZero(val, Builder);
  1189. Type *Ty = val->getType();
  1190. Type *EltTy = Ty->getScalarType();
  1191. if (Ty != EltTy) {
  1192. Value *Result = Builder.CreateExtractElement(cond, (uint64_t)0);
  1193. for (unsigned i = 1; i < Ty->getVectorNumElements(); i++) {
  1194. Value *Elt = Builder.CreateExtractElement(cond, i);
  1195. Result = Builder.CreateAnd(Result, Elt);
  1196. }
  1197. return Result;
  1198. } else
  1199. return cond;
  1200. }
  1201. Value *TranslateAll(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1202. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1203. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1204. IRBuilder<> Builder(CI);
  1205. return TranslateAllForValue(val, Builder);
  1206. }
  1207. Value *TranslateAny(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1208. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1209. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1210. IRBuilder<> Builder(CI);
  1211. Value *cond = GenerateCmpNEZero(val, Builder);
  1212. Type *Ty = val->getType();
  1213. Type *EltTy = Ty->getScalarType();
  1214. if (Ty != EltTy) {
  1215. Value *Result = Builder.CreateExtractElement(cond, (uint64_t)0);
  1216. for (unsigned i = 1; i < Ty->getVectorNumElements(); i++) {
  1217. Value *Elt = Builder.CreateExtractElement(cond, i);
  1218. Result = Builder.CreateOr(Result, Elt);
  1219. }
  1220. return Result;
  1221. } else
  1222. return cond;
  1223. }
  1224. Value *TranslateBitcast(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1225. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1226. Type *Ty = CI->getType();
  1227. Value *op = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1228. IRBuilder<> Builder(CI);
  1229. return Builder.CreateBitCast(op, Ty);
  1230. }
// Lowers asuint(double x, out lo, out hi): emits SplitDouble for each element
// of 'x' and stores the two 32-bit halves through the 'lo'/'hi' output
// pointers. Always returns nullptr (there is no replacement value).
Value *TranslateDoubleAsUint(Value *x, Value *lo, Value *hi,
                             IRBuilder<> &Builder, hlsl::OP *hlslOP) {
  Type *Ty = x->getType();
  // Result element type is taken from the pointee of the 'lo' out-param.
  Type *outTy = lo->getType()->getPointerElementType();
  DXIL::OpCode opcode = DXIL::OpCode::SplitDouble;
  Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
  Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
  if (Ty->isVectorTy()) {
    Value *retValLo = llvm::UndefValue::get(outTy);
    Value *retValHi = llvm::UndefValue::get(outTy);
    unsigned vecSize = Ty->getVectorNumElements();
    // Split each element individually, then reassemble lo/hi vectors.
    for (unsigned i = 0; i < vecSize; i++) {
      Value *Elt = Builder.CreateExtractElement(x, i);
      Value *EltOP = Builder.CreateCall(dxilFunc, {opArg, Elt},
                                        hlslOP->GetOpCodeName(opcode));
      // SplitDouble result: member 0 -> lo half, member 1 -> hi half.
      Value *EltLo = Builder.CreateExtractValue(EltOP, 0);
      retValLo = Builder.CreateInsertElement(retValLo, EltLo, i);
      Value *EltHi = Builder.CreateExtractValue(EltOP, 1);
      retValHi = Builder.CreateInsertElement(retValHi, EltHi, i);
    }
    Builder.CreateStore(retValLo, lo);
    Builder.CreateStore(retValHi, hi);
  } else {
    Value *retVal =
        Builder.CreateCall(dxilFunc, {opArg, x}, hlslOP->GetOpCodeName(opcode));
    // Scalar case: one split, two stores.
    Value *retValLo = Builder.CreateExtractValue(retVal, 0);
    Value *retValHi = Builder.CreateExtractValue(retVal, 1);
    Builder.CreateStore(retValLo, lo);
    Builder.CreateStore(retValHi, hi);
  }
  return nullptr;
}
  1263. Value *TranslateAsUint(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1264. HLOperationLowerHelper &helper,
  1265. HLObjectOperationLowerHelper *pObjHelper,
  1266. bool &Translated) {
  1267. if (CI->getNumArgOperands() == 2) {
  1268. return TranslateBitcast(CI, IOP, opcode, helper, pObjHelper, Translated);
  1269. } else {
  1270. DXASSERT_NOMSG(CI->getNumArgOperands() == 4);
  1271. hlsl::OP *hlslOP = &helper.hlslOP;
  1272. Value *x = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  1273. DXASSERT_NOMSG(x->getType()->getScalarType()->isDoubleTy());
  1274. Value *lo = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  1275. Value *hi = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  1276. IRBuilder<> Builder(CI);
  1277. return TranslateDoubleAsUint(x, lo, hi, Builder, hlslOP);
  1278. }
  1279. }
  1280. Value *TranslateAsDouble(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1281. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1282. hlsl::OP *hlslOP = &helper.hlslOP;
  1283. Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1284. Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1285. Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
  1286. IRBuilder<> Builder(CI);
  1287. return TrivialDxilOperation(opcode, { opArg, x, y }, CI->getType(), CI->getType(), hlslOP, Builder);
  1288. }
// atan2(y, x): computed as atan(y/x) and then corrected per quadrant with a
// chain of selects, following https://en.wikipedia.org/wiki/Atan2.
Value *TranslateAtan2(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  IRBuilder<> Builder(CI);
  Value *tan = Builder.CreateFDiv(y, x);
  Value *atan =
      TrivialDxilUnaryOperation(OP::OpCode::Atan, tan, hlslOP, Builder);
  // Modify atan result based on https://en.wikipedia.org/wiki/Atan2.
  Type *Ty = x->getType();
  Constant *pi = ConstantFP::get(Ty->getScalarType(), M_PI);
  Constant *halfPi = ConstantFP::get(Ty->getScalarType(), M_PI / 2);
  Constant *negHalfPi = ConstantFP::get(Ty->getScalarType(), -M_PI / 2);
  Constant *zero = ConstantFP::get(Ty->getScalarType(), 0);
  // Splat the correction constants for vector inputs.
  if (Ty->isVectorTy()) {
    unsigned vecSize = Ty->getVectorNumElements();
    pi = ConstantVector::getSplat(vecSize, pi);
    halfPi = ConstantVector::getSplat(vecSize, halfPi);
    negHalfPi = ConstantVector::getSplat(vecSize, negHalfPi);
    zero = ConstantVector::getSplat(vecSize, zero);
  }
  Value *atanAddPi = Builder.CreateFAdd(atan, pi);
  Value *atanSubPi = Builder.CreateFSub(atan, pi);
  // x > 0 -> atan.
  Value *result = atan;
  Value *xLt0 = Builder.CreateFCmpOLT(x, zero);
  Value *xEq0 = Builder.CreateFCmpOEQ(x, zero);
  Value *yGe0 = Builder.CreateFCmpOGE(y, zero);
  Value *yLt0 = Builder.CreateFCmpOLT(y, zero);
  // The selects are applied in order, so later conditions override earlier
  // ones where they overlap.
  // x < 0, y >= 0 -> atan + pi.
  Value *xLt0AndyGe0 = Builder.CreateAnd(xLt0, yGe0);
  result = Builder.CreateSelect(xLt0AndyGe0, atanAddPi, result);
  // x < 0, y < 0 -> atan - pi.
  Value *xLt0AndYLt0 = Builder.CreateAnd(xLt0, yLt0);
  result = Builder.CreateSelect(xLt0AndYLt0, atanSubPi, result);
  // x == 0, y < 0 -> -pi/2
  Value *xEq0AndYLt0 = Builder.CreateAnd(xEq0, yLt0);
  result = Builder.CreateSelect(xEq0AndYLt0, negHalfPi, result);
  // x == 0, y > 0 -> pi/2
  // NOTE(review): the guard actually uses yGe0, so atan2(0, 0) produces pi/2
  // here rather than the conventional 0 — confirm this is intended.
  Value *xEq0AndYGe0 = Builder.CreateAnd(xEq0, yGe0);
  result = Builder.CreateSelect(xEq0AndYGe0, halfPi, result);
  return result;
}
  1333. Value *TranslateClamp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1334. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1335. hlsl::OP *hlslOP = &helper.hlslOP;
  1336. Type *Ty = CI->getType();
  1337. Type *EltTy = Ty->getScalarType();
  1338. DXIL::OpCode maxOp = DXIL::OpCode::FMax;
  1339. DXIL::OpCode minOp = DXIL::OpCode::FMin;
  1340. if (IOP == IntrinsicOp::IOP_uclamp) {
  1341. maxOp = DXIL::OpCode::UMax;
  1342. minOp = DXIL::OpCode::UMin;
  1343. } else if (EltTy->isIntegerTy()) {
  1344. maxOp = DXIL::OpCode::IMax;
  1345. minOp = DXIL::OpCode::IMin;
  1346. }
  1347. Value *x = CI->getArgOperand(HLOperandIndex::kClampOpXIdx);
  1348. Value *maxVal = CI->getArgOperand(HLOperandIndex::kClampOpMaxIdx);
  1349. Value *minVal = CI->getArgOperand(HLOperandIndex::kClampOpMinIdx);
  1350. IRBuilder<> Builder(CI);
  1351. // min(max(x, minVal), maxVal).
  1352. Value *maxXMinVal =
  1353. TrivialDxilBinaryOperation(maxOp, x, minVal, hlslOP, Builder);
  1354. return TrivialDxilBinaryOperation(minOp, maxXMinVal, maxVal, hlslOP, Builder);
  1355. }
// clip(x): emit Discard when any component of x is < 0.
Value *TranslateClip(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Function *discard =
      hlslOP->GetOpFunc(OP::OpCode::Discard, Type::getVoidTy(CI->getContext()));
  IRBuilder<> Builder(CI);
  Value *cond = nullptr;
  Value *arg = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  if (VectorType *VT = dyn_cast<VectorType>(arg->getType())) {
    // Vector argument: OR together the per-component (component < 0) tests.
    Value *elt = Builder.CreateExtractElement(arg, (uint64_t)0);
    cond = Builder.CreateFCmpOLT(elt, hlslOP->GetFloatConst(0));
    for (unsigned i = 1; i < VT->getNumElements(); i++) {
      Value *elt = Builder.CreateExtractElement(arg, i);
      Value *eltCond = Builder.CreateFCmpOLT(elt, hlslOP->GetFloatConst(0));
      cond = Builder.CreateOr(cond, eltCond);
    }
  } else
    cond = Builder.CreateFCmpOLT(arg, hlslOP->GetFloatConst(0));
  /*If discard condition evaluates to false at compile-time, then
  don't emit the discard instruction.*/
  if (ConstantInt *constCond = dyn_cast<ConstantInt>(cond))
    if (!constCond->getLimitedValue())
      return nullptr;
  Constant *opArg = hlslOP->GetU32Const((unsigned)OP::OpCode::Discard);
  Builder.CreateCall(discard, {opArg, cond});
  // clip has no result; returning nullptr leaves no replacement value.
  return nullptr;
}
// Lowers the HLSL cross() intrinsic to scalarized multiply/subtract:
// cross(a, b) = (a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x).
Value *TranslateCross(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                      HLOperationLowerHelper &helper,
                      HLObjectOperationLowerHelper *pObjHelper,
                      bool &Translated) {
  VectorType *VT = cast<VectorType>(CI->getType());
  // cross() is only defined for 3-component vectors.
  DXASSERT_NOMSG(VT->getNumElements() == 3);
  Value *op0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *op1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  IRBuilder<> Builder(CI);
  // Scalarize both operands.
  Value *op0_x = Builder.CreateExtractElement(op0, (uint64_t)0);
  Value *op0_y = Builder.CreateExtractElement(op0, 1);
  Value *op0_z = Builder.CreateExtractElement(op0, 2);
  Value *op1_x = Builder.CreateExtractElement(op1, (uint64_t)0);
  Value *op1_y = Builder.CreateExtractElement(op1, 1);
  Value *op1_z = Builder.CreateExtractElement(op1, 2);
  // MulSub(x0, y0, x1, y1) = x0*y1 - y0*x1.
  auto MulSub = [&](Value *x0, Value *y0, Value *x1, Value *y1) -> Value * {
    Value *xy = Builder.CreateFMul(x0, y1);
    Value *yx = Builder.CreateFMul(y0, x1);
    return Builder.CreateFSub(xy, yx);
  };
  Value *yz_zy = MulSub(op0_y, op0_z, op1_y, op1_z);
  Value *zx_xz = MulSub(op0_z, op0_x, op1_z, op1_x);
  Value *xy_yx = MulSub(op0_x, op0_y, op1_x, op1_y);
  // Reassemble the three scalar components into the result vector.
  Value *cross = UndefValue::get(VT);
  cross = Builder.CreateInsertElement(cross, yz_zy, (uint64_t)0);
  cross = Builder.CreateInsertElement(cross, zx_xz, 1);
  cross = Builder.CreateInsertElement(cross, xy_yx, 2);
  return cross;
}
  1410. Value *TranslateDegrees(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1411. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1412. IRBuilder<> Builder(CI);
  1413. Type *Ty = CI->getType();
  1414. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1415. // 180/pi.
  1416. Constant *toDegreeConst = ConstantFP::get(Ty->getScalarType(), 180 / M_PI);
  1417. if (Ty != Ty->getScalarType()) {
  1418. toDegreeConst =
  1419. ConstantVector::getSplat(Ty->getVectorNumElements(), toDegreeConst);
  1420. }
  1421. return Builder.CreateFMul(toDegreeConst, val);
  1422. }
// Lowers the HLSL dst() intrinsic:
// dst(src0, src1) = (1, src0.y * src1.y, src0.z, src1.w).
Value *TranslateDst(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                    HLOperationLowerHelper &helper,
                    HLObjectOperationLowerHelper *pObjHelper,
                    bool &Translated) {
  Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  Type *Ty = src1->getType();
  IRBuilder<> Builder(CI);
  Value *Result = UndefValue::get(Ty);
  Constant *oneConst = ConstantFP::get(Ty->getScalarType(), 1);
  // dest.x = 1;
  Result = Builder.CreateInsertElement(Result, oneConst, (uint64_t)0);
  // dest.y = src0.y * src1.y;
  Value *src0_y = Builder.CreateExtractElement(src0, 1);
  Value *src1_y = Builder.CreateExtractElement(src1, 1);
  Value *yMuly = Builder.CreateFMul(src0_y, src1_y);
  Result = Builder.CreateInsertElement(Result, yMuly, 1);
  // dest.z = src0.z;
  Value *src0_z = Builder.CreateExtractElement(src0, 2);
  Result = Builder.CreateInsertElement(Result, src0_z, 2);
  // dest.w = src1.w;
  Value *src1_w = Builder.CreateExtractElement(src1, 3);
  Result = Builder.CreateInsertElement(Result, src1_w, 3);
  return Result;
}
// Lowers firstbithigh: emit the DXIL opcode via TrivialUnaryOperation, then
// re-base the raw result as
//   raw == -1 ? -1 : (bitWidth - 1 - raw)
// preserving -1 ("no bit set") while mirroring other values to an
// LSB-relative bit index.
Value *TranslateFirstbitHi(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                           HLOperationLowerHelper &helper,
                           HLObjectOperationLowerHelper *pObjHelper,
                           bool &Translated) {
  Value *firstbitHi =
      TrivialUnaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated);
  // firstbitHi == -1? -1 : (bitWidth-1 -firstbitHi);
  IRBuilder<> Builder(CI);
  Constant *neg1 = Builder.getInt32(-1);
  Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  Type *Ty = src->getType();
  IntegerType *EltTy = cast<IntegerType>(Ty->getScalarType());
  // bitWidth-1 comes from the source element type (e.g. 31 for i32), but the
  // arithmetic is done in i32, matching the opcode's result type.
  Constant *bitWidth = Builder.getInt32(EltTy->getBitWidth()-1);
  if (Ty == Ty->getScalarType()) {
    // Scalar input: one sub + select.
    Value *sub = Builder.CreateSub(bitWidth, firstbitHi);
    Value *cond = Builder.CreateICmpEQ(neg1, firstbitHi);
    return Builder.CreateSelect(cond, neg1, sub);
  } else {
    // Vector input: remap each lane individually.
    Value *result = UndefValue::get(CI->getType());
    unsigned vecSize = Ty->getVectorNumElements();
    for (unsigned i = 0; i < vecSize; i++) {
      Value *EltFirstBit = Builder.CreateExtractElement(firstbitHi, i);
      Value *sub = Builder.CreateSub(bitWidth, EltFirstBit);
      Value *cond = Builder.CreateICmpEQ(neg1, EltFirstBit);
      Value *Elt = Builder.CreateSelect(cond, neg1, sub);
      result = Builder.CreateInsertElement(result, Elt, i);
    }
    return result;
  }
}
  1476. Value *TranslateFirstbitLo(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1477. HLOperationLowerHelper &helper,
  1478. HLObjectOperationLowerHelper *pObjHelper,
  1479. bool &Translated) {
  1480. Value *firstbitLo =
  1481. TrivialUnaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated);
  1482. return firstbitLo;
  1483. }
// Lowers the HLSL lit() intrinsic to a float4 of lighting coefficients:
//   (ambient, diffuse, specular, 1) where
//   ambient  = 1
//   diffuse  = (n_dot_l < 0) ? 0 : n_dot_l
//   specular = (n_dot_l < 0 || n_dot_h < 0) ? 0 : pow(n_dot_h, m)
Value *TranslateLit(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                    HLOperationLowerHelper &helper,
                    HLObjectOperationLowerHelper *pObjHelper,
                    bool &Translated) {
  Value *n_dot_l = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  Value *n_dot_h = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  Value *m = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  IRBuilder<> Builder(CI);
  Type *Ty = m->getType();
  Value *Result = UndefValue::get(VectorType::get(Ty, 4));
  // Result = (ambient, diffuse, specular, 1)
  // ambient = 1.
  Constant *oneConst = ConstantFP::get(Ty, 1);
  Result = Builder.CreateInsertElement(Result, oneConst, (uint64_t)0);
  // Result.w = 1.
  Result = Builder.CreateInsertElement(Result, oneConst, 3);
  // diffuse = (n_dot_l < 0) ? 0 : n_dot_l.
  Constant *zeroConst = ConstantFP::get(Ty, 0);
  Value *nlCmp = Builder.CreateFCmpOLT(n_dot_l, zeroConst);
  Value *diffuse = Builder.CreateSelect(nlCmp, zeroConst, n_dot_l);
  Result = Builder.CreateInsertElement(Result, diffuse, 1);
  // specular = ((n_dot_l < 0) || (n_dot_h < 0)) ? 0: (n_dot_h ^ m).
  Value *nhCmp = Builder.CreateFCmpOLT(n_dot_h, zeroConst);
  Value *specCond = Builder.CreateOr(nlCmp, nhCmp);
  // pow() lowering differs in FXC-compat mode (shared with TranslatePow).
  bool isFXCCompatMode =
      CI->getModule()->GetHLModule().GetHLOptions().bFXCCompatMode;
  Value *nhPowM =
      TranslatePowImpl(&helper.hlslOP, Builder, n_dot_h, m, isFXCCompatMode);
  Value *spec = Builder.CreateSelect(specCond, zeroConst, nhPowM);
  Result = Builder.CreateInsertElement(Result, spec, 2);
  return Result;
}
  1512. Value *TranslateRadians(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1513. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1514. IRBuilder<> Builder(CI);
  1515. Type *Ty = CI->getType();
  1516. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1517. // pi/180.
  1518. Constant *toRadianConst = ConstantFP::get(Ty->getScalarType(), M_PI / 180);
  1519. if (Ty != Ty->getScalarType()) {
  1520. toRadianConst =
  1521. ConstantVector::getSplat(Ty->getVectorNumElements(), toRadianConst);
  1522. }
  1523. return Builder.CreateFMul(toRadianConst, val);
  1524. }
  1525. Value *TranslateF16ToF32(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1526. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1527. IRBuilder<> Builder(CI);
  1528. Value *x = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1529. Type *Ty = CI->getType();
  1530. Function *f16tof32 =
  1531. helper.hlslOP.GetOpFunc(opcode, helper.voidTy);
  1532. return TrivialDxilOperation(
  1533. f16tof32, opcode, {Builder.getInt32(static_cast<unsigned>(opcode)), x},
  1534. x->getType(), Ty, &helper.hlslOP, Builder);
  1535. }
  1536. Value *TranslateF32ToF16(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1537. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1538. IRBuilder<> Builder(CI);
  1539. Value *x = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1540. Type *Ty = CI->getType();
  1541. Function *f32tof16 =
  1542. helper.hlslOP.GetOpFunc(opcode, helper.voidTy);
  1543. return TrivialDxilOperation(
  1544. f32tof16, opcode, {Builder.getInt32(static_cast<unsigned>(opcode)), x},
  1545. x->getType(), Ty, &helper.hlslOP, Builder);
  1546. }
// Shared lowering helper for length(): for a vector of more than one element
// this emits sqrt(v.x*v.x + v.y*v.y + ...); a 1-element vector or a scalar
// reduces to FAbs(val). Also reused by distance() on the difference vector.
Value *TranslateLength(CallInst *CI, Value *val, hlsl::OP *hlslOP) {
  IRBuilder<> Builder(CI);
  if (VectorType *VT = dyn_cast<VectorType>(val->getType())) {
    Value *Elt = Builder.CreateExtractElement(val, (uint64_t)0);
    unsigned size = VT->getNumElements();
    if (size > 1) {
      // Accumulate the sum of squares, one mul/add pair per lane.
      Value *Sum = Builder.CreateFMul(Elt, Elt);
      for (unsigned i = 1; i < size; i++) {
        Elt = Builder.CreateExtractElement(val, i);
        Value *Mul = Builder.CreateFMul(Elt, Elt);
        Sum = Builder.CreateFAdd(Sum, Mul);
      }
      DXIL::OpCode sqrt = DXIL::OpCode::Sqrt;
      Function *dxilSqrt = hlslOP->GetOpFunc(sqrt, VT->getElementType());
      Value *opArg = hlslOP->GetI32Const((unsigned)sqrt);
      return Builder.CreateCall(dxilSqrt, {opArg, Sum},
                                hlslOP->GetOpCodeName(sqrt));
    } else {
      // Single-element vector: fall through to FAbs on the extracted scalar.
      val = Elt;
    }
  }
  // length of a scalar is its absolute value.
  DXIL::OpCode fabs = DXIL::OpCode::FAbs;
  Function *dxilFAbs = hlslOP->GetOpFunc(fabs, val->getType());
  Value *opArg = hlslOP->GetI32Const((unsigned)fabs);
  return Builder.CreateCall(dxilFAbs, {opArg, val},
                            hlslOP->GetOpCodeName(fabs));
}
  1574. Value *TranslateLength(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1575. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1576. hlsl::OP *hlslOP = &helper.hlslOP;
  1577. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1578. return TranslateLength(CI, val, hlslOP);
  1579. }
  1580. Value *TranslateModF(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1581. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1582. hlsl::OP *hlslOP = &helper.hlslOP;
  1583. Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1584. Value *outIntPtr = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1585. IRBuilder<> Builder(CI);
  1586. Value *intP =
  1587. TrivialDxilUnaryOperation(OP::OpCode::Round_z, val, hlslOP, Builder);
  1588. Value *fracP = Builder.CreateFSub(val, intP);
  1589. Builder.CreateStore(intP, outIntPtr);
  1590. return fracP;
  1591. }
  1592. Value *TranslateDistance(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1593. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1594. hlsl::OP *hlslOP = &helper.hlslOP;
  1595. Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1596. Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1597. IRBuilder<> Builder(CI);
  1598. Value *sub = Builder.CreateFSub(src0, src1);
  1599. return TranslateLength(CI, sub, hlslOP);
  1600. }
  1601. Value *TranslateExp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1602. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1603. hlsl::OP *hlslOP = &helper.hlslOP;
  1604. IRBuilder<> Builder(CI);
  1605. Type *Ty = CI->getType();
  1606. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1607. Constant *log2eConst = ConstantFP::get(Ty->getScalarType(), M_LOG2E);
  1608. if (Ty != Ty->getScalarType()) {
  1609. log2eConst =
  1610. ConstantVector::getSplat(Ty->getVectorNumElements(), log2eConst);
  1611. }
  1612. val = Builder.CreateFMul(log2eConst, val);
  1613. Value *exp = TrivialDxilUnaryOperation(OP::OpCode::Exp, val, hlslOP, Builder);
  1614. return exp;
  1615. }
  1616. Value *TranslateLog(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1617. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1618. hlsl::OP *hlslOP = &helper.hlslOP;
  1619. IRBuilder<> Builder(CI);
  1620. Type *Ty = CI->getType();
  1621. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1622. Constant *ln2Const = ConstantFP::get(Ty->getScalarType(), M_LN2);
  1623. if (Ty != Ty->getScalarType()) {
  1624. ln2Const = ConstantVector::getSplat(Ty->getVectorNumElements(), ln2Const);
  1625. }
  1626. Value *log = TrivialDxilUnaryOperation(OP::OpCode::Log, val, hlslOP, Builder);
  1627. return Builder.CreateFMul(ln2Const, log);
  1628. }
  1629. Value *TranslateLog10(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1630. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1631. hlsl::OP *hlslOP = &helper.hlslOP;
  1632. IRBuilder<> Builder(CI);
  1633. Type *Ty = CI->getType();
  1634. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1635. Constant *log2_10Const = ConstantFP::get(Ty->getScalarType(), M_LN2 / M_LN10);
  1636. if (Ty != Ty->getScalarType()) {
  1637. log2_10Const =
  1638. ConstantVector::getSplat(Ty->getVectorNumElements(), log2_10Const);
  1639. }
  1640. Value *log = TrivialDxilUnaryOperation(OP::OpCode::Log, val, hlslOP, Builder);
  1641. return Builder.CreateFMul(log2_10Const, log);
  1642. }
// Lowers fmod(x, y):
//   div  = x / y
//   fmod = (div >= 0 ? frc(|div|) : -frc(|div|)) * y
// This equals x - y * trunc(x / y), so the result carries the sign of x.
Value *TranslateFMod(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                     HLOperationLowerHelper &helper,
                     HLObjectOperationLowerHelper *pObjHelper,
                     bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  IRBuilder<> Builder(CI);
  Value *div = Builder.CreateFDiv(src0, src1);
  Value *negDiv = Builder.CreateFNeg(div);
  // ge holds "div is non-negative" (div >= -div).
  Value *ge = Builder.CreateFCmpOGE(div, negDiv);
  Value *absDiv =
      TrivialDxilUnaryOperation(OP::OpCode::FAbs, div, hlslOP, Builder);
  Value *frc =
      TrivialDxilUnaryOperation(OP::OpCode::Frc, absDiv, hlslOP, Builder);
  Value *negFrc = Builder.CreateFNeg(frc);
  // Restore the sign of the quotient on the fractional part.
  Value *realFrc = Builder.CreateSelect(ge, frc, negFrc);
  return Builder.CreateFMul(realFrc, src1);
}
  1660. Value *TranslateFUIBinary(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1661. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1662. bool isFloat = CI->getType()->getScalarType()->isFloatingPointTy();
  1663. if (isFloat) {
  1664. switch (IOP) {
  1665. case IntrinsicOp::IOP_max:
  1666. opcode = OP::OpCode::FMax;
  1667. break;
  1668. case IntrinsicOp::IOP_min:
  1669. default:
  1670. DXASSERT_NOMSG(IOP == IntrinsicOp::IOP_min);
  1671. opcode = OP::OpCode::FMin;
  1672. break;
  1673. }
  1674. }
  1675. return TrivialBinaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated);
  1676. }
  1677. Value *TranslateFUITrinary(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1678. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1679. bool isFloat = CI->getType()->getScalarType()->isFloatingPointTy();
  1680. if (isFloat) {
  1681. switch (IOP) {
  1682. case IntrinsicOp::IOP_mad:
  1683. default:
  1684. DXASSERT_NOMSG(IOP == IntrinsicOp::IOP_mad);
  1685. opcode = OP::OpCode::FMad;
  1686. break;
  1687. }
  1688. }
  1689. return TrivialTrinaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated);
  1690. }
// Lowers frexp(x, out exp) by manipulating the 32-bit float bit pattern:
// returns a mantissa with magnitude in [0.5, 1) and stores the exponent
// (as a float) through expPtr. A zero input yields zero for both results
// via the `notZero` all-ones/all-zeros mask.
// NOTE(review): the masks/shift (0x7f800000, 0x007fffff, 23) assume 32-bit
// float layout only.
Value *TranslateFrexp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                      HLOperationLowerHelper &helper,
                      HLObjectOperationLowerHelper *pObjHelper,
                      bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *expPtr = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  IRBuilder<> Builder(CI);
  Type *i32Ty = Type::getInt32Ty(CI->getContext());
  Constant *exponentMaskConst = ConstantInt::get(i32Ty, 0x7f800000);
  Constant *mantisaMaskConst = ConstantInt::get(i32Ty, 0x007fffff);
  Constant *exponentShiftConst = ConstantInt::get(i32Ty, 23);
  // 0x3f000000 is the bit pattern of 0.5f: OR-ing it into the mantissa
  // places the result's magnitude in [0.5, 1).
  Constant *mantisaOrConst = ConstantInt::get(i32Ty, 0x3f000000);
  // Subtracting 0x3f000000 rebases the exponent field relative to 0.5.
  Constant *exponentBiasConst = ConstantInt::get(i32Ty, -(int)0x3f000000);
  Constant *zeroVal = hlslOP->GetFloatConst(0);
  // int iVal = asint(val);
  Type *dstTy = i32Ty;
  Type *Ty = val->getType();
  // Splat every constant for vector inputs.
  if (Ty->isVectorTy()) {
    unsigned vecSize = Ty->getVectorNumElements();
    dstTy = VectorType::get(i32Ty, vecSize);
    exponentMaskConst = ConstantVector::getSplat(vecSize, exponentMaskConst);
    mantisaMaskConst = ConstantVector::getSplat(vecSize, mantisaMaskConst);
    exponentShiftConst = ConstantVector::getSplat(vecSize, exponentShiftConst);
    mantisaOrConst = ConstantVector::getSplat(vecSize, mantisaOrConst);
    exponentBiasConst = ConstantVector::getSplat(vecSize, exponentBiasConst);
    zeroVal = ConstantVector::getSplat(vecSize, zeroVal);
  }
  // bool ne = val != 0; sign-extended to an all-ones/all-zeros i32 mask.
  Value *notZero = Builder.CreateFCmpUNE(val, zeroVal);
  notZero = Builder.CreateSExt(notZero, dstTy);
  Value *intVal = Builder.CreateBitCast(val, dstTy);
  // temp = intVal & exponentMask;
  Value *temp = Builder.CreateAnd(intVal, exponentMaskConst);
  // temp = temp + exponentBias;
  temp = Builder.CreateAdd(temp, exponentBiasConst);
  // temp = temp & ne;   (zero the exponent when the input is zero)
  temp = Builder.CreateAnd(temp, notZero);
  // temp = temp >> exponentShift;
  temp = Builder.CreateAShr(temp, exponentShiftConst);
  // exp = float(temp); stored through the out parameter.
  Value *exp = Builder.CreateSIToFP(temp, Ty);
  Builder.CreateStore(exp, expPtr);
  // temp = iVal & mantisaMask;
  temp = Builder.CreateAnd(intVal, mantisaMaskConst);
  // temp = temp | mantisaOr;
  temp = Builder.CreateOr(temp, mantisaOrConst);
  // mantisa = temp & ne;   (zero the mantissa when the input is zero)
  Value *mantisa = Builder.CreateAnd(temp, notZero);
  return Builder.CreateBitCast(mantisa, Ty);
}
  1740. Value *TranslateLdExp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1741. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1742. hlsl::OP *hlslOP = &helper.hlslOP;
  1743. Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1744. Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1745. IRBuilder<> Builder(CI);
  1746. Value *exp =
  1747. TrivialDxilUnaryOperation(OP::OpCode::Exp, src1, hlslOP, Builder);
  1748. return Builder.CreateFMul(exp, src0);
  1749. }
  1750. Value *TranslateFWidth(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1751. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1752. hlsl::OP *hlslOP = &helper.hlslOP;
  1753. Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1754. IRBuilder<> Builder(CI);
  1755. Value *ddx =
  1756. TrivialDxilUnaryOperation(OP::OpCode::DerivCoarseX, src, hlslOP, Builder);
  1757. Value *absDdx =
  1758. TrivialDxilUnaryOperation(OP::OpCode::FAbs, ddx, hlslOP, Builder);
  1759. Value *ddy =
  1760. TrivialDxilUnaryOperation(OP::OpCode::DerivCoarseY, src, hlslOP, Builder);
  1761. Value *absDdy =
  1762. TrivialDxilUnaryOperation(OP::OpCode::FAbs, ddy, hlslOP, Builder);
  1763. return Builder.CreateFAdd(absDdx, absDdy);
  1764. }
  1765. Value *TranslateLerp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1766. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1767. // x + s(y-x)
  1768. Value *x = CI->getArgOperand(HLOperandIndex::kLerpOpXIdx);
  1769. Value *y = CI->getArgOperand(HLOperandIndex::kLerpOpYIdx);
  1770. IRBuilder<> Builder(CI);
  1771. Value *ySubx = Builder.CreateFSub(y, x);
  1772. Value *s = CI->getArgOperand(HLOperandIndex::kLerpOpSIdx);
  1773. Value *sMulSub = Builder.CreateFMul(s, ySubx);
  1774. return Builder.CreateFAdd(x, sMulSub);
  1775. }
  1776. Value *TrivialDotOperation(OP::OpCode opcode, Value *src0,
  1777. Value *src1, hlsl::OP *hlslOP,
  1778. IRBuilder<> &Builder) {
  1779. Type *Ty = src0->getType()->getScalarType();
  1780. Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty);
  1781. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  1782. SmallVector<Value *, 9> args;
  1783. args.emplace_back(opArg);
  1784. unsigned vecSize = src0->getType()->getVectorNumElements();
  1785. for (unsigned i = 0; i < vecSize; i++)
  1786. args.emplace_back(Builder.CreateExtractElement(src0, i));
  1787. for (unsigned i = 0; i < vecSize; i++)
  1788. args.emplace_back(Builder.CreateExtractElement(src1, i));
  1789. Value *dotOP = Builder.CreateCall(dxilFunc, args);
  1790. return dotOP;
  1791. }
// Integer dot product lowering: expands to a multiply of lane 0 followed by
// one IMad/UMad per remaining lane (no dot opcode is used for integers).
Value *TranslateIDot(Value *arg0, Value *arg1, unsigned vecSize, hlsl::OP *hlslOP, IRBuilder<> &Builder, bool Unsigned = false) {
  auto madOpCode = Unsigned ? DXIL::OpCode::UMad : DXIL::OpCode::IMad;
  Value *Elt0 = Builder.CreateExtractElement(arg0, (uint64_t)0);
  Value *Elt1 = Builder.CreateExtractElement(arg1, (uint64_t)0);
  Value *Result = Builder.CreateMul(Elt0, Elt1);
  // Result = mad(arg0[i], arg1[i], Result) for each remaining lane.
  for (unsigned iVecElt = 1; iVecElt < vecSize; ++iVecElt) {
    Elt0 = Builder.CreateExtractElement(arg0, iVecElt);
    Elt1 = Builder.CreateExtractElement(arg1, iVecElt);
    Result = TrivialDxilTrinaryOperation(madOpCode, Elt0, Elt1, Result, hlslOP, Builder);
  }
  return Result;
}
  1804. Value *TranslateFDot(Value *arg0, Value *arg1, unsigned vecSize,
  1805. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  1806. switch (vecSize) {
  1807. case 2:
  1808. return TrivialDotOperation(OP::OpCode::Dot2, arg0, arg1, hlslOP, Builder);
  1809. break;
  1810. case 3:
  1811. return TrivialDotOperation(OP::OpCode::Dot3, arg0, arg1, hlslOP, Builder);
  1812. break;
  1813. case 4:
  1814. return TrivialDotOperation(OP::OpCode::Dot4, arg0, arg1, hlslOP, Builder);
  1815. break;
  1816. default:
  1817. DXASSERT(vecSize == 1, "wrong vector size");
  1818. {
  1819. Value *vecMul = Builder.CreateFMul(arg0, arg1);
  1820. return Builder.CreateExtractElement(vecMul, (uint64_t)0);
  1821. }
  1822. break;
  1823. }
  1824. }
  1825. Value *TranslateDot(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1826. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1827. hlsl::OP *hlslOP = &helper.hlslOP;
  1828. Value *arg0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1829. Type *Ty = arg0->getType();
  1830. unsigned vecSize = Ty->getVectorNumElements();
  1831. Value *arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1832. IRBuilder<> Builder(CI);
  1833. if (Ty->getScalarType()->isFloatingPointTy()) {
  1834. return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder);
  1835. } else {
  1836. return TranslateIDot(arg0, arg1, vecSize, hlslOP, Builder);
  1837. }
  1838. }
// Lowers normalize(v) = v * rsqrt(dot(v, v)): the scalar rsqrt result is
// splatted across the vector before the final multiply.
Value *TranslateNormalize(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                          HLOperationLowerHelper &helper,
                          HLObjectOperationLowerHelper *pObjHelper,
                          bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Type *Ty = CI->getType();
  Value *op = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  VectorType *VT = cast<VectorType>(Ty);
  unsigned vecSize = VT->getNumElements();
  IRBuilder<> Builder(CI);
  // dot(v, v): squared length.
  Value *dot = TranslateFDot(op, op, vecSize, hlslOP, Builder);
  DXIL::OpCode rsqrtOp = DXIL::OpCode::Rsqrt;
  Function *dxilRsqrt = hlslOP->GetOpFunc(rsqrtOp, VT->getElementType());
  Value *rsqrt = Builder.CreateCall(
      dxilRsqrt, {hlslOP->GetI32Const((unsigned)rsqrtOp), dot},
      hlslOP->GetOpCodeName(rsqrtOp));
  // Splat the scalar rsqrt into each lane.
  Value *vecRsqrt = UndefValue::get(VT);
  for (unsigned i = 0; i < VT->getNumElements(); i++)
    vecRsqrt = Builder.CreateInsertElement(vecRsqrt, rsqrt, i);
  return Builder.CreateFMul(op, vecRsqrt);
}
  1860. Value *TranslateReflect(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  1861. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1862. hlsl::OP *hlslOP = &helper.hlslOP;
  1863. // v = i - 2 * n * dot(i, n).
  1864. IRBuilder<> Builder(CI);
  1865. Value *i = CI->getArgOperand(HLOperandIndex::kReflectOpIIdx);
  1866. Value *n = CI->getArgOperand(HLOperandIndex::kReflectOpNIdx);
  1867. VectorType *VT = cast<VectorType>(i->getType());
  1868. unsigned vecSize = VT->getNumElements();
  1869. Value *dot = TranslateFDot(i, n, vecSize, hlslOP, Builder);
  1870. // 2 * dot (i, n).
  1871. dot = Builder.CreateFMul(hlslOP->GetFloatConst(2), dot);
  1872. // 2 * n * dot(i, n).
  1873. Value *vecDot = Builder.CreateVectorSplat(vecSize, dot);
  1874. Value *nMulDot = Builder.CreateFMul(vecDot, n);
  1875. // i - 2 * n * dot(i, n).
  1876. return Builder.CreateFSub(i, nMulDot);
  1877. }
// Lowers refract(i, n, eta):
//   d = dot(i, n);
//   t = 1 - eta * eta * (1 - d*d);
//   cond = t >= 0;                       (note: NOT t >= 1 — the previous
//                                         comment here was wrong)
//   r = eta * i - (eta * d + sqrt(t)) * n;
//   return cond ? r : 0;   // t < 0 yields 0 (total internal reflection).
Value *TranslateRefract(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
                        HLOperationLowerHelper &helper,
                        HLObjectOperationLowerHelper *pObjHelper,
                        bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  IRBuilder<> Builder(CI);
  Value *i = CI->getArgOperand(HLOperandIndex::kRefractOpIIdx);
  Value *n = CI->getArgOperand(HLOperandIndex::kRefractOpNIdx);
  Value *eta = CI->getArgOperand(HLOperandIndex::kRefractOpEtaIdx);
  VectorType *VT = cast<VectorType>(i->getType());
  unsigned vecSize = VT->getNumElements();
  Value *dot = TranslateFDot(i, n, vecSize, hlslOP, Builder);
  // eta * eta;
  Value *eta2 = Builder.CreateFMul(eta, eta);
  // d*d;
  Value *dot2 = Builder.CreateFMul(dot, dot);
  Constant *one = ConstantFP::get(eta->getType(), 1);
  Constant *zero = ConstantFP::get(eta->getType(), 0);
  // 1- d*d;
  dot2 = Builder.CreateFSub(one, dot2);
  // eta * eta * (1-d*d);
  eta2 = Builder.CreateFMul(dot2, eta2);
  // t = 1 - eta * eta * ( 1 - d*d);
  Value *t = Builder.CreateFSub(one, eta2);
  // cond = t >= 0;
  Value *cond = Builder.CreateFCmpOGE(t, zero);
  // eta * i;  (splat the scalar eta across the vector first)
  Value *vecEta = UndefValue::get(VT);
  for (unsigned i = 0; i < vecSize; i++)
    vecEta = Builder.CreateInsertElement(vecEta, eta, i);
  Value *etaMulI = Builder.CreateFMul(i, vecEta);
  // sqrt(t);
  Value *sqrt = TrivialDxilUnaryOperation(OP::OpCode::Sqrt, t, hlslOP, Builder);
  // eta * d;
  Value *etaMulD = Builder.CreateFMul(eta, dot);
  // eta * d + sqrt(t);
  Value *etaSqrt = Builder.CreateFAdd(etaMulD, sqrt);
  // (eta * d + sqrt(t)) * n;
  Value *vecEtaSqrt = Builder.CreateVectorSplat(vecSize, etaSqrt);
  Value *r = Builder.CreateFMul(vecEtaSqrt, n);
  // r = eta * i - (eta * d + sqrt(t)) * n;
  r = Builder.CreateFSub(etaMulI, r);
  Value *refract =
      Builder.CreateSelect(cond, r, ConstantVector::getSplat(vecSize, zero));
  return refract;
}
  1927. Value *TranslateSmoothStep(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1928. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1929. hlsl::OP *hlslOP = &helper.hlslOP;
  1930. // s = saturate((x-min)/(max-min)).
  1931. IRBuilder<> Builder(CI);
  1932. Value *minVal = CI->getArgOperand(HLOperandIndex::kSmoothStepOpMinIdx);
  1933. Value *maxVal = CI->getArgOperand(HLOperandIndex::kSmoothStepOpMaxIdx);
  1934. Value *maxSubMin = Builder.CreateFSub(maxVal, minVal);
  1935. Value *x = CI->getArgOperand(HLOperandIndex::kSmoothStepOpXIdx);
  1936. Value *xSubMin = Builder.CreateFSub(x, minVal);
  1937. Value *satVal = Builder.CreateFDiv(xSubMin, maxSubMin);
  1938. Value *s = TrivialDxilUnaryOperation(DXIL::OpCode::Saturate, satVal, hlslOP,
  1939. Builder);
  1940. // return s * s *(3-2*s).
  1941. Constant *c2 = ConstantFP::get(CI->getType(),2);
  1942. Constant *c3 = ConstantFP::get(CI->getType(),3);
  1943. Value *sMul2 = Builder.CreateFMul(s, c2);
  1944. Value *result = Builder.CreateFSub(c3, sMul2);
  1945. result = Builder.CreateFMul(s, result);
  1946. result = Builder.CreateFMul(s, result);
  1947. return result;
  1948. }
// Lowers msad4(ref, src, accum): builds a uint4 of overlapping byte-shifted
// windows from src.x/src.y using Bfi, splats ref across a uint4, then emits
// a single Msad opcode with the accumulator. The Bfi sequence mirrors the
// ushr/bfi expansion noted in the inline comments below.
Value *TranslateMSad4(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                      HLOperationLowerHelper &helper,
                      HLObjectOperationLowerHelper *pObjHelper,
                      bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Value *ref = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  Value *src = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  Value *accum = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  Type *Ty = CI->getType();
  IRBuilder<> Builder(CI);
  // Splat the scalar reference into all four lanes.
  Value *vecRef = UndefValue::get(Ty);
  for (unsigned i = 0; i < 4; i++)
    vecRef = Builder.CreateInsertElement(vecRef, ref, i);
  Value *srcX = Builder.CreateExtractElement(src, (uint64_t)0);
  Value *srcY = Builder.CreateExtractElement(src, 1);
  // Lane 0 is src.x unshifted.
  Value *byteSrc = UndefValue::get(Ty);
  byteSrc = Builder.CreateInsertElement(byteSrc, srcX, (uint64_t)0);
  // ushr r0.yzw, srcX, l(0, 8, 16, 24)
  // bfi r1.yzw, l(0, 8, 16, 24), l(0, 24, 16, 8), srcX, r0.yyzw
  Value *bfiOpArg =
      hlslOP->GetU32Const(static_cast<unsigned>(DXIL::OpCode::Bfi));
  Value *imm8 = hlslOP->GetU32Const(8);
  Value *imm16 = hlslOP->GetU32Const(16);
  Value *imm24 = hlslOP->GetU32Const(24);
  // Bfi operates on the scalar element type from here on.
  Ty = ref->getType();
  // Get x[31:8].
  Value *srcXShift = Builder.CreateLShr(srcX, imm8);
  // y[0~7] x[31:8].
  Value *byteSrcElt = TrivialDxilOperation(
      DXIL::OpCode::Bfi, {bfiOpArg, imm8, imm24, srcY, srcXShift}, Ty, Ty,
      hlslOP, Builder);
  byteSrc = Builder.CreateInsertElement(byteSrc, byteSrcElt, 1);
  // Get x[31:16].
  srcXShift = Builder.CreateLShr(srcXShift, imm8);
  // y[0~15] x[31:16].
  byteSrcElt = TrivialDxilOperation(DXIL::OpCode::Bfi,
                                    {bfiOpArg, imm16, imm16, srcY, srcXShift},
                                    Ty, Ty, hlslOP, Builder);
  byteSrc = Builder.CreateInsertElement(byteSrc, byteSrcElt, 2);
  // Get x[31:24].
  srcXShift = Builder.CreateLShr(srcXShift, imm8);
  // y[0~23] x[31:24].
  byteSrcElt = TrivialDxilOperation(DXIL::OpCode::Bfi,
                                    {bfiOpArg, imm24, imm8, srcY, srcXShift},
                                    Ty, Ty, hlslOP, Builder);
  byteSrc = Builder.CreateInsertElement(byteSrc, byteSrcElt, 3);
  // Msad on vecref and byteSrc.
  return TrivialDxilTrinaryOperation(DXIL::OpCode::Msad, vecRef, byteSrc, accum,
                                     hlslOP, Builder);
}
  1997. Value *TranslateRCP(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1998. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1999. Type *Ty = CI->getType();
  2000. Value *op = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  2001. IRBuilder<> Builder(CI);
  2002. Constant *one = ConstantFP::get(Ty->getScalarType(), 1.0);
  2003. if (Ty != Ty->getScalarType()) {
  2004. one = ConstantVector::getSplat(Ty->getVectorNumElements(), one);
  2005. }
  2006. return Builder.CreateFDiv(one, op);
  2007. }
// Lowers sign(x) as (0 < x) - (x < 0), yielding -1 / 0 / 1 per element.
// Uses ICmpSLT for integer types and FCmpOLT for floats; for a NaN input
// both ordered compares are false, so the result is 0.
Value *TranslateSign(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                     HLOperationLowerHelper &helper,
                     HLObjectOperationLowerHelper *pObjHelper,
                     bool &Translated) {
  Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  Type *Ty = val->getType();
  bool IsInt = Ty->getScalarType()->isIntegerTy();
  IRBuilder<> Builder(CI);
  Constant *zero = Constant::getNullValue(Ty);
  Value *zeroLtVal = IsInt ? Builder.CreateICmpSLT(zero, val) : Builder.CreateFCmpOLT(zero, val);
  Value *valLtZero = IsInt ? Builder.CreateICmpSLT(val, zero) : Builder.CreateFCmpOLT(val, zero);
  // Widen both i1 results to the call's result type before subtracting.
  zeroLtVal = Builder.CreateZExt(zeroLtVal, CI->getType());
  valLtZero = Builder.CreateZExt(valLtZero, CI->getType());
  return Builder.CreateSub(zeroLtVal, valLtZero);
}
  2021. Value *TranslateUSign(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2022. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2023. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  2024. Type *Ty = val->getType();
  2025. IRBuilder<> Builder(CI);
  2026. Constant *zero = Constant::getNullValue(Ty);
  2027. Value *nonZero = Builder.CreateICmpNE(val, zero);
  2028. return Builder.CreateZExt(nonZero, CI->getType());
  2029. }
  2030. Value *TranslateStep(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2031. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2032. Value *edge = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  2033. Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  2034. Type *Ty = CI->getType();
  2035. IRBuilder<> Builder(CI);
  2036. Constant *one = ConstantFP::get(Ty->getScalarType(), 1.0);
  2037. Constant *zero = ConstantFP::get(Ty->getScalarType(), 0);
  2038. Value *cond = Builder.CreateFCmpOLT(x, edge);
  2039. if (Ty != Ty->getScalarType()) {
  2040. one = ConstantVector::getSplat(Ty->getVectorNumElements(), one);
  2041. zero = ConstantVector::getSplat(Ty->getVectorNumElements(), zero);
  2042. }
  2043. return Builder.CreateSelect(cond, zero, one);
  2044. }
  2045. Value *TranslatePow(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2046. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2047. hlsl::OP *hlslOP = &helper.hlslOP;
  2048. Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  2049. Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  2050. bool isFXCCompatMode = CI->getModule()->GetHLModule().GetHLOptions().bFXCCompatMode;
  2051. IRBuilder<> Builder(CI);
  2052. return TranslatePowImpl(hlslOP,Builder,x,y,isFXCCompatMode);
  2053. }
  2054. Value *TranslatePrintf(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode,
  2055. HLOperationLowerHelper &helper,
  2056. HLObjectOperationLowerHelper *pObjHelper,
  2057. bool &Translated) {
  2058. Translated = false;
  2059. CI->getContext().emitError(CI, "use of undeclared identifier 'printf'");
  2060. return nullptr;
  2061. }
  2062. Value *TranslateFaceforward(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  2063. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2064. hlsl::OP *hlslOP = &helper.hlslOP;
  2065. Type *Ty = CI->getType();
  2066. Value *n = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  2067. Value *i = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  2068. Value *ng = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  2069. IRBuilder<> Builder(CI);
  2070. unsigned vecSize = Ty->getVectorNumElements();
  2071. // -n x sign(dot(i, ng)).
  2072. Value *dotOp = TranslateFDot(i, ng, vecSize, hlslOP, Builder);
  2073. Constant *zero = ConstantFP::get(Ty->getScalarType(), 0);
  2074. Value *dotLtZero = Builder.CreateFCmpOLT(dotOp, zero);
  2075. Value *negN = Builder.CreateFNeg(n);
  2076. Value *faceforward = Builder.CreateSelect(dotLtZero, n, negN);
  2077. return faceforward;
  2078. }
  2079. Value *TrivialSetMeshOutputCounts(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  2080. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2081. hlsl::OP *hlslOP = &helper.hlslOP;
  2082. Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  2083. Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  2084. IRBuilder<> Builder(CI);
  2085. Constant *opArg = hlslOP->GetU32Const((unsigned)op);
  2086. Value *args[] = { opArg, src0, src1 };
  2087. Function *dxilFunc = hlslOP->GetOpFunc(op, Type::getVoidTy(CI->getContext()));
  2088. Builder.CreateCall(dxilFunc, args);
  2089. return nullptr;
  2090. }
  2091. Value *TrivialDispatchMesh(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  2092. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2093. hlsl::OP *hlslOP = &helper.hlslOP;
  2094. Value *src0 = CI->getArgOperand(HLOperandIndex::kDispatchMeshOpThreadX);
  2095. Value *src1 = CI->getArgOperand(HLOperandIndex::kDispatchMeshOpThreadY);
  2096. Value *src2 = CI->getArgOperand(HLOperandIndex::kDispatchMeshOpThreadZ);
  2097. Value *src3 = CI->getArgOperand(HLOperandIndex::kDispatchMeshOpPayload);
  2098. IRBuilder<> Builder(CI);
  2099. Constant *opArg = hlslOP->GetU32Const((unsigned)op);
  2100. Value *args[] = { opArg, src0, src1, src2, src3 };
  2101. Function *dxilFunc = hlslOP->GetOpFunc(op, src3->getType());
  2102. Builder.CreateCall(dxilFunc, args);
  2103. return nullptr;
  2104. }
  2105. }
  2106. // MOP intrinsics
  2107. namespace {
  2108. Value *TranslateGetSamplePosition(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  2109. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2110. hlsl::OP *hlslOP = &helper.hlslOP;
  2111. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  2112. IRBuilder<> Builder(CI);
  2113. Value *sampleIdx =
  2114. CI->getArgOperand(HLOperandIndex::kGetSamplePositionSampleIdxOpIndex);
  2115. OP::OpCode opcode = OP::OpCode::Texture2DMSGetSamplePosition;
  2116. llvm::Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  2117. Function *dxilFunc =
  2118. hlslOP->GetOpFunc(opcode, Type::getVoidTy(CI->getContext()));
  2119. Value *args[] = {opArg, handle, sampleIdx};
  2120. Value *samplePos = Builder.CreateCall(dxilFunc, args);
  2121. Value *result = UndefValue::get(CI->getType());
  2122. Value *samplePosX = Builder.CreateExtractValue(samplePos, 0);
  2123. Value *samplePosY = Builder.CreateExtractValue(samplePos, 1);
  2124. result = Builder.CreateInsertElement(result, samplePosX, (uint64_t)0);
  2125. result = Builder.CreateInsertElement(result, samplePosY, 1);
  2126. return result;
  2127. }
// Lowers GetDimensions() to the DXIL GetDimensions op and stores each member
// of the returned dims struct through the corresponding out-pointer argument
// of the HL call. Returns nullptr: the HL call itself produces no value.
Value *TranslateGetDimensions(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
                              HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  DxilResource::Kind RK = pObjHelper->GetRK(handle);
  IRBuilder<> Builder(CI);
  OP::OpCode opcode = OP::OpCode::GetDimensions;
  llvm::Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  Function *dxilFunc =
      hlslOP->GetOpFunc(opcode, Type::getVoidTy(CI->getContext()));
  Type *i32Ty = Type::getInt32Ty(CI->getContext());
  Value *mipLevel = UndefValue::get(i32Ty);
  // Assume the overload that takes a leading mip-level argument; corrected
  // below for resources/overloads that do not.
  unsigned widthOpIdx = HLOperandIndex::kGetDimensionsMipWidthOpIndex;
  switch (RK) {
  case DxilResource::Kind::Texture1D:
  case DxilResource::Kind::Texture1DArray:
  case DxilResource::Kind::Texture2D:
  case DxilResource::Kind::Texture2DArray:
  case DxilResource::Kind::TextureCube:
  case DxilResource::Kind::TextureCubeArray:
  case DxilResource::Kind::Texture3D: {
    Value *opMipLevel =
        CI->getArgOperand(HLOperandIndex::kGetDimensionsMipLevelOpIndex);
    // mipLevel is in parameter, should not be pointer.
    if (!opMipLevel->getType()->isPointerTy())
      mipLevel = opMipLevel;
    else {
      // No mip level.
      widthOpIdx = HLOperandIndex::kGetDimensionsNoMipWidthOpIndex;
      mipLevel = ConstantInt::get(i32Ty, 0);
    }
  } break;
  default:
    // Non-texture kinds (buffers etc.) never take a mip-level argument.
    widthOpIdx = HLOperandIndex::kGetDimensionsNoMipWidthOpIndex;
    break;
  }
  Value *args[] = {opArg, handle, mipLevel};
  Value *dims = Builder.CreateCall(dxilFunc, args);
  unsigned dimensionIdx = 0;
  Value *width = Builder.CreateExtractValue(dims, dimensionIdx++);
  Value *widthPtr = CI->getArgOperand(widthOpIdx);
  // Out-params may be declared as float; convert the i32 DXIL result.
  if (widthPtr->getType()->getPointerElementType()->isFloatingPointTy())
    width = Builder.CreateSIToFP(width,
                                 widthPtr->getType()->getPointerElementType());
  Builder.CreateStore(width, widthPtr);
  if (DXIL::IsStructuredBuffer(RK)) {
    // Set stride.
    // Structured buffers report (count, stride); the stride is a compile-time
    // constant derived from the element type's alloc size.
    Value *stridePtr = CI->getArgOperand(widthOpIdx + 1);
    const DataLayout &DL = helper.dataLayout;
    Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
    Type *bufTy = pObjHelper->GetResourceType(handle);
    Type *bufRetTy = bufTy->getStructElementType(0);
    unsigned stride = DL.getTypeAllocSize(bufRetTy);
    Builder.CreateStore(hlslOP->GetU32Const(stride), stridePtr);
  } else {
    if (widthOpIdx == HLOperandIndex::kGetDimensionsMipWidthOpIndex ||
        // Samples is in w channel too.
        RK == DXIL::ResourceKind::Texture2DMS) {
      // Has mip.
      // All out-params except the last map to consecutive dims members; the
      // last (mip count / sample count) comes from the w channel (index 3).
      for (unsigned argIdx = widthOpIdx + 1;
           argIdx < CI->getNumArgOperands() - 1; argIdx++) {
        Value *dim = Builder.CreateExtractValue(dims, dimensionIdx++);
        Value *ptr = CI->getArgOperand(argIdx);
        if (ptr->getType()->getPointerElementType()->isFloatingPointTy())
          dim = Builder.CreateSIToFP(dim,
                                     ptr->getType()->getPointerElementType());
        Builder.CreateStore(dim, ptr);
      }
      // NumOfLevel is in w channel.
      dimensionIdx = 3;
      Value *dim = Builder.CreateExtractValue(dims, dimensionIdx);
      Value *ptr = CI->getArgOperand(CI->getNumArgOperands() - 1);
      if (ptr->getType()->getPointerElementType()->isFloatingPointTy())
        dim =
            Builder.CreateSIToFP(dim, ptr->getType()->getPointerElementType());
      Builder.CreateStore(dim, ptr);
    } else {
      // No mip/sample channel: remaining out-params map 1:1 to dims members.
      for (unsigned argIdx = widthOpIdx + 1; argIdx < CI->getNumArgOperands();
           argIdx++) {
        Value *dim = Builder.CreateExtractValue(dims, dimensionIdx++);
        Value *ptr = CI->getArgOperand(argIdx);
        if (ptr->getType()->getPointerElementType()->isFloatingPointTy())
          dim = Builder.CreateSIToFP(dim,
                                     ptr->getType()->getPointerElementType());
        Builder.CreateStore(dim, ptr);
      }
    }
  }
  return nullptr;
}
  2218. Value *GenerateUpdateCounter(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2219. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2220. hlsl::OP *hlslOP = &helper.hlslOP;
  2221. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  2222. pObjHelper->MarkHasCounter(handle, helper.i8Ty);
  2223. bool bInc = IOP == IntrinsicOp::MOP_IncrementCounter;
  2224. IRBuilder<> Builder(CI);
  2225. OP::OpCode OpCode = OP::OpCode::BufferUpdateCounter;
  2226. Value *OpCodeArg = hlslOP->GetU32Const((unsigned)OpCode);
  2227. Value *IncVal = hlslOP->GetI8Const(bInc ? 1 : -1);
  2228. // Create BufferUpdateCounter call.
  2229. Value *Args[] = {OpCodeArg, handle, IncVal};
  2230. Function *F =
  2231. hlslOP->GetOpFunc(OpCode, Type::getVoidTy(handle->getContext()));
  2232. return Builder.CreateCall(F, Args);
  2233. }
  2234. static Value *ScalarizeResRet(Type *RetTy, Value *ResRet, IRBuilder<> &Builder) {
  2235. // Extract value part.
  2236. Value *retVal = llvm::UndefValue::get(RetTy);
  2237. if (RetTy->isVectorTy()) {
  2238. for (unsigned i = 0; i < RetTy->getVectorNumElements(); i++) {
  2239. Value *retComp = Builder.CreateExtractValue(ResRet, i);
  2240. retVal = Builder.CreateInsertElement(retVal, retComp, i);
  2241. }
  2242. } else {
  2243. retVal = Builder.CreateExtractValue(ResRet, 0);
  2244. }
  2245. return retVal;
  2246. }
  2247. static Value *ScalarizeElements(Type *RetTy, ArrayRef<Value*> Elts, IRBuilder<> &Builder) {
  2248. // Extract value part.
  2249. Value *retVal = llvm::UndefValue::get(RetTy);
  2250. if (RetTy->isVectorTy()) {
  2251. unsigned vecSize = RetTy->getVectorNumElements();
  2252. DXASSERT(vecSize <= Elts.size(), "vector size mismatch");
  2253. for (unsigned i = 0; i < vecSize; i++) {
  2254. Value *retComp = Elts[i];
  2255. retVal = Builder.CreateInsertElement(retVal, retComp, i);
  2256. }
  2257. } else {
  2258. retVal = Elts[0];
  2259. }
  2260. return retVal;
  2261. }
  2262. void UpdateStatus(Value *ResRet, Value *status, IRBuilder<> &Builder,
  2263. hlsl::OP *hlslOp) {
  2264. if (status && !isa<UndefValue>(status)) {
  2265. Value *statusVal = Builder.CreateExtractValue(ResRet, DXIL::kResRetStatusIndex);
  2266. Value *checkAccessOp = hlslOp->GetI32Const(
  2267. static_cast<unsigned>(DXIL::OpCode::CheckAccessFullyMapped));
  2268. Function *checkAccessFn = hlslOp->GetOpFunc(
  2269. DXIL::OpCode::CheckAccessFullyMapped, statusVal->getType());
  2270. // CheckAccess on status.
  2271. Value *bStatus =
  2272. Builder.CreateCall(checkAccessFn, {checkAccessOp, statusVal});
  2273. Value *extStatus =
  2274. Builder.CreateZExt(bStatus, Type::getInt32Ty(status->getContext()));
  2275. Builder.CreateStore(extStatus, status);
  2276. }
  2277. }
  2278. Value *SplatToVector(Value *Elt, Type *DstTy, IRBuilder<> &Builder) {
  2279. Value *Result = UndefValue::get(DstTy);
  2280. for (unsigned i = 0; i < DstTy->getVectorNumElements(); i++)
  2281. Result = Builder.CreateInsertElement(Result, Elt, i);
  2282. return Result;
  2283. }
  2284. Value *TranslateMul(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2285. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2286. hlsl::OP *hlslOP = &helper.hlslOP;
  2287. Value *arg0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  2288. Value *arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  2289. Type *arg0Ty = arg0->getType();
  2290. Type *arg1Ty = arg1->getType();
  2291. IRBuilder<> Builder(CI);
  2292. if (arg0Ty->isVectorTy()) {
  2293. if (arg1Ty->isVectorTy()) {
  2294. // mul(vector, vector) == dot(vector, vector)
  2295. unsigned vecSize = arg0Ty->getVectorNumElements();
  2296. if (arg0Ty->getScalarType()->isFloatingPointTy()) {
  2297. return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder);
  2298. }
  2299. else {
  2300. return TranslateIDot(arg0, arg1, vecSize, hlslOP, Builder, IOP == IntrinsicOp::IOP_umul);
  2301. }
  2302. }
  2303. else {
  2304. // mul(vector, scalar) == vector * scalar-splat
  2305. arg1 = SplatToVector(arg1, arg0Ty, Builder);
  2306. }
  2307. }
  2308. else {
  2309. if (arg1Ty->isVectorTy()) {
  2310. // mul(scalar, vector) == scalar-splat * vector
  2311. arg0 = SplatToVector(arg0, arg1Ty, Builder);
  2312. }
  2313. // else mul(scalar, scalar) == scalar * scalar;
  2314. }
  2315. // create fmul/mul for the pair of vectors or scalars
  2316. if (arg0Ty->getScalarType()->isFloatingPointTy()) {
  2317. return Builder.CreateFMul(arg0, arg1);
  2318. }
  2319. else {
  2320. return Builder.CreateMul(arg0, arg1);
  2321. }
  2322. }
// Sample intrinsics.
// Collects the operands of an HL Sample* / CalculateLOD / WriteSamplerFeedback*
// call as flat per-component values, ready to be passed to the DXIL op.
struct SampleHelper {
  SampleHelper(CallInst *CI, OP::OpCode op, HLObjectOperationLowerHelper *pObjHelper);
  // Set back to NumOpCodes by the constructor when the resource kind is
  // invalid; callers test this to abort translation.
  OP::OpCode opcode = OP::OpCode::NumOpCodes;
  DXIL::ResourceKind resourceKind = DXIL::ResourceKind::Invalid;
  // Only set for sampler-feedback ops (the texture being sampled).
  Value *sampledTexHandle = nullptr;
  Value *texHandle = nullptr;
  Value *samplerHandle = nullptr;
  static const unsigned kMaxCoordDimensions = 4;
  unsigned coordDimensions = 0;
  // Coordinate components; slots past coordDimensions are undef floats.
  Value *coord[kMaxCoordDimensions];
  Value *compareValue = nullptr;
  Value *bias = nullptr;
  Value *lod = nullptr;
  // SampleGrad only.
  static const unsigned kMaxDDXYDimensions = 3;
  Value *ddx[kMaxDDXYDimensions];
  Value *ddy[kMaxDDXYDimensions];
  // Optional.
  static const unsigned kMaxOffsetDimensions = 3;
  unsigned offsetDimensions = 0;
  // Offset components; zeros when not specified, undef past offsetDimensions.
  Value *offset[kMaxOffsetDimensions];
  Value *clamp = nullptr;
  Value *status = nullptr;
  // Highest HL operand index read so far; the constructor asserts it matches
  // the call's last operand so no argument goes unconsumed.
  unsigned maxHLOperandRead = 0;
  // Returns the operand at opIdx, or nullptr if the call has fewer operands,
  // recording the highest index read.
  Value *ReadHLOperand(CallInst *CI, unsigned opIdx) {
    if (CI->getNumArgOperands() > opIdx) {
      maxHLOperandRead = std::max(maxHLOperandRead, opIdx);
      return CI->getArgOperand(opIdx);
    }
    return nullptr;
  }
  // Splits the coordinate vector operand into scalar components, padding the
  // remaining slots with undef.
  void TranslateCoord(CallInst *CI, unsigned coordIdx) {
    Value *coordArg = ReadHLOperand(CI, coordIdx);
    DXASSERT_NOMSG(coordArg);
    DXASSERT(coordArg->getType()->getVectorNumElements() == coordDimensions,
             "otherwise, HL coordinate dimensions mismatch");
    IRBuilder<> Builder(CI);
    for (unsigned i = 0; i < coordDimensions; i++)
      coord[i] = Builder.CreateExtractElement(coordArg, i);
    Value *undefF = UndefValue::get(Type::getFloatTy(CI->getContext()));
    for (unsigned i = coordDimensions; i < kMaxCoordDimensions; i++)
      coord[i] = undefF;
  }
  // Splits the (optional) offset vector operand into scalar components;
  // missing offsets default to zero, unused slots are undef.
  void TranslateOffset(CallInst *CI, unsigned offsetIdx) {
    IntegerType *i32Ty = Type::getInt32Ty(CI->getContext());
    if (Value *offsetArg = ReadHLOperand(CI, offsetIdx)) {
      DXASSERT(offsetArg->getType()->getVectorNumElements() == offsetDimensions,
               "otherwise, HL coordinate dimensions mismatch");
      IRBuilder<> Builder(CI);
      for (unsigned i = 0; i < offsetDimensions; i++)
        offset[i] = Builder.CreateExtractElement(offsetArg, i);
    } else {
      // Use zeros for offsets when not specified, not undef.
      Value *zero = ConstantInt::get(i32Ty, (uint64_t)0);
      for (unsigned i = 0; i < offsetDimensions; i++)
        offset[i] = zero;
    }
    // Use undef for components that should not be used for this resource dim.
    Value *undefI = UndefValue::get(i32Ty);
    for (unsigned i = offsetDimensions; i < kMaxOffsetDimensions; i++)
      offset[i] = undefI;
  }
  // Reads the bias operand, clamping constant values to the DXIL-legal
  // [kMinMipLodBias, kMaxMipLodBias] range.
  void SetBias(CallInst *CI, unsigned biasIdx) {
    // Clamp bias for immediate.
    bias = ReadHLOperand(CI, biasIdx);
    DXASSERT_NOMSG(bias);
    if (ConstantFP *FP = dyn_cast<ConstantFP>(bias)) {
      float v = FP->getValueAPF().convertToFloat();
      if (v > DXIL::kMaxMipLodBias)
        bias = ConstantFP::get(FP->getType(), DXIL::kMaxMipLodBias);
      if (v < DXIL::kMinMipLodBias)
        bias = ConstantFP::get(FP->getType(), DXIL::kMinMipLodBias);
    }
  }
  // Reads the required LOD operand.
  void SetLOD(CallInst *CI, unsigned lodIdx) {
    lod = ReadHLOperand(CI, lodIdx);
    DXASSERT_NOMSG(lod);
  }
  // Reads the required compare-value operand (SampleCmp variants).
  void SetCompareValue(CallInst *CI, unsigned cmpIdx) {
    compareValue = ReadHLOperand(CI, cmpIdx);
    DXASSERT_NOMSG(compareValue);
  }
  // Reads the optional clamp operand, taking element 0 if it is a vector;
  // defaults to undef when absent.
  void SetClamp(CallInst *CI, unsigned clampIdx) {
    if ((clamp = ReadHLOperand(CI, clampIdx))) {
      if (clamp->getType()->isVectorTy()) {
        IRBuilder<> Builder(CI);
        clamp = Builder.CreateExtractElement(clamp, (uint64_t)0);
      }
    } else
      clamp = UndefValue::get(Type::getFloatTy(CI->getContext()));
  }
  // Reads the optional status out-pointer (may remain nullptr).
  void SetStatus(CallInst *CI, unsigned statusIdx) {
    status = ReadHLOperand(CI, statusIdx);
  }
  void SetDDX(CallInst *CI, unsigned ddxIdx) {
    SetDDXY(CI, ddx, ReadHLOperand(CI, ddxIdx));
  }
  void SetDDY(CallInst *CI, unsigned ddyIdx) {
    SetDDXY(CI, ddy, ReadHLOperand(CI, ddyIdx));
  }
  // Splits a derivative vector into scalar components, padding with undef.
  void SetDDXY(CallInst *CI, MutableArrayRef<Value *> ddxy, Value *ddxyArg) {
    DXASSERT_NOMSG(ddxyArg);
    IRBuilder<> Builder(CI);
    unsigned ddxySize = ddxyArg->getType()->getVectorNumElements();
    for (unsigned i = 0; i < ddxySize; i++)
      ddxy[i] = Builder.CreateExtractElement(ddxyArg, i);
    Value *undefF = UndefValue::get(Type::getFloatTy(CI->getContext()));
    for (unsigned i = ddxySize; i < kMaxDDXYDimensions; i++)
      ddxy[i] = undefF;
  }
};
// Reads all operands of the HL sample-family call according to the DXIL
// opcode being generated. For cube resources (which take no offset argument)
// every operand index after the coordinate is shifted down by one.
SampleHelper::SampleHelper(
    CallInst *CI, OP::OpCode op, HLObjectOperationLowerHelper *pObjHelper)
    : opcode(op) {
  texHandle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  resourceKind = pObjHelper->GetRK(texHandle);
  if (resourceKind == DXIL::ResourceKind::Invalid) {
    // Flag failure via opcode and stop before reading any more operands.
    opcode = DXIL::OpCode::NumOpCodes;
    return;
  }
  coordDimensions = opcode == DXIL::OpCode::CalculateLOD ? DxilResource::GetNumDimensionsForCalcLOD(resourceKind)
                                                         : DxilResource::GetNumCoords(resourceKind);
  offsetDimensions = DxilResource::GetNumOffsets(resourceKind);
  // Sampler-feedback ops carry an extra sampled-texture handle and use their
  // own operand layout for the sampler and coordinates.
  const bool bFeedbackOp = hlsl::OP::IsDxilOpFeedback(op);
  sampledTexHandle = bFeedbackOp ? CI->getArgOperand(HLOperandIndex::kWriteSamplerFeedbackSampledArgIndex)
                                 : nullptr;
  const unsigned kSamplerArgIndex = bFeedbackOp ? HLOperandIndex::kWriteSamplerFeedbackSamplerArgIndex
                                                : HLOperandIndex::kSampleSamplerArgIndex;
  samplerHandle = CI->getArgOperand(kSamplerArgIndex);
  const unsigned kCoordArgIdx = bFeedbackOp ? HLOperandIndex::kWriteSamplerFeedbackCoordArgIndex
                                            : HLOperandIndex::kSampleCoordArgIndex;
  TranslateCoord(CI, kCoordArgIdx);
  // TextureCube does not support offsets, shifting each subsequent arg index down by 1
  unsigned cube = (resourceKind == DXIL::ResourceKind::TextureCube ||
                   resourceKind == DXIL::ResourceKind::TextureCubeArray)
                  ? 1 : 0;
  switch (op) {
  case OP::OpCode::Sample:
    // kInvalidIdx makes TranslateOffset fall back to zero offsets.
    TranslateOffset(CI, cube ? HLOperandIndex::kInvalidIdx : HLOperandIndex::kSampleOffsetArgIndex);
    SetClamp(CI, HLOperandIndex::kSampleClampArgIndex - cube);
    SetStatus(CI, HLOperandIndex::kSampleStatusArgIndex - cube);
    break;
  case OP::OpCode::SampleLevel:
    SetLOD(CI, HLOperandIndex::kSampleLLevelArgIndex);
    TranslateOffset(CI, cube ? HLOperandIndex::kInvalidIdx : HLOperandIndex::kSampleLOffsetArgIndex);
    SetStatus(CI, HLOperandIndex::kSampleLStatusArgIndex - cube);
    break;
  case OP::OpCode::SampleBias:
    SetBias(CI, HLOperandIndex::kSampleBBiasArgIndex);
    TranslateOffset(CI, cube ? HLOperandIndex::kInvalidIdx : HLOperandIndex::kSampleBOffsetArgIndex);
    SetClamp(CI, HLOperandIndex::kSampleBClampArgIndex - cube);
    SetStatus(CI, HLOperandIndex::kSampleBStatusArgIndex - cube);
    break;
  case OP::OpCode::SampleCmp:
    SetCompareValue(CI, HLOperandIndex::kSampleCmpCmpValArgIndex);
    TranslateOffset(CI, cube ? HLOperandIndex::kInvalidIdx : HLOperandIndex::kSampleCmpOffsetArgIndex);
    SetClamp(CI, HLOperandIndex::kSampleCmpClampArgIndex - cube);
    SetStatus(CI, HLOperandIndex::kSampleCmpStatusArgIndex - cube);
    break;
  case OP::OpCode::SampleCmpLevelZero:
    SetCompareValue(CI, HLOperandIndex::kSampleCmpLZCmpValArgIndex);
    TranslateOffset(CI, cube ? HLOperandIndex::kInvalidIdx : HLOperandIndex::kSampleCmpLZOffsetArgIndex);
    SetStatus(CI, HLOperandIndex::kSampleCmpLZStatusArgIndex - cube);
    break;
  case OP::OpCode::SampleGrad:
    SetDDX(CI, HLOperandIndex::kSampleGDDXArgIndex);
    SetDDY(CI, HLOperandIndex::kSampleGDDYArgIndex);
    TranslateOffset(CI, cube ? HLOperandIndex::kInvalidIdx : HLOperandIndex::kSampleGOffsetArgIndex);
    SetClamp(CI, HLOperandIndex::kSampleGClampArgIndex - cube);
    SetStatus(CI, HLOperandIndex::kSampleGStatusArgIndex - cube);
    break;
  case OP::OpCode::CalculateLOD:
    // Only need coord for LOD calculation.
    break;
  case OP::OpCode::WriteSamplerFeedback:
    SetClamp(CI, HLOperandIndex::kWriteSamplerFeedback_ClampArgIndex);
    break;
  case OP::OpCode::WriteSamplerFeedbackBias:
    SetBias(CI, HLOperandIndex::kWriteSamplerFeedbackBias_BiasArgIndex);
    SetClamp(CI, HLOperandIndex::kWriteSamplerFeedbackBias_ClampArgIndex);
    break;
  case OP::OpCode::WriteSamplerFeedbackGrad:
    SetDDX(CI, HLOperandIndex::kWriteSamplerFeedbackGrad_DdxArgIndex);
    SetDDY(CI, HLOperandIndex::kWriteSamplerFeedbackGrad_DdyArgIndex);
    SetClamp(CI, HLOperandIndex::kWriteSamplerFeedbackGrad_ClampArgIndex);
    break;
  case OP::OpCode::WriteSamplerFeedbackLevel:
    SetLOD(CI, HLOperandIndex::kWriteSamplerFeedbackLevel_LodArgIndex);
    break;
  default:
    DXASSERT(0, "invalid opcode for Sample");
    break;
  }
  // Every HL operand must have been consumed by exactly this layout.
  DXASSERT(maxHLOperandRead == CI->getNumArgOperands() - 1,
           "otherwise, unused HL arguments for Sample op");
}
  2520. Value *TranslateCalculateLOD(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2521. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2522. hlsl::OP *hlslOP = &helper.hlslOP;
  2523. SampleHelper sampleHelper(CI, OP::OpCode::CalculateLOD, pObjHelper);
  2524. if (sampleHelper.opcode == DXIL::OpCode::NumOpCodes) {
  2525. Translated = false;
  2526. return nullptr;
  2527. }
  2528. bool bClamped = IOP == IntrinsicOp::MOP_CalculateLevelOfDetail;
  2529. IRBuilder<> Builder(CI);
  2530. Value *opArg =
  2531. hlslOP->GetU32Const(static_cast<unsigned>(OP::OpCode::CalculateLOD));
  2532. Value *clamped = hlslOP->GetI1Const(bClamped);
  2533. Value *args[] = {opArg,
  2534. sampleHelper.texHandle,
  2535. sampleHelper.samplerHandle,
  2536. sampleHelper.coord[0],
  2537. sampleHelper.coord[1],
  2538. sampleHelper.coord[2],
  2539. clamped};
  2540. Function *dxilFunc = hlslOP->GetOpFunc(OP::OpCode::CalculateLOD,
  2541. Type::getFloatTy(opArg->getContext()));
  2542. Value *LOD = Builder.CreateCall(dxilFunc, args);
  2543. return LOD;
  2544. }
  2545. Value *TranslateCheckAccess(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2546. HLOperationLowerHelper &helper,
  2547. HLObjectOperationLowerHelper *pObjHelper,
  2548. bool &Translated) {
  2549. // Translate CheckAccess into uint->bool, later optimization should remove it.
  2550. // Real checkaccess is generated in UpdateStatus.
  2551. IRBuilder<> Builder(CI);
  2552. Value *V = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  2553. return Builder.CreateTrunc(V, helper.i1Ty);
  2554. }
  2555. void GenerateDxilSample(CallInst *CI, Function *F, ArrayRef<Value *> sampleArgs,
  2556. Value *status, hlsl::OP *hlslOp) {
  2557. IRBuilder<> Builder(CI);
  2558. CallInst *call = Builder.CreateCall(F, sampleArgs);
  2559. dxilutil::MigrateDebugValue(CI, call);
  2560. // extract value part
  2561. Value *retVal = ScalarizeResRet(CI->getType(), call, Builder);
  2562. // Replace ret val.
  2563. CI->replaceAllUsesWith(retVal);
  2564. // get status
  2565. if (status) {
  2566. UpdateStatus(call, status, Builder, hlslOp);
  2567. }
  2568. }
// Lowers the whole Sample family. The SampleHelper has already flattened all
// HL operands; each case simply lays them out in the exact order the DXIL
// op signature requires and hands off to GenerateDxilSample, which replaces
// the HL call. Always returns nullptr.
Value *TranslateSample(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                       HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  SampleHelper sampleHelper(CI, opcode, pObjHelper);
  if (sampleHelper.opcode == DXIL::OpCode::NumOpCodes) {
    // Invalid resource kind; leave the HL call untranslated.
    Translated = false;
    return nullptr;
  }
  Type *Ty = CI->getType();
  // DXIL sample ops are overloaded on the scalar element type.
  Function *F = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  switch (opcode) {
  case OP::OpCode::Sample: {
    Value *sampleArgs[] = {
        opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
        // Coord.
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
        sampleHelper.coord[3],
        // Offset.
        sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
        // Clamp.
        sampleHelper.clamp};
    GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
  } break;
  case OP::OpCode::SampleLevel: {
    Value *sampleArgs[] = {
        opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
        // Coord.
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
        sampleHelper.coord[3],
        // Offset.
        sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
        // LOD.
        sampleHelper.lod};
    GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
  } break;
  case OP::OpCode::SampleGrad: {
    Value *sampleArgs[] = {
        opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
        // Coord.
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
        sampleHelper.coord[3],
        // Offset.
        sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
        // Ddx.
        sampleHelper.ddx[0], sampleHelper.ddx[1], sampleHelper.ddx[2],
        // Ddy.
        sampleHelper.ddy[0], sampleHelper.ddy[1], sampleHelper.ddy[2],
        // Clamp.
        sampleHelper.clamp};
    GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
  } break;
  case OP::OpCode::SampleBias: {
    Value *sampleArgs[] = {
        opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
        // Coord.
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
        sampleHelper.coord[3],
        // Offset.
        sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
        // Bias.
        sampleHelper.bias,
        // Clamp.
        sampleHelper.clamp};
    GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
  } break;
  case OP::OpCode::SampleCmp: {
    Value *sampleArgs[] = {
        opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
        // Coord.
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
        sampleHelper.coord[3],
        // Offset.
        sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
        // CmpVal.
        sampleHelper.compareValue,
        // Clamp.
        sampleHelper.clamp};
    GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
  } break;
  case OP::OpCode::SampleCmpLevelZero:
  default: {
    DXASSERT(opcode == OP::OpCode::SampleCmpLevelZero, "invalid sample opcode");
    Value *sampleArgs[] = {
        opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
        // Coord.
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
        sampleHelper.coord[3],
        // Offset.
        sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
        // CmpVal.
        sampleHelper.compareValue};
    GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
  } break;
  }
  // CI is replaced in GenerateDxilSample.
  return nullptr;
}
  2667. // Gather intrinsics.
// Decomposes an HL Gather/GatherCmp call (and the per-channel variants) into
// the scalar operands required by the DXIL TextureGather / TextureGatherCmp
// operations.
struct GatherHelper {
  enum class GatherChannel {
    GatherAll,
    GatherRed,
    GatherGreen,
    GatherBlue,
    GatherAlpha,
  };

  GatherHelper(CallInst *CI, OP::OpCode op,
               HLObjectOperationLowerHelper *pObjHelper,
               GatherHelper::GatherChannel ch);

  OP::OpCode opcode;    // DXIL opcode; NumOpCodes when the resource kind is invalid.
  Value *texHandle;     // Texture resource handle.
  Value *samplerHandle; // Sampler handle.
  static const unsigned kMaxCoordDimensions = 4;
  Value *coord[kMaxCoordDimensions]; // Scalarized coordinate, undef-padded.
  unsigned channel;                  // Component index (0-3) passed to the DXIL op.
  Value *special; // For CompareValue, Bias, LOD.
  // Optional.
  static const unsigned kMaxOffsetDimensions = 2;
  Value *offset[kMaxOffsetDimensions]; // Scalarized offset for sample 0.
  // For the overload send different offset for each sample.
  // Only save 3 sampleOffsets because use offset for normal overload as first
  // sample offset.
  static const unsigned kSampleOffsetDimensions = 3;
  Value *sampleOffsets[kSampleOffsetDimensions][kMaxOffsetDimensions];
  Value *status;         // Optional out status operand (nullptr when absent).
  bool hasSampleOffsets; // True when the per-sample-offset overload was used.
  // Highest HL operand index consumed; the ctor asserts this covers every
  // argument so no HL operand is silently dropped.
  unsigned maxHLOperandRead = 0;

  // Returns HL operand opIdx, or nullptr when the call has fewer operands;
  // records the highest index read for the coverage assert.
  Value *ReadHLOperand(CallInst *CI, unsigned opIdx) {
    if (CI->getNumArgOperands() > opIdx) {
      maxHLOperandRead = std::max(maxHLOperandRead, opIdx);
      return CI->getArgOperand(opIdx);
    }
    return nullptr;
  }
  // Scalarizes the coordinate vector into coord[], padding unused components
  // with float undef.
  void TranslateCoord(CallInst *CI, unsigned coordIdx,
                      unsigned coordDimensions) {
    Value *coordArg = ReadHLOperand(CI, coordIdx);
    DXASSERT_NOMSG(coordArg);
    DXASSERT(coordArg->getType()->getVectorNumElements() == coordDimensions,
             "otherwise, HL coordinate dimensions mismatch");
    IRBuilder<> Builder(CI);
    for (unsigned i = 0; i < coordDimensions; i++)
      coord[i] = Builder.CreateExtractElement(coordArg, i);
    Value *undefF = UndefValue::get(Type::getFloatTy(CI->getContext()));
    for (unsigned i = coordDimensions; i < kMaxCoordDimensions; i++)
      coord[i] = undefF;
  }
  // Captures the optional status operand (nullptr when not present).
  void SetStatus(CallInst *CI, unsigned statusIdx) {
    status = ReadHLOperand(CI, statusIdx);
  }
  // Scalarizes the optional offset vector into offset[].  Missing offsets
  // become zeros (not undef); components beyond the resource's offset
  // dimension become undef.
  void TranslateOffset(CallInst *CI, unsigned offsetIdx,
                       unsigned offsetDimensions) {
    IntegerType *i32Ty = Type::getInt32Ty(CI->getContext());
    if (Value *offsetArg = ReadHLOperand(CI, offsetIdx)) {
      DXASSERT(offsetArg->getType()->getVectorNumElements() == offsetDimensions,
               "otherwise, HL coordinate dimensions mismatch");
      IRBuilder<> Builder(CI);
      for (unsigned i = 0; i < offsetDimensions; i++)
        offset[i] = Builder.CreateExtractElement(offsetArg, i);
    } else {
      // Use zeros for offsets when not specified, not undef.
      Value *zero = ConstantInt::get(i32Ty, (uint64_t)0);
      for (unsigned i = 0; i < offsetDimensions; i++)
        offset[i] = zero;
    }
    // Use undef for components that should not be used for this resource dim.
    Value *undefI = UndefValue::get(i32Ty);
    for (unsigned i = offsetDimensions; i < kMaxOffsetDimensions; i++)
      offset[i] = undefI;
  }
  // Reads the three extra per-sample offsets (samples 1-3) when the
  // four-offset overload is present; sample 0's offset stays in offset[].
  void TranslateSampleOffset(CallInst *CI, unsigned offsetIdx,
                             unsigned offsetDimensions) {
    Value *undefI = UndefValue::get(Type::getInt32Ty(CI->getContext()));
    if (CI->getNumArgOperands() >= (offsetIdx + kSampleOffsetDimensions)) {
      hasSampleOffsets = true;
      IRBuilder<> Builder(CI);
      for (unsigned ch = 0; ch < kSampleOffsetDimensions; ch++) {
        Value *offsetArg = ReadHLOperand(CI, offsetIdx + ch);
        for (unsigned i = 0; i < offsetDimensions; i++)
          sampleOffsets[ch][i] = Builder.CreateExtractElement(offsetArg, i);
        for (unsigned i = offsetDimensions; i < kMaxOffsetDimensions; i++)
          sampleOffsets[ch][i] = undefI;
      }
    }
  }
  // Update the offset args for gather with sample offset at sampleIdx.
  void UpdateOffsetInGatherArgs(MutableArrayRef<Value *> gatherArgs,
                                unsigned sampleIdx) {
    unsigned offsetBase = DXIL::OperandIndex::kTextureGatherOffset0OpIdx;
    for (unsigned i = 0; i < kMaxOffsetDimensions; i++)
      // -1 because offset for sample 0 is in GatherHelper::offset.
      gatherArgs[offsetBase + i] = sampleOffsets[sampleIdx - 1][i];
  }
};
  2763. GatherHelper::GatherHelper(
  2764. CallInst *CI, OP::OpCode op, HLObjectOperationLowerHelper *pObjHelper,
  2765. GatherHelper::GatherChannel ch)
  2766. : opcode(op), special(nullptr), hasSampleOffsets(false) {
  2767. switch (ch) {
  2768. case GatherChannel::GatherAll:
  2769. channel = 0;
  2770. break;
  2771. case GatherChannel::GatherRed:
  2772. channel = 0;
  2773. break;
  2774. case GatherChannel::GatherGreen:
  2775. channel = 1;
  2776. break;
  2777. case GatherChannel::GatherBlue:
  2778. channel = 2;
  2779. break;
  2780. case GatherChannel::GatherAlpha:
  2781. channel = 3;
  2782. break;
  2783. }
  2784. IRBuilder<> Builder(CI);
  2785. texHandle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  2786. samplerHandle = CI->getArgOperand(HLOperandIndex::kSampleSamplerArgIndex);
  2787. DXIL::ResourceKind RK = pObjHelper->GetRK(texHandle);
  2788. if (RK == DXIL::ResourceKind::Invalid) {
  2789. opcode = DXIL::OpCode::NumOpCodes;
  2790. return;
  2791. }
  2792. unsigned coordSize = DxilResource::GetNumCoords(RK);
  2793. unsigned offsetSize = DxilResource::GetNumOffsets(RK);
  2794. bool cube = RK == DXIL::ResourceKind::TextureCube ||
  2795. RK == DXIL::ResourceKind::TextureCubeArray;
  2796. const unsigned kCoordArgIdx = HLOperandIndex::kSampleCoordArgIndex;
  2797. TranslateCoord(CI, kCoordArgIdx, coordSize);
  2798. switch (op) {
  2799. case OP::OpCode::TextureGather: {
  2800. unsigned statusIdx;
  2801. if (cube) {
  2802. TranslateOffset(CI, HLOperandIndex::kInvalidIdx, offsetSize);
  2803. statusIdx = HLOperandIndex::kGatherCubeStatusArgIndex;
  2804. } else {
  2805. TranslateOffset(CI, HLOperandIndex::kGatherOffsetArgIndex, offsetSize);
  2806. // Gather all don't have sample offset version overload.
  2807. if (ch != GatherChannel::GatherAll)
  2808. TranslateSampleOffset(CI, HLOperandIndex::kGatherSampleOffsetArgIndex,
  2809. offsetSize);
  2810. statusIdx =
  2811. hasSampleOffsets ? HLOperandIndex::kGatherStatusWithSampleOffsetArgIndex
  2812. : HLOperandIndex::kGatherStatusArgIndex;
  2813. }
  2814. SetStatus(CI, statusIdx);
  2815. } break;
  2816. case OP::OpCode::TextureGatherCmp: {
  2817. special = ReadHLOperand(CI, HLOperandIndex::kGatherCmpCmpValArgIndex);
  2818. unsigned statusIdx;
  2819. if (cube) {
  2820. TranslateOffset(CI, HLOperandIndex::kInvalidIdx, offsetSize);
  2821. statusIdx = HLOperandIndex::kGatherCmpCubeStatusArgIndex;
  2822. } else {
  2823. TranslateOffset(CI, HLOperandIndex::kGatherCmpOffsetArgIndex, offsetSize);
  2824. // Gather all don't have sample offset version overload.
  2825. if (ch != GatherChannel::GatherAll)
  2826. TranslateSampleOffset(CI, HLOperandIndex::kGatherCmpSampleOffsetArgIndex,
  2827. offsetSize);
  2828. statusIdx =
  2829. hasSampleOffsets
  2830. ? HLOperandIndex::kGatherCmpStatusWithSampleOffsetArgIndex
  2831. : HLOperandIndex::kGatherCmpStatusArgIndex;
  2832. }
  2833. SetStatus(CI, statusIdx);
  2834. } break;
  2835. default:
  2836. DXASSERT(0, "invalid opcode for Gather");
  2837. break;
  2838. }
  2839. DXASSERT(maxHLOperandRead == CI->getNumArgOperands() - 1,
  2840. "otherwise, unused HL arguments for Sample op");
  2841. }
  2842. void GenerateDxilGather(CallInst *CI, Function *F,
  2843. MutableArrayRef<Value *> gatherArgs,
  2844. GatherHelper &helper, hlsl::OP *hlslOp) {
  2845. IRBuilder<> Builder(CI);
  2846. CallInst *call = Builder.CreateCall(F, gatherArgs);
  2847. dxilutil::MigrateDebugValue(CI, call);
  2848. Value *retVal;
  2849. if (!helper.hasSampleOffsets) {
  2850. // extract value part
  2851. retVal = ScalarizeResRet(CI->getType(), call, Builder);
  2852. } else {
  2853. retVal = UndefValue::get(CI->getType());
  2854. Value *elt = Builder.CreateExtractValue(call, (uint64_t)0);
  2855. retVal = Builder.CreateInsertElement(retVal, elt, (uint64_t)0);
  2856. helper.UpdateOffsetInGatherArgs(gatherArgs, /*sampleIdx*/ 1);
  2857. CallInst *callY = Builder.CreateCall(F, gatherArgs);
  2858. elt = Builder.CreateExtractValue(callY, (uint64_t)1);
  2859. retVal = Builder.CreateInsertElement(retVal, elt, 1);
  2860. helper.UpdateOffsetInGatherArgs(gatherArgs, /*sampleIdx*/ 2);
  2861. CallInst *callZ = Builder.CreateCall(F, gatherArgs);
  2862. elt = Builder.CreateExtractValue(callZ, (uint64_t)2);
  2863. retVal = Builder.CreateInsertElement(retVal, elt, 2);
  2864. helper.UpdateOffsetInGatherArgs(gatherArgs, /*sampleIdx*/ 3);
  2865. CallInst *callW = Builder.CreateCall(F, gatherArgs);
  2866. elt = Builder.CreateExtractValue(callW, (uint64_t)3);
  2867. retVal = Builder.CreateInsertElement(retVal, elt, 3);
  2868. // TODO: UpdateStatus for each gather call.
  2869. }
  2870. // Replace ret val.
  2871. CI->replaceAllUsesWith(retVal);
  2872. // Get status
  2873. if (helper.status) {
  2874. UpdateStatus(call, helper.status, Builder, hlslOp);
  2875. }
  2876. }
  2877. Value *TranslateGather(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2878. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2879. hlsl::OP *hlslOP = &helper.hlslOP;
  2880. GatherHelper::GatherChannel ch = GatherHelper::GatherChannel::GatherAll;
  2881. switch (IOP) {
  2882. case IntrinsicOp::MOP_Gather:
  2883. case IntrinsicOp::MOP_GatherCmp:
  2884. ch = GatherHelper::GatherChannel::GatherAll;
  2885. break;
  2886. case IntrinsicOp::MOP_GatherRed:
  2887. case IntrinsicOp::MOP_GatherCmpRed:
  2888. ch = GatherHelper::GatherChannel::GatherRed;
  2889. break;
  2890. case IntrinsicOp::MOP_GatherGreen:
  2891. case IntrinsicOp::MOP_GatherCmpGreen:
  2892. ch = GatherHelper::GatherChannel::GatherGreen;
  2893. break;
  2894. case IntrinsicOp::MOP_GatherBlue:
  2895. case IntrinsicOp::MOP_GatherCmpBlue:
  2896. ch = GatherHelper::GatherChannel::GatherBlue;
  2897. break;
  2898. case IntrinsicOp::MOP_GatherAlpha:
  2899. case IntrinsicOp::MOP_GatherCmpAlpha:
  2900. ch = GatherHelper::GatherChannel::GatherAlpha;
  2901. break;
  2902. default:
  2903. DXASSERT(0, "invalid gather intrinsic");
  2904. break;
  2905. }
  2906. GatherHelper gatherHelper(CI, opcode, pObjHelper, ch);
  2907. if (gatherHelper.opcode == DXIL::OpCode::NumOpCodes) {
  2908. Translated = false;
  2909. return nullptr;
  2910. }
  2911. Type *Ty = CI->getType();
  2912. Function *F = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
  2913. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  2914. Value *channelArg = hlslOP->GetU32Const(gatherHelper.channel);
  2915. switch (opcode) {
  2916. case OP::OpCode::TextureGather: {
  2917. Value *gatherArgs[] = {
  2918. opArg, gatherHelper.texHandle, gatherHelper.samplerHandle,
  2919. // Coord.
  2920. gatherHelper.coord[0], gatherHelper.coord[1], gatherHelper.coord[2],
  2921. gatherHelper.coord[3],
  2922. // Offset.
  2923. gatherHelper.offset[0], gatherHelper.offset[1],
  2924. // Channel.
  2925. channelArg};
  2926. GenerateDxilGather(CI, F, gatherArgs, gatherHelper, hlslOP);
  2927. } break;
  2928. case OP::OpCode::TextureGatherCmp: {
  2929. Value *gatherArgs[] = {
  2930. opArg, gatherHelper.texHandle, gatherHelper.samplerHandle,
  2931. // Coord.
  2932. gatherHelper.coord[0], gatherHelper.coord[1], gatherHelper.coord[2],
  2933. gatherHelper.coord[3],
  2934. // Offset.
  2935. gatherHelper.offset[0], gatherHelper.offset[1],
  2936. // Channel.
  2937. channelArg,
  2938. // CmpVal.
  2939. gatherHelper.special};
  2940. GenerateDxilGather(CI, F, gatherArgs, gatherHelper, hlslOP);
  2941. } break;
  2942. default:
  2943. DXASSERT(0, "invalid opcode for Gather");
  2944. break;
  2945. }
  2946. // CI is replaced in GenerateDxilGather.
  2947. return nullptr;
  2948. }
// Lowers the HL WriteSamplerFeedback* intrinsics to their DXIL ops.
// Reuses SampleHelper for operand decomposition; note the extra
// sampledTexHandle operand (the texture being sampled, distinct from the
// feedback resource handle).
static Value *TranslateWriteSamplerFeedback(CallInst *CI, IntrinsicOp IOP,
                                            OP::OpCode opcode,
                                            HLOperationLowerHelper &helper,
                                            HLObjectOperationLowerHelper *pObjHelper,
                                            bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;

  SampleHelper sampleHelper(CI, opcode, pObjHelper);
  if (sampleHelper.opcode == DXIL::OpCode::NumOpCodes) {
    // Operand decomposition failed; leave CI for the caller to report.
    Translated = false;
    return nullptr;
  }
  Type *Ty = CI->getType();
  Function *F = hlslOP->GetOpFunc(opcode, Ty->getScalarType());

  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);

  IRBuilder<> Builder(CI);

  // Each variant shares handles + coord, then appends its own trailing
  // operands (bias / grad / LOD / clamp) per the DXIL operand layout.
  switch (opcode) {
  case OP::OpCode::WriteSamplerFeedback: {
    Value *samplerFeedbackArgs[] = {
        opArg, sampleHelper.texHandle, sampleHelper.sampledTexHandle,
        sampleHelper.samplerHandle,
        // Coord.
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
        sampleHelper.coord[3],
        // Clamp.
        sampleHelper.clamp};
    return Builder.CreateCall(F, samplerFeedbackArgs);
  } break;
  case OP::OpCode::WriteSamplerFeedbackBias: {
    Value *samplerFeedbackArgs[] = {
        opArg, sampleHelper.texHandle, sampleHelper.sampledTexHandle,
        sampleHelper.samplerHandle,
        // Coord.
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
        sampleHelper.coord[3],
        // Bias.
        sampleHelper.bias,
        // Clamp.
        sampleHelper.clamp};
    return Builder.CreateCall(F, samplerFeedbackArgs);
  } break;
  case OP::OpCode::WriteSamplerFeedbackGrad: {
    Value *samplerFeedbackArgs[] = {
        opArg, sampleHelper.texHandle, sampleHelper.sampledTexHandle,
        sampleHelper.samplerHandle,
        // Coord.
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
        sampleHelper.coord[3],
        // Ddx.
        sampleHelper.ddx[0], sampleHelper.ddx[1], sampleHelper.ddx[2],
        // Ddy.
        sampleHelper.ddy[0], sampleHelper.ddy[1], sampleHelper.ddy[2],
        // Clamp.
        sampleHelper.clamp};
    return Builder.CreateCall(F, samplerFeedbackArgs);
  } break;
  case OP::OpCode::WriteSamplerFeedbackLevel: {
    Value *samplerFeedbackArgs[] = {
        opArg, sampleHelper.texHandle, sampleHelper.sampledTexHandle,
        sampleHelper.samplerHandle,
        // Coord.
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
        sampleHelper.coord[3],
        // LOD.
        sampleHelper.lod};
    return Builder.CreateCall(F, samplerFeedbackArgs);
  } break;
  default:
    DXASSERT(false, "otherwise, unknown SamplerFeedback Op");
    break;
  }
  return nullptr;
}
  3016. // Load/Store intrinsics.
// Gathers the operands of an HL resource-load style call (Load(), mips[][],
// double-subscript) into one uniform shape consumed by TranslateLoad.
struct ResLoadHelper {
  ResLoadHelper(CallInst *CI, DxilResource::Kind RK, DxilResourceBase::Class RC,
                Value *h, IntrinsicOp IOP, bool bForSubscript = false);
  ResLoadHelper(CallInst *CI, DxilResource::Kind RK, DxilResourceBase::Class RC,
                Value *h, Value *mip);
  // For double subscript.
  ResLoadHelper(Instruction *ldInst, Value *h, Value *idx, Value *mip)
      : opcode(OP::OpCode::TextureLoad),
        intrinsicOpCode(IntrinsicOp::Num_Intrinsics), handle(h), retVal(ldInst),
        addr(idx), offset(nullptr), status(nullptr), mipLevel(mip) {}
  OP::OpCode opcode;           // DXIL load opcode selected from the resource kind.
  IntrinsicOp intrinsicOpCode; // Originating HL intrinsic (Num_Intrinsics if n/a).
  unsigned dxilMajor; // NOTE(review): not initialized by any visible ctor — confirm use.
  unsigned dxilMinor; // NOTE(review): not initialized by any visible ctor — confirm use.
  Value *handle;   // Resource handle.
  Value *retVal;   // Instruction whose uses get replaced by the lowered load.
  Value *addr;     // Coordinate / element address operand.
  Value *offset;   // Optional texel offset (nullptr when absent).
  Value *status;   // Optional out status operand (nullptr when absent).
  Value *mipLevel; // Mip level or MS sample index (texture loads).
};
  3038. ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK,
  3039. DxilResourceBase::Class RC, Value *hdl, IntrinsicOp IOP, bool bForSubscript)
  3040. : intrinsicOpCode(IOP), handle(hdl), offset(nullptr), status(nullptr) {
  3041. switch (RK) {
  3042. case DxilResource::Kind::RawBuffer:
  3043. case DxilResource::Kind::StructuredBuffer:
  3044. case DxilResource::Kind::StructuredBufferWithCounter:
  3045. opcode = OP::OpCode::RawBufferLoad;
  3046. break;
  3047. case DxilResource::Kind::TypedBuffer:
  3048. opcode = OP::OpCode::BufferLoad;
  3049. break;
  3050. case DxilResource::Kind::Invalid:
  3051. DXASSERT(0, "invalid resource kind");
  3052. break;
  3053. default:
  3054. opcode = OP::OpCode::TextureLoad;
  3055. break;
  3056. }
  3057. retVal = CI;
  3058. const unsigned kAddrIdx = HLOperandIndex::kBufLoadAddrOpIdx;
  3059. addr = CI->getArgOperand(kAddrIdx);
  3060. unsigned argc = CI->getNumArgOperands();
  3061. if (opcode == OP::OpCode::TextureLoad) {
  3062. // mip at last channel
  3063. unsigned coordSize = DxilResource::GetNumCoords(RK);
  3064. if (RC == DxilResourceBase::Class::SRV) {
  3065. if (bForSubscript) {
  3066. // Use 0 when access by [].
  3067. mipLevel = IRBuilder<>(CI).getInt32(0);
  3068. } else {
  3069. if (coordSize == 1 && !addr->getType()->isVectorTy()) {
  3070. // Use addr when access by Load.
  3071. mipLevel = addr;
  3072. } else {
  3073. mipLevel = IRBuilder<>(CI).CreateExtractElement(addr, coordSize);
  3074. }
  3075. }
  3076. } else {
  3077. // Set mip level to undef for UAV.
  3078. mipLevel = UndefValue::get(Type::getInt32Ty(addr->getContext()));
  3079. }
  3080. if (RC == DxilResourceBase::Class::SRV) {
  3081. unsigned offsetIdx = HLOperandIndex::kTexLoadOffsetOpIdx;
  3082. unsigned statusIdx = HLOperandIndex::kTexLoadStatusOpIdx;
  3083. if (RK == DxilResource::Kind::Texture2DMS ||
  3084. RK == DxilResource::Kind::Texture2DMSArray) {
  3085. offsetIdx = HLOperandIndex::kTex2DMSLoadOffsetOpIdx;
  3086. statusIdx = HLOperandIndex::kTex2DMSLoadStatusOpIdx;
  3087. mipLevel =
  3088. CI->getArgOperand(HLOperandIndex::kTex2DMSLoadSampleIdxOpIdx);
  3089. }
  3090. if (argc > offsetIdx)
  3091. offset = CI->getArgOperand(offsetIdx);
  3092. if (argc > statusIdx)
  3093. status = CI->getArgOperand(statusIdx);
  3094. } else {
  3095. const unsigned kStatusIdx = HLOperandIndex::kRWTexLoadStatusOpIdx;
  3096. if (argc > kStatusIdx)
  3097. status = CI->getArgOperand(kStatusIdx);
  3098. }
  3099. } else {
  3100. const unsigned kStatusIdx = HLOperandIndex::kBufLoadStatusOpIdx;
  3101. if (argc > kStatusIdx)
  3102. status = CI->getArgOperand(kStatusIdx);
  3103. }
  3104. }
  3105. ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK,
  3106. DxilResourceBase::Class RC, Value *hdl, Value *mip)
  3107. : handle(hdl), offset(nullptr), status(nullptr) {
  3108. DXASSERT(RK != DxilResource::Kind::RawBuffer &&
  3109. RK != DxilResource::Kind::TypedBuffer &&
  3110. RK != DxilResource::Kind::Invalid,
  3111. "invalid resource kind");
  3112. opcode = OP::OpCode::TextureLoad;
  3113. retVal = CI;
  3114. mipLevel = mip;
  3115. const unsigned kAddrIdx = HLOperandIndex::kMipLoadAddrOpIdx;
  3116. addr = CI->getArgOperand(kAddrIdx);
  3117. unsigned argc = CI->getNumArgOperands();
  3118. const unsigned kOffsetIdx = HLOperandIndex::kMipLoadOffsetOpIdx;
  3119. const unsigned kStatusIdx = HLOperandIndex::kMipLoadStatusOpIdx;
  3120. if (argc > kOffsetIdx)
  3121. offset = CI->getArgOperand(kOffsetIdx);
  3122. if (argc > kStatusIdx)
  3123. status = CI->getArgOperand(kStatusIdx);
  3124. }
  3125. void TranslateStructBufSubscript(CallInst *CI, Value *handle, Value *status,
  3126. hlsl::OP *OP, HLResource::Kind RK, const DataLayout &DL);
  3127. // Create { v0, v1 } from { v0.lo, v0.hi, v1.lo, v1.hi }
  3128. void Make64bitResultForLoad(Type *EltTy, ArrayRef<Value *> resultElts32,
  3129. unsigned size, MutableArrayRef<Value *> resultElts,
  3130. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  3131. Type *i64Ty = Builder.getInt64Ty();
  3132. Type *doubleTy = Builder.getDoubleTy();
  3133. if (EltTy == doubleTy) {
  3134. Function *makeDouble =
  3135. hlslOP->GetOpFunc(DXIL::OpCode::MakeDouble, doubleTy);
  3136. Value *makeDoubleOpArg =
  3137. Builder.getInt32((unsigned)DXIL::OpCode::MakeDouble);
  3138. for (unsigned i = 0; i < size; i++) {
  3139. Value *lo = resultElts32[2 * i];
  3140. Value *hi = resultElts32[2 * i + 1];
  3141. Value *V = Builder.CreateCall(makeDouble, {makeDoubleOpArg, lo, hi});
  3142. resultElts[i] = V;
  3143. }
  3144. } else {
  3145. for (unsigned i = 0; i < size; i++) {
  3146. Value *lo = resultElts32[2 * i];
  3147. Value *hi = resultElts32[2 * i + 1];
  3148. lo = Builder.CreateZExt(lo, i64Ty);
  3149. hi = Builder.CreateZExt(hi, i64Ty);
  3150. hi = Builder.CreateShl(hi, 32);
  3151. resultElts[i] = Builder.CreateOr(lo, hi);
  3152. }
  3153. }
  3154. }
  3155. static Constant *GetRawBufferMaskForETy(Type *Ty, unsigned NumComponents, hlsl::OP *OP) {
  3156. unsigned mask = 0;
  3157. switch (NumComponents) {
  3158. case 0:
  3159. break;
  3160. case 1:
  3161. mask = DXIL::kCompMask_X;
  3162. break;
  3163. case 2:
  3164. mask = DXIL::kCompMask_X | DXIL::kCompMask_Y;
  3165. break;
  3166. case 3:
  3167. mask = DXIL::kCompMask_X | DXIL::kCompMask_Y | DXIL::kCompMask_Z;
  3168. break;
  3169. case 4:
  3170. mask = DXIL::kCompMask_All;
  3171. break;
  3172. default:
  3173. DXASSERT(false, "Cannot load more than 2 components for 64bit types.");
  3174. }
  3175. return OP->GetI8Const(mask);
  3176. }
  3177. Value *GenerateStructBufLd(Value *handle, Value *bufIdx, Value *offset,
  3178. Value *status, Type *EltTy,
  3179. MutableArrayRef<Value *> resultElts, hlsl::OP *OP,
  3180. IRBuilder<> &Builder, unsigned NumComponents, Constant *alignment);
// Lowers an HL resource load (pre-digested into `helper`) to the DXIL
// BufferLoad / RawBufferLoad / TextureLoad operation and replaces
// helper.retVal with the scalarized result.
// A pointer-typed result indicates a structured-buffer subscript yielding an
// aggregate; that path is routed to TranslateStructBufSubscript instead.
void TranslateLoad(ResLoadHelper &helper, HLResource::Kind RK,
                   IRBuilder<> &Builder, hlsl::OP *OP, const DataLayout &DL) {

  Type *Ty = helper.retVal->getType();
  if (Ty->isPointerTy()) {
    DXASSERT(!DxilResource::IsAnyTexture(RK),
             "Textures should not be treated as structured buffers.");
    TranslateStructBufSubscript(cast<CallInst>(helper.retVal), helper.handle,
                                helper.status, OP, RK, DL);
    return;
  }

  OP::OpCode opcode = helper.opcode;

  Type *i32Ty = Builder.getInt32Ty();
  Type *i64Ty = Builder.getInt64Ty();
  Type *doubleTy = Builder.getDoubleTy();
  Type *EltTy = Ty->getScalarType();
  // If RawBuffer load of 64-bit value, don't set alignment to 8,
  // since buffer alignment isn't known to be anything over 4.
  unsigned alignValue = OP->GetAllocSizeForType(EltTy);
  if (RK == HLResource::Kind::RawBuffer && alignValue > 4)
    alignValue = 4;
  Constant *Alignment = OP->GetI32Const(alignValue);
  unsigned numComponents = 1;
  if (Ty->isVectorTy()) {
    numComponents = Ty->getVectorNumElements();
  }

  if (DXIL::IsStructuredBuffer(RK)) {
    // Basic type case for StructuredBuffer::Load()
    Value *ResultElts[4];
    Value *StructBufLoad = GenerateStructBufLd(
        helper.handle, helper.addr, OP->GetU32Const(0), helper.status, EltTy,
        ResultElts, OP, Builder, numComponents, Alignment);
    dxilutil::MigrateDebugValue(helper.retVal, StructBufLoad);
    Value *retValNew = ScalarizeElements(Ty, ResultElts, Builder);
    helper.retVal->replaceAllUsesWith(retValNew);
    helper.retVal = retValNew;
    return;
  }

  bool isTyped = opcode == OP::OpCode::TextureLoad ||
                 RK == DxilResource::Kind::TypedBuffer;
  bool is64 = EltTy == i64Ty || EltTy == doubleTy;
  // Typed loads have no 64-bit overload: load dword pairs and reassemble
  // with Make64bitResultForLoad below.
  if (is64 && isTyped) {
    EltTy = i32Ty;
  }
  bool isBool = EltTy->isIntegerTy(1);
  if (isBool) {
    // Value will be loaded in its memory representation.
    EltTy = i32Ty;
    if (Ty->isVectorTy())
      Ty = VectorType::get(EltTy, numComponents);
  }

  Function *F = OP->GetOpFunc(opcode, EltTy);
  llvm::Constant *opArg = OP->GetU32Const((unsigned)opcode);
  llvm::Value *undefI = llvm::UndefValue::get(i32Ty);

  // Assemble the DXIL operand list; exact order matches the DXIL operand
  // layout for each load opcode.
  SmallVector<Value *, 12> loadArgs;
  loadArgs.emplace_back(opArg);         // opcode
  loadArgs.emplace_back(helper.handle); // resource handle
  if (opcode == OP::OpCode::TextureLoad) {
    // set mip level
    loadArgs.emplace_back(helper.mipLevel);
  }

  if (opcode == OP::OpCode::TextureLoad) {
    // texture coord: three channels, undef-padded past the resource dim.
    unsigned coordSize = DxilResource::GetNumCoords(RK);
    bool isVectorAddr = helper.addr->getType()->isVectorTy();
    for (unsigned i = 0; i < 3; i++) {
      if (i < coordSize) {
        loadArgs.emplace_back(isVectorAddr
                                  ? Builder.CreateExtractElement(helper.addr, i)
                                  : helper.addr);
      } else
        loadArgs.emplace_back(undefI);
    }
  } else {
    if (helper.addr->getType()->isVectorTy()) {
      Value *scalarOffset =
          Builder.CreateExtractElement(helper.addr, (uint64_t)0);
      // TODO: calculate the real address based on opcode
      loadArgs.emplace_back(scalarOffset); // offset
    } else {
      // TODO: calculate the real address based on opcode
      loadArgs.emplace_back(helper.addr); // offset
    }
  }

  // offset 0
  if (opcode == OP::OpCode::TextureLoad) {
    if (helper.offset && !isa<llvm::UndefValue>(helper.offset)) {
      unsigned offsetSize = DxilResource::GetNumOffsets(RK);
      for (unsigned i = 0; i < 3; i++) {
        if (i < offsetSize)
          loadArgs.emplace_back(Builder.CreateExtractElement(helper.offset, i));
        else
          loadArgs.emplace_back(undefI);
      }
    } else {
      loadArgs.emplace_back(undefI);
      loadArgs.emplace_back(undefI);
      loadArgs.emplace_back(undefI);
    }
  }

  // Offset 1
  if (RK == DxilResource::Kind::RawBuffer) {
    // elementOffset, mask, alignment
    loadArgs.emplace_back(undefI);
    Type *rtnTy = helper.retVal->getType();
    loadArgs.emplace_back(GetRawBufferMaskForETy(rtnTy, numComponents, OP));
    loadArgs.emplace_back(Alignment);
  } else if (RK == DxilResource::Kind::TypedBuffer) {
    loadArgs.emplace_back(undefI);
  }

  Value *ResRet = Builder.CreateCall(F, loadArgs, OP->GetOpCodeName(opcode));
  dxilutil::MigrateDebugValue(helper.retVal, ResRet);

  Value *retValNew = nullptr;
  if (!is64 || !isTyped) {
    retValNew = ScalarizeResRet(Ty, ResRet, Builder);
  } else {
    // Rebuild 64-bit elements from the loaded dword pairs.
    unsigned size = numComponents;
    DXASSERT(size <= 2, "typed buffer only allow 4 dwords");
    EltTy = Ty->getScalarType();
    Value *Elts[2];
    Make64bitResultForLoad(Ty->getScalarType(),
                           {
                               Builder.CreateExtractValue(ResRet, 0),
                               Builder.CreateExtractValue(ResRet, 1),
                               Builder.CreateExtractValue(ResRet, 2),
                               Builder.CreateExtractValue(ResRet, 3),
                           },
                           size, Elts, OP, Builder);
    retValNew = ScalarizeElements(Ty, Elts, Builder);
  }

  if (isBool) {
    // Convert result back to register representation.
    retValNew = Builder.CreateICmpNE(
        retValNew, Constant::getNullValue(retValNew->getType()));
  }

  // replace
  helper.retVal->replaceAllUsesWith(retValNew);
  // Save new ret val.
  helper.retVal = retValNew;
  // get status
  UpdateStatus(ResRet, helper.status, Builder, OP);
}
  3320. Value *TranslateResourceLoad(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  3321. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3322. hlsl::OP *hlslOP = &helper.hlslOP;
  3323. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  3324. IRBuilder<> Builder(CI);
  3325. DXIL::ResourceClass RC = pObjHelper->GetRC(handle);
  3326. DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
  3327. ResLoadHelper loadHelper(CI, RK, RC, handle, IOP);
  3328. TranslateLoad(loadHelper, RK, Builder, hlslOP, helper.dataLayout);
  3329. // CI is replaced in TranslateLoad.
  3330. return nullptr;
  3331. }
  3332. // Split { v0, v1 } to { v0.lo, v0.hi, v1.lo, v1.hi }
  3333. void Split64bitValForStore(Type *EltTy, ArrayRef<Value *> vals, unsigned size,
  3334. MutableArrayRef<Value *> vals32, hlsl::OP *hlslOP,
  3335. IRBuilder<> &Builder) {
  3336. Type *i32Ty = Builder.getInt32Ty();
  3337. Type *doubleTy = Builder.getDoubleTy();
  3338. Value *undefI32 = UndefValue::get(i32Ty);
  3339. if (EltTy == doubleTy) {
  3340. Function *dToU = hlslOP->GetOpFunc(DXIL::OpCode::SplitDouble, doubleTy);
  3341. Value *dToUOpArg = Builder.getInt32((unsigned)DXIL::OpCode::SplitDouble);
  3342. for (unsigned i = 0; i < size; i++) {
  3343. if (isa<UndefValue>(vals[i])) {
  3344. vals32[2 * i] = undefI32;
  3345. vals32[2 * i + 1] = undefI32;
  3346. } else {
  3347. Value *retVal = Builder.CreateCall(dToU, {dToUOpArg, vals[i]});
  3348. Value *lo = Builder.CreateExtractValue(retVal, 0);
  3349. Value *hi = Builder.CreateExtractValue(retVal, 1);
  3350. vals32[2 * i] = lo;
  3351. vals32[2 * i + 1] = hi;
  3352. }
  3353. }
  3354. } else {
  3355. for (unsigned i = 0; i < size; i++) {
  3356. if (isa<UndefValue>(vals[i])) {
  3357. vals32[2 * i] = undefI32;
  3358. vals32[2 * i + 1] = undefI32;
  3359. } else {
  3360. Value *lo = Builder.CreateTrunc(vals[i], i32Ty);
  3361. Value *hi = Builder.CreateLShr(vals[i], 32);
  3362. hi = Builder.CreateTrunc(hi, i32Ty);
  3363. vals32[2 * i] = lo;
  3364. vals32[2 * i + 1] = hi;
  3365. }
  3366. }
  3367. }
  3368. }
// Lowers an HL resource store of `val` at `offset` to the DXIL
// BufferStore / RawBufferStore / TextureStore operation, handling bool
// memory representation, undef padding, the component write mask, and the
// dword-pair splitting required for 64-bit typed stores.
void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
                    Value *offset, IRBuilder<> &Builder, hlsl::OP *OP) {
  Type *Ty = val->getType();

  // Pick the DXIL store opcode from the resource kind.
  OP::OpCode opcode = OP::OpCode::NumOpCodes;
  switch (RK) {
  case DxilResource::Kind::RawBuffer:
  case DxilResource::Kind::StructuredBuffer:
  case DxilResource::Kind::StructuredBufferWithCounter:
    opcode = OP::OpCode::RawBufferStore;
    break;
  case DxilResource::Kind::TypedBuffer:
    opcode = OP::OpCode::BufferStore;
    break;
  case DxilResource::Kind::Invalid:
    DXASSERT(0, "invalid resource kind");
    break;
  default:
    opcode = OP::OpCode::TextureStore;
    break;
  }

  bool isTyped = opcode == OP::OpCode::TextureStore ||
                 RK == DxilResource::Kind::TypedBuffer;

  Type *i32Ty = Builder.getInt32Ty();
  Type *i64Ty = Builder.getInt64Ty();
  Type *doubleTy = Builder.getDoubleTy();
  Type *EltTy = Ty->getScalarType();
  if (EltTy->isIntegerTy(1)) {
    // Since we're going to memory, convert bools to their memory
    // representation.
    EltTy = i32Ty;
    if (Ty->isVectorTy())
      Ty = VectorType::get(EltTy, Ty->getVectorNumElements());
    else
      Ty = EltTy;
    val = Builder.CreateZExt(val, Ty);
  }

  // If RawBuffer store of 64-bit value, don't set alignment to 8,
  // since buffer alignment isn't known to be anything over 4.
  unsigned alignValue = OP->GetAllocSizeForType(EltTy);
  if (RK == HLResource::Kind::RawBuffer && alignValue > 4)
    alignValue = 4;
  Constant *Alignment = OP->GetI32Const(alignValue);
  bool is64 = EltTy == i64Ty || EltTy == doubleTy;
  // Typed stores have no 64-bit overload; values are split into dword
  // pairs below.
  if (is64 && isTyped) {
    EltTy = i32Ty;
  }

  Function *F = OP->GetOpFunc(opcode, EltTy);
  llvm::Constant *opArg = OP->GetU32Const((unsigned)opcode);

  llvm::Value *undefI =
      llvm::UndefValue::get(llvm::Type::getInt32Ty(Ty->getContext()));

  llvm::Value *undefVal = llvm::UndefValue::get(Ty->getScalarType());

  SmallVector<Value *, 13> storeArgs;

  storeArgs.emplace_back(opArg);  // opcode
  storeArgs.emplace_back(handle); // resource handle

  if (RK == DxilResource::Kind::RawBuffer ||
      RK == DxilResource::Kind::TypedBuffer) {
    // Offset 0
    if (offset->getType()->isVectorTy()) {
      Value *scalarOffset = Builder.CreateExtractElement(offset, (uint64_t)0);
      storeArgs.emplace_back(scalarOffset); // offset
    } else {
      storeArgs.emplace_back(offset); // offset
    }

    // Offset 1
    storeArgs.emplace_back(undefI);
  } else {
    // texture store: three coordinate channels, undef-padded.
    unsigned coordSize = DxilResource::GetNumCoords(RK);
    // Set x first.
    if (offset->getType()->isVectorTy())
      storeArgs.emplace_back(Builder.CreateExtractElement(offset, (uint64_t)0));
    else
      storeArgs.emplace_back(offset);

    for (unsigned i = 1; i < 3; i++) {
      if (i < coordSize)
        storeArgs.emplace_back(Builder.CreateExtractElement(offset, i));
      else
        storeArgs.emplace_back(undefI);
    }
    // TODO: support mip for texture ST
  }

  // values: always emit four value slots; the mask says which are real.
  uint8_t mask = 0;
  if (Ty->isVectorTy()) {
    unsigned vecSize = Ty->getVectorNumElements();
    Value *emptyVal = undefVal;
    if (isTyped) {
      // Typed stores require all four components; pad with component 0.
      mask = DXIL::kCompMask_All;
      emptyVal = Builder.CreateExtractElement(val, (uint64_t)0);
    }

    for (unsigned i = 0; i < 4; i++) {
      if (i < vecSize) {
        storeArgs.emplace_back(Builder.CreateExtractElement(val, i));
        mask |= (1 << i);
      } else {
        storeArgs.emplace_back(emptyVal);
      }
    }
  } else {
    if (isTyped) {
      mask = DXIL::kCompMask_All;
      storeArgs.emplace_back(val);
      storeArgs.emplace_back(val);
      storeArgs.emplace_back(val);
      storeArgs.emplace_back(val);
    } else {
      storeArgs.emplace_back(val);
      storeArgs.emplace_back(undefVal);
      storeArgs.emplace_back(undefVal);
      storeArgs.emplace_back(undefVal);
      mask = DXIL::kCompMask_X;
    }
  }

  if (is64 && isTyped) {
    // Split each 64-bit value into lo/hi dwords in place in storeArgs.
    unsigned size = 1;
    if (Ty->isVectorTy()) {
      size = Ty->getVectorNumElements();
    }
    DXASSERT(size <= 2, "raw/typed buffer only allow 4 dwords");
    unsigned val0OpIdx = opcode == DXIL::OpCode::TextureStore
                             ? DXIL::OperandIndex::kTextureStoreVal0OpIdx
                             : DXIL::OperandIndex::kBufferStoreVal0OpIdx;
    Value *V0 = storeArgs[val0OpIdx];
    Value *V1 = storeArgs[val0OpIdx + 1];

    Value *vals32[4];
    EltTy = Ty->getScalarType();
    Split64bitValForStore(EltTy, {V0, V1}, size, vals32, OP, Builder);
    // Fill the uninit vals.
    if (size == 1) {
      vals32[2] = vals32[0];
      vals32[3] = vals32[1];
    }
    // Change valOp to 32 version.
    for (unsigned i = 0; i < 4; i++) {
      storeArgs[val0OpIdx + i] = vals32[i];
    }
    // change mask for double
    if (opcode == DXIL::OpCode::RawBufferStore) {
      mask = size == 1 ? DXIL::kCompMask_X | DXIL::kCompMask_Y
                       : DXIL::kCompMask_All;
    }
  }

  storeArgs.emplace_back(OP->GetU8Const(mask)); // mask
  if (opcode == DXIL::OpCode::RawBufferStore)
    storeArgs.emplace_back(Alignment); // alignment only for raw buffer
  Builder.CreateCall(F, storeArgs);
}
  3513. Value *TranslateResourceStore(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  3514. HLOperationLowerHelper &helper,
  3515. HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3516. hlsl::OP *hlslOP = &helper.hlslOP;
  3517. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  3518. IRBuilder<> Builder(CI);
  3519. DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
  3520. Value *val = CI->getArgOperand(HLOperandIndex::kStoreValOpIdx);
  3521. Value *offset = CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx);
  3522. TranslateStore(RK, handle, val, offset, Builder, hlslOP);
  3523. return nullptr;
  3524. }
  3525. }
  3526. // Atomic intrinsics.
  3527. namespace {
  3528. // Atomic intrinsics.
  3529. struct AtomicHelper {
  3530. AtomicHelper(CallInst *CI, OP::OpCode op, Value *h);
  3531. AtomicHelper(CallInst *CI, OP::OpCode op, Value *h, Value *bufIdx,
  3532. Value *baseOffset);
  3533. OP::OpCode opcode;
  3534. Value *handle;
  3535. Value *addr;
  3536. Value *offset; // Offset for structrued buffer.
  3537. Value *value;
  3538. Value *originalValue;
  3539. Value *compareValue;
  3540. };
  3541. // For MOP version of Interlocked*.
  3542. AtomicHelper::AtomicHelper(CallInst *CI, OP::OpCode op, Value *h)
  3543. : opcode(op), handle(h), offset(nullptr), originalValue(nullptr) {
  3544. addr = CI->getArgOperand(HLOperandIndex::kObjectInterlockedDestOpIndex);
  3545. if (op == OP::OpCode::AtomicCompareExchange) {
  3546. compareValue = CI->getArgOperand(
  3547. HLOperandIndex::kObjectInterlockedCmpCompareValueOpIndex);
  3548. value =
  3549. CI->getArgOperand(HLOperandIndex::kObjectInterlockedCmpValueOpIndex);
  3550. if (CI->getNumArgOperands() ==
  3551. (HLOperandIndex::kObjectInterlockedCmpOriginalValueOpIndex + 1))
  3552. originalValue = CI->getArgOperand(
  3553. HLOperandIndex::kObjectInterlockedCmpOriginalValueOpIndex);
  3554. } else {
  3555. value = CI->getArgOperand(HLOperandIndex::kObjectInterlockedValueOpIndex);
  3556. if (CI->getNumArgOperands() ==
  3557. (HLOperandIndex::kObjectInterlockedOriginalValueOpIndex + 1))
  3558. originalValue = CI->getArgOperand(
  3559. HLOperandIndex::kObjectInterlockedOriginalValueOpIndex);
  3560. }
  3561. }
  3562. // For IOP version of Interlocked*.
  3563. AtomicHelper::AtomicHelper(CallInst *CI, OP::OpCode op, Value *h, Value *bufIdx,
  3564. Value *baseOffset)
  3565. : opcode(op), handle(h), addr(bufIdx),
  3566. offset(baseOffset), originalValue(nullptr) {
  3567. if (op == OP::OpCode::AtomicCompareExchange) {
  3568. compareValue =
  3569. CI->getArgOperand(HLOperandIndex::kInterlockedCmpCompareValueOpIndex);
  3570. value = CI->getArgOperand(HLOperandIndex::kInterlockedCmpValueOpIndex);
  3571. if (CI->getNumArgOperands() ==
  3572. (HLOperandIndex::kInterlockedCmpOriginalValueOpIndex + 1))
  3573. originalValue = CI->getArgOperand(
  3574. HLOperandIndex::kInterlockedCmpOriginalValueOpIndex);
  3575. } else {
  3576. value = CI->getArgOperand(HLOperandIndex::kInterlockedValueOpIndex);
  3577. if (CI->getNumArgOperands() ==
  3578. (HLOperandIndex::kInterlockedOriginalValueOpIndex + 1))
  3579. originalValue =
  3580. CI->getArgOperand(HLOperandIndex::kInterlockedOriginalValueOpIndex);
  3581. }
  3582. }
  3583. void TranslateAtomicBinaryOperation(AtomicHelper &helper,
  3584. DXIL::AtomicBinOpCode atomicOp,
  3585. IRBuilder<> &Builder, hlsl::OP *hlslOP) {
  3586. Value *handle = helper.handle;
  3587. Value *addr = helper.addr;
  3588. Value *val = helper.value;
  3589. Type *Ty = val->getType();
  3590. Value *undefI = UndefValue::get(Type::getInt32Ty(Ty->getContext()));
  3591. Function *dxilAtomic = hlslOP->GetOpFunc(helper.opcode, Ty->getScalarType());
  3592. Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(helper.opcode));
  3593. Value *atomicOpArg = hlslOP->GetU32Const(static_cast<unsigned>(atomicOp));
  3594. Value *args[] = {opArg, handle, atomicOpArg,
  3595. undefI, undefI, undefI, // coordinates
  3596. val};
  3597. // Setup coordinates.
  3598. if (addr->getType()->isVectorTy()) {
  3599. unsigned vectorNumElements = addr->getType()->getVectorNumElements();
  3600. DXASSERT(vectorNumElements <= 3, "up to 3 elements for atomic binary op");
  3601. _Analysis_assume_(vectorNumElements <= 3);
  3602. for (unsigned i = 0; i < vectorNumElements; i++) {
  3603. Value *Elt = Builder.CreateExtractElement(addr, i);
  3604. args[DXIL::OperandIndex::kAtomicBinOpCoord0OpIdx + i] = Elt;
  3605. }
  3606. } else
  3607. args[DXIL::OperandIndex::kAtomicBinOpCoord0OpIdx] = addr;
  3608. // Set offset for structured buffer.
  3609. if (helper.offset)
  3610. args[DXIL::OperandIndex::kAtomicBinOpCoord1OpIdx] = helper.offset;
  3611. Value *origVal =
  3612. Builder.CreateCall(dxilAtomic, args, hlslOP->GetAtomicOpName(atomicOp));
  3613. if (helper.originalValue) {
  3614. Builder.CreateStore(origVal, helper.originalValue);
  3615. }
  3616. }
  3617. Value *TranslateMopAtomicBinaryOperation(CallInst *CI, IntrinsicOp IOP,
  3618. OP::OpCode opcode,
  3619. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3620. hlsl::OP *hlslOP = &helper.hlslOP;
  3621. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  3622. IRBuilder<> Builder(CI);
  3623. switch (IOP) {
  3624. case IntrinsicOp::MOP_InterlockedAdd: {
  3625. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3626. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Add, Builder,
  3627. hlslOP);
  3628. } break;
  3629. case IntrinsicOp::MOP_InterlockedAnd: {
  3630. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3631. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::And, Builder,
  3632. hlslOP);
  3633. } break;
  3634. case IntrinsicOp::MOP_InterlockedExchange: {
  3635. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3636. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Exchange,
  3637. Builder, hlslOP);
  3638. } break;
  3639. case IntrinsicOp::MOP_InterlockedMax: {
  3640. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3641. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMax, Builder,
  3642. hlslOP);
  3643. } break;
  3644. case IntrinsicOp::MOP_InterlockedMin: {
  3645. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3646. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMin, Builder,
  3647. hlslOP);
  3648. } break;
  3649. case IntrinsicOp::MOP_InterlockedUMax: {
  3650. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3651. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMax, Builder,
  3652. hlslOP);
  3653. } break;
  3654. case IntrinsicOp::MOP_InterlockedUMin: {
  3655. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3656. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMin, Builder,
  3657. hlslOP);
  3658. } break;
  3659. case IntrinsicOp::MOP_InterlockedOr: {
  3660. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3661. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Or, Builder,
  3662. hlslOP);
  3663. } break;
  3664. case IntrinsicOp::MOP_InterlockedXor: {
  3665. default:
  3666. DXASSERT(IOP == IntrinsicOp::MOP_InterlockedXor,
  3667. "invalid MOP atomic intrinsic");
  3668. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3669. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Xor, Builder,
  3670. hlslOP);
  3671. } break;
  3672. }
  3673. return nullptr;
  3674. }
  3675. void TranslateAtomicCmpXChg(AtomicHelper &helper, IRBuilder<> &Builder,
  3676. hlsl::OP *hlslOP) {
  3677. Value *handle = helper.handle;
  3678. Value *addr = helper.addr;
  3679. Value *val = helper.value;
  3680. Value *cmpVal = helper.compareValue;
  3681. Type *Ty = val->getType();
  3682. Value *undefI = UndefValue::get(Type::getInt32Ty(Ty->getContext()));
  3683. Function *dxilAtomic = hlslOP->GetOpFunc(helper.opcode, Ty->getScalarType());
  3684. Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(helper.opcode));
  3685. Value *args[] = {opArg, handle, undefI, undefI, undefI, // coordinates
  3686. cmpVal, val};
  3687. // Setup coordinates.
  3688. if (addr->getType()->isVectorTy()) {
  3689. unsigned vectorNumElements = addr->getType()->getVectorNumElements();
  3690. DXASSERT(vectorNumElements <= 3, "up to 3 elements in atomic op");
  3691. _Analysis_assume_(vectorNumElements <= 3);
  3692. for (unsigned i = 0; i < vectorNumElements; i++) {
  3693. Value *Elt = Builder.CreateExtractElement(addr, i);
  3694. args[DXIL::OperandIndex::kAtomicCmpExchangeCoord0OpIdx + i] = Elt;
  3695. }
  3696. } else
  3697. args[DXIL::OperandIndex::kAtomicCmpExchangeCoord0OpIdx] = addr;
  3698. // Set offset for structured buffer.
  3699. if (helper.offset)
  3700. args[DXIL::OperandIndex::kAtomicCmpExchangeCoord1OpIdx] = helper.offset;
  3701. Value *origVal = Builder.CreateCall(dxilAtomic, args);
  3702. if (helper.originalValue) {
  3703. Builder.CreateStore(origVal, helper.originalValue);
  3704. }
  3705. }
  3706. Value *TranslateMopAtomicCmpXChg(CallInst *CI, IntrinsicOp IOP,
  3707. OP::OpCode opcode,
  3708. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3709. hlsl::OP *hlslOP = &helper.hlslOP;
  3710. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  3711. IRBuilder<> Builder(CI);
  3712. AtomicHelper atomicHelper(CI, OP::OpCode::AtomicCompareExchange, handle);
  3713. TranslateAtomicCmpXChg(atomicHelper, Builder, hlslOP);
  3714. return nullptr;
  3715. }
  3716. void TranslateSharedMemAtomicBinOp(CallInst *CI, IntrinsicOp IOP, Value *addr) {
  3717. AtomicRMWInst::BinOp Op;
  3718. switch (IOP) {
  3719. case IntrinsicOp::IOP_InterlockedAdd:
  3720. Op = AtomicRMWInst::BinOp::Add;
  3721. break;
  3722. case IntrinsicOp::IOP_InterlockedAnd:
  3723. Op = AtomicRMWInst::BinOp::And;
  3724. break;
  3725. case IntrinsicOp::IOP_InterlockedExchange:
  3726. Op = AtomicRMWInst::BinOp::Xchg;
  3727. break;
  3728. case IntrinsicOp::IOP_InterlockedMax:
  3729. Op = AtomicRMWInst::BinOp::Max;
  3730. break;
  3731. case IntrinsicOp::IOP_InterlockedUMax:
  3732. Op = AtomicRMWInst::BinOp::UMax;
  3733. break;
  3734. case IntrinsicOp::IOP_InterlockedMin:
  3735. Op = AtomicRMWInst::BinOp::Min;
  3736. break;
  3737. case IntrinsicOp::IOP_InterlockedUMin:
  3738. Op = AtomicRMWInst::BinOp::UMin;
  3739. break;
  3740. case IntrinsicOp::IOP_InterlockedOr:
  3741. Op = AtomicRMWInst::BinOp::Or;
  3742. break;
  3743. case IntrinsicOp::IOP_InterlockedXor:
  3744. default:
  3745. DXASSERT(IOP == IntrinsicOp::IOP_InterlockedXor, "Invalid Intrinsic");
  3746. Op = AtomicRMWInst::BinOp::Xor;
  3747. break;
  3748. }
  3749. Value *val = CI->getArgOperand(HLOperandIndex::kInterlockedValueOpIndex);
  3750. IRBuilder<> Builder(CI);
  3751. Value *Result = Builder.CreateAtomicRMW(
  3752. Op, addr, val, AtomicOrdering::SequentiallyConsistent);
  3753. if (CI->getNumArgOperands() >
  3754. HLOperandIndex::kInterlockedOriginalValueOpIndex)
  3755. Builder.CreateStore(
  3756. Result,
  3757. CI->getArgOperand(HLOperandIndex::kInterlockedOriginalValueOpIndex));
  3758. }
  3759. static Value* SkipAddrSpaceCast(Value* Ptr) {
  3760. if (AddrSpaceCastInst *CastInst = dyn_cast<AddrSpaceCastInst>(Ptr))
  3761. return CastInst->getOperand(0);
  3762. else if (ConstantExpr *ConstExpr = dyn_cast<ConstantExpr>(Ptr)) {
  3763. if (ConstExpr->getOpcode() == Instruction::AddrSpaceCast) {
  3764. return ConstExpr->getOperand(0);
  3765. }
  3766. }
  3767. return Ptr;
  3768. }
  3769. Value *TranslateIopAtomicBinaryOperation(CallInst *CI, IntrinsicOp IOP,
  3770. DXIL::OpCode opcode,
  3771. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3772. Value *addr = CI->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex);
  3773. addr = SkipAddrSpaceCast(addr);
  3774. unsigned addressSpace = addr->getType()->getPointerAddressSpace();
  3775. if (addressSpace == DXIL::kTGSMAddrSpace)
  3776. TranslateSharedMemAtomicBinOp(CI, IOP, addr);
  3777. else {
  3778. // buffer atomic translated in TranslateSubscript.
  3779. // Do nothing here.
  3780. // Mark not translated.
  3781. Translated = false;
  3782. }
  3783. return nullptr;
  3784. }
  3785. void TranslateSharedMemAtomicCmpXChg(CallInst *CI, Value *addr) {
  3786. Value *val = CI->getArgOperand(HLOperandIndex::kInterlockedCmpValueOpIndex);
  3787. Value *cmpVal =
  3788. CI->getArgOperand(HLOperandIndex::kInterlockedCmpCompareValueOpIndex);
  3789. IRBuilder<> Builder(CI);
  3790. Value *Result = Builder.CreateAtomicCmpXchg(
  3791. addr, cmpVal, val, AtomicOrdering::SequentiallyConsistent,
  3792. AtomicOrdering::SequentiallyConsistent);
  3793. if (CI->getNumArgOperands() >
  3794. HLOperandIndex::kInterlockedCmpOriginalValueOpIndex) {
  3795. Value *originVal = Builder.CreateExtractValue(Result, 0);
  3796. Builder.CreateStore(
  3797. originVal,
  3798. CI->getArgOperand(HLOperandIndex::kInterlockedCmpOriginalValueOpIndex));
  3799. }
  3800. }
  3801. Value *TranslateIopAtomicCmpXChg(CallInst *CI, IntrinsicOp IOP,
  3802. DXIL::OpCode opcode,
  3803. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3804. Value *addr = CI->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex);
  3805. addr = SkipAddrSpaceCast(addr);
  3806. unsigned addressSpace = addr->getType()->getPointerAddressSpace();
  3807. if (addressSpace == DXIL::kTGSMAddrSpace)
  3808. TranslateSharedMemAtomicCmpXChg(CI, addr);
  3809. else {
  3810. // buffer atomic translated in TranslateSubscript.
  3811. // Do nothing here.
  3812. // Mark not translated.
  3813. Translated = false;
  3814. }
  3815. return nullptr;
  3816. }
  3817. }
  3818. // Process Tess Factor.
  3819. namespace {
  3820. // Clamp to [0.0f..1.0f], NaN->0.0f.
  3821. Value *CleanupTessFactorScale(Value *input, hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  3822. float fMin = 0;
  3823. float fMax = 1;
  3824. Type *f32Ty = input->getType()->getScalarType();
  3825. Value *minFactor = ConstantFP::get(f32Ty, fMin);
  3826. Value *maxFactor = ConstantFP::get(f32Ty, fMax);
  3827. Type *Ty = input->getType();
  3828. if (Ty->isVectorTy())
  3829. minFactor = SplatToVector(minFactor, input->getType(), Builder);
  3830. Value *temp = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, input, minFactor, hlslOP, Builder);
  3831. if (Ty->isVectorTy())
  3832. maxFactor = SplatToVector(maxFactor, input->getType(), Builder);
  3833. return TrivialDxilBinaryOperation(DXIL::OpCode::FMin, temp, maxFactor, hlslOP, Builder);
  3834. }
  3835. // Clamp to [1.0f..Inf], NaN->1.0f.
  3836. Value *CleanupTessFactor(Value *input, hlsl::OP *hlslOP, IRBuilder<> &Builder)
  3837. {
  3838. float fMin = 1.0;
  3839. Type *f32Ty = input->getType()->getScalarType();
  3840. Value *minFactor = ConstantFP::get(f32Ty, fMin);
  3841. minFactor = SplatToVector(minFactor, input->getType(), Builder);
  3842. return TrivialDxilBinaryOperation(DXIL::OpCode::FMax, input, minFactor, hlslOP, Builder);
  3843. }
  3844. // Do partitioning-specific clamping.
  3845. Value *ClampTessFactor(Value *input, DXIL::TessellatorPartitioning partitionMode,
  3846. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  3847. const unsigned kTESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR = 64;
  3848. const unsigned kTESSELLATOR_MAX_ODD_TESSELLATION_FACTOR = 63;
  3849. const unsigned kTESSELLATOR_MIN_EVEN_TESSELLATION_FACTOR = 2;
  3850. const unsigned kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR = 1;
  3851. const unsigned kTESSELLATOR_MAX_TESSELLATION_FACTOR = 64;
  3852. float fMin;
  3853. float fMax;
  3854. switch (partitionMode) {
  3855. case DXIL::TessellatorPartitioning::Integer:
  3856. fMin = kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR;
  3857. fMax = kTESSELLATOR_MAX_TESSELLATION_FACTOR;
  3858. break;
  3859. case DXIL::TessellatorPartitioning::Pow2:
  3860. fMin = kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR;
  3861. fMax = kTESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR;
  3862. break;
  3863. case DXIL::TessellatorPartitioning::FractionalOdd:
  3864. fMin = kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR;
  3865. fMax = kTESSELLATOR_MAX_ODD_TESSELLATION_FACTOR;
  3866. break;
  3867. case DXIL::TessellatorPartitioning::FractionalEven:
  3868. default:
  3869. DXASSERT(partitionMode == DXIL::TessellatorPartitioning::FractionalEven,
  3870. "invalid partition mode");
  3871. fMin = kTESSELLATOR_MIN_EVEN_TESSELLATION_FACTOR;
  3872. fMax = kTESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR;
  3873. break;
  3874. }
  3875. Type *f32Ty = input->getType()->getScalarType();
  3876. Value *minFactor = ConstantFP::get(f32Ty, fMin);
  3877. Value *maxFactor = ConstantFP::get(f32Ty, fMax);
  3878. Type *Ty = input->getType();
  3879. if (Ty->isVectorTy())
  3880. minFactor = SplatToVector(minFactor, input->getType(), Builder);
  3881. Value *temp = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, input, minFactor, hlslOP, Builder);
  3882. if (Ty->isVectorTy())
  3883. maxFactor = SplatToVector(maxFactor, input->getType(), Builder);
  3884. return TrivialDxilBinaryOperation(DXIL::OpCode::FMin, temp, maxFactor, hlslOP, Builder);
  3885. }
  3886. // round up for integer/pow2 partitioning
  3887. // note that this code assumes the inputs should be in the range [1, inf),
  3888. // which should be enforced by the clamp above.
  3889. Value *RoundUpTessFactor(Value *input, DXIL::TessellatorPartitioning partitionMode,
  3890. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  3891. switch (partitionMode) {
  3892. case DXIL::TessellatorPartitioning::Integer:
  3893. return TrivialDxilUnaryOperation(DXIL::OpCode::Round_pi, input, hlslOP, Builder);
  3894. case DXIL::TessellatorPartitioning::Pow2: {
  3895. const unsigned kExponentMask = 0x7f800000;
  3896. const unsigned kExponentLSB = 0x00800000;
  3897. const unsigned kMantissaMask = 0x007fffff;
  3898. Type *Ty = input->getType();
  3899. // (val = (asuint(val) & mantissamask) ?
  3900. // (asuint(val) & exponentmask) + exponentbump :
  3901. // asuint(val) & exponentmask;
  3902. Type *uintTy = Type::getInt32Ty(Ty->getContext());
  3903. if (Ty->isVectorTy())
  3904. uintTy = VectorType::get(uintTy, Ty->getVectorNumElements());
  3905. Value *uintVal = Builder.CreateCast(Instruction::CastOps::FPToUI, input, uintTy);
  3906. Value *mantMask = ConstantInt::get(uintTy->getScalarType(), kMantissaMask);
  3907. mantMask = SplatToVector(mantMask, uintTy, Builder);
  3908. Value *manVal = Builder.CreateAnd(uintVal, mantMask);
  3909. Value *expMask = ConstantInt::get(uintTy->getScalarType(), kExponentMask);
  3910. expMask = SplatToVector(expMask, uintTy, Builder);
  3911. Value *expVal = Builder.CreateAnd(uintVal, expMask);
  3912. Value *expLSB = ConstantInt::get(uintTy->getScalarType(), kExponentLSB);
  3913. expLSB = SplatToVector(expLSB, uintTy, Builder);
  3914. Value *newExpVal = Builder.CreateAdd(expVal, expLSB);
  3915. Value *manValNotZero = Builder.CreateICmpEQ(manVal, ConstantAggregateZero::get(uintTy));
  3916. Value *factors = Builder.CreateSelect(manValNotZero, newExpVal, expVal);
  3917. return Builder.CreateUIToFP(factors, Ty);
  3918. } break;
  3919. case DXIL::TessellatorPartitioning::FractionalEven:
  3920. case DXIL::TessellatorPartitioning::FractionalOdd:
  3921. return input;
  3922. default:
  3923. DXASSERT(0, "invalid partition mode");
  3924. return nullptr;
  3925. }
  3926. }
  3927. Value *TranslateProcessIsolineTessFactors(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  3928. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3929. hlsl::OP *hlslOP = &helper.hlslOP;
  3930. // Get partition mode
  3931. DXASSERT_NOMSG(helper.functionProps);
  3932. DXASSERT(helper.functionProps->shaderKind == ShaderModel::Kind::Hull, "must be hull shader");
  3933. DXIL::TessellatorPartitioning partition = helper.functionProps->ShaderProps.HS.partition;
  3934. IRBuilder<> Builder(CI);
  3935. Value *rawDetailFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRawDetailFactor);
  3936. rawDetailFactor = Builder.CreateExtractElement(rawDetailFactor, (uint64_t)0);
  3937. Value *rawDensityFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRawDensityFactor);
  3938. rawDensityFactor = Builder.CreateExtractElement(rawDensityFactor, (uint64_t)0);
  3939. Value *init = UndefValue::get(VectorType::get(helper.f32Ty, 2));
  3940. init = Builder.CreateInsertElement(init, rawDetailFactor, (uint64_t)0);
  3941. init = Builder.CreateInsertElement(init, rawDetailFactor, (uint64_t)1);
  3942. Value *clamped = ClampTessFactor(init, partition, hlslOP, Builder);
  3943. Value *rounded = RoundUpTessFactor(clamped, partition, hlslOP, Builder);
  3944. Value *roundedDetailFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedDetailFactor);
  3945. Value *temp = UndefValue::get(VectorType::get(helper.f32Ty, 1));
  3946. Value *roundedX = Builder.CreateExtractElement(rounded, (uint64_t)0);
  3947. temp = Builder.CreateInsertElement(temp, roundedX, (uint64_t)0);
  3948. Builder.CreateStore(temp, roundedDetailFactor);
  3949. Value *roundedDensityFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedDensityFactor);
  3950. Value *roundedY = Builder.CreateExtractElement(rounded, 1);
  3951. temp = Builder.CreateInsertElement(temp, roundedY, (uint64_t)0);
  3952. Builder.CreateStore(temp, roundedDensityFactor);
  3953. return nullptr;
  3954. }
  3955. // 3 inputs, 1 result
  3956. Value *ApplyTriTessFactorOp(Value *input, DXIL::OpCode opcode, hlsl::OP *hlslOP,
  3957. IRBuilder<> &Builder) {
  3958. Value *input0 = Builder.CreateExtractElement(input, (uint64_t)0);
  3959. Value *input1 = Builder.CreateExtractElement(input, 1);
  3960. Value *input2 = Builder.CreateExtractElement(input, 2);
  3961. if (opcode == DXIL::OpCode::FMax || opcode == DXIL::OpCode::FMin) {
  3962. Value *temp =
  3963. TrivialDxilBinaryOperation(opcode, input0, input1, hlslOP, Builder);
  3964. Value *combined =
  3965. TrivialDxilBinaryOperation(opcode, temp, input2, hlslOP, Builder);
  3966. return combined;
  3967. } else {
  3968. // Avg.
  3969. Value *temp = Builder.CreateFAdd(input0, input1);
  3970. Value *combined = Builder.CreateFAdd(temp, input2);
  3971. Value *rcp = ConstantFP::get(input0->getType(), 1.0 / 3.0);
  3972. combined = Builder.CreateFMul(combined, rcp);
  3973. return combined;
  3974. }
  3975. }
  3976. // 4 inputs, 1 result
  3977. Value *ApplyQuadTessFactorOp(Value *input, DXIL::OpCode opcode,
  3978. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  3979. Value *input0 = Builder.CreateExtractElement(input, (uint64_t)0);
  3980. Value *input1 = Builder.CreateExtractElement(input, 1);
  3981. Value *input2 = Builder.CreateExtractElement(input, 2);
  3982. Value *input3 = Builder.CreateExtractElement(input, 3);
  3983. if (opcode == DXIL::OpCode::FMax || opcode == DXIL::OpCode::FMin) {
  3984. Value *temp0 =
  3985. TrivialDxilBinaryOperation(opcode, input0, input1, hlslOP, Builder);
  3986. Value *temp1 =
  3987. TrivialDxilBinaryOperation(opcode, input2, input3, hlslOP, Builder);
  3988. Value *combined =
  3989. TrivialDxilBinaryOperation(opcode, temp0, temp1, hlslOP, Builder);
  3990. return combined;
  3991. } else {
  3992. // Avg.
  3993. Value *temp0 = Builder.CreateFAdd(input0, input1);
  3994. Value *temp1 = Builder.CreateFAdd(input2, input3);
  3995. Value *combined = Builder.CreateFAdd(temp0, temp1);
  3996. Value *rcp = ConstantFP::get(input0->getType(), 0.25);
  3997. combined = Builder.CreateFMul(combined, rcp);
  3998. return combined;
  3999. }
  4000. }
  4001. // 4 inputs, 2 result
  4002. Value *Apply2DQuadTessFactorOp(Value *input, DXIL::OpCode opcode,
  4003. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  4004. Value *input0 = Builder.CreateExtractElement(input, (uint64_t)0);
  4005. Value *input1 = Builder.CreateExtractElement(input, 1);
  4006. Value *input2 = Builder.CreateExtractElement(input, 2);
  4007. Value *input3 = Builder.CreateExtractElement(input, 3);
  4008. if (opcode == DXIL::OpCode::FMax || opcode == DXIL::OpCode::FMin) {
  4009. Value *temp0 =
  4010. TrivialDxilBinaryOperation(opcode, input0, input1, hlslOP, Builder);
  4011. Value *temp1 =
  4012. TrivialDxilBinaryOperation(opcode, input2, input3, hlslOP, Builder);
  4013. Value *combined = UndefValue::get(VectorType::get(input0->getType(), 2));
  4014. combined = Builder.CreateInsertElement(combined, temp0, (uint64_t)0);
  4015. combined = Builder.CreateInsertElement(combined, temp1, 1);
  4016. return combined;
  4017. } else {
  4018. // Avg.
  4019. Value *temp0 = Builder.CreateFAdd(input0, input1);
  4020. Value *temp1 = Builder.CreateFAdd(input2, input3);
  4021. Value *combined = UndefValue::get(VectorType::get(input0->getType(), 2));
  4022. combined = Builder.CreateInsertElement(combined, temp0, (uint64_t)0);
  4023. combined = Builder.CreateInsertElement(combined, temp1, 1);
  4024. Constant *rcp = ConstantFP::get(input0->getType(), 0.5);
  4025. rcp = ConstantVector::getSplat(2, rcp);
  4026. combined = Builder.CreateFMul(combined, rcp);
  4027. return combined;
  4028. }
  4029. }
// When the rounded, scaled factor drops below `cutoffVal`, substitute the
// clamped/rounded *unscaled* average instead, so small scales do not
// collapse the tessellation below the cutoff.
// In/out: *pClampedResult is replaced by the selected clamped value.
// Returns the selected rounded value.
Value *ResolveSmallValue(Value **pClampedResult, Value *rounded, Value *averageUnscaled,
    float cutoffVal, DXIL::TessellatorPartitioning partitionMode, hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  Value *clampedResult = *pClampedResult;
  Value *clampedVal = clampedResult;
  Value *roundedVal = rounded;
  // Do partitioning-specific clamping of the unscaled average.
  Value *clampedAvg = ClampTessFactor(averageUnscaled, partitionMode, hlslOP, Builder);
  Constant *cutoffVals = ConstantFP::get(Type::getFloatTy(rounded->getContext()), cutoffVal);
  // Match the average's type so the FMin below is well-formed.
  if (clampedAvg->getType()->isVectorTy())
    cutoffVals = ConstantVector::getSplat(clampedAvg->getType()->getVectorNumElements(), cutoffVals);
  // Limit the value: the fallback average never exceeds the cutoff.
  clampedAvg = TrivialDxilBinaryOperation(DXIL::OpCode::FMin, clampedAvg, cutoffVals, hlslOP, Builder);
  // Round up for integer/pow2 partitioning.
  Value *roundedAvg = RoundUpTessFactor(clampedAvg, partitionMode, hlslOP, Builder);
  // Re-splat the cutoff to match `rounded` for the comparison below.
  // NOTE(review): if cutoffVals was already splatted above, this passes a
  // vector constant to getSplat — appears to assume averageUnscaled and
  // rounded are not both vectors; confirm against callers.
  if (rounded->getType() != cutoffVals->getType())
    cutoffVals = ConstantVector::getSplat(rounded->getType()->getVectorNumElements(), cutoffVals);
  // If the scaled value is less than the cutoff, take the unscaled average.
  Value *lt = Builder.CreateFCmpOLT(rounded, cutoffVals);
  if (clampedAvg->getType() != clampedVal->getType())
    clampedAvg = SplatToVector(clampedAvg, clampedVal->getType(), Builder);
  *pClampedResult = Builder.CreateSelect(lt, clampedAvg, clampedVal);
  if (roundedAvg->getType() != roundedVal->getType())
    roundedAvg = SplatToVector(roundedAvg, roundedVal->getType(), Builder);
  Value *result = Builder.CreateSelect(lt, roundedAvg, roundedVal);
  return result;
}
// For 2D quad factors: where the clamped result dropped below `cutoffVal`
// on an axis, replace that lane with a per-result option value — the max of
// the result's two lanes, capped at the cutoff (rounded for the "final"
// result) — so one small axis does not collapse the patch.
// In/out: *pClampedResult and *pFinalResult are both 2-vectors, updated
// lane-wise by the selects at the end.
void ResolveQuadAxes( Value **pFinalResult, Value **pClampedResult,
    float cutoffVal, DXIL::TessellatorPartitioning partitionMode, hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  Value *finalResult = *pFinalResult;
  Value *clampedResult = *pClampedResult;
  Value *clampR = clampedResult;
  Value *finalR = finalResult;
  Type *f32Ty = Type::getFloatTy(finalR->getContext());
  Constant *cutoffVals = ConstantFP::get(f32Ty, cutoffVal);
  // Caps: raw cutoff for the clamped result, rounded cutoff for the final
  // (rounded) result, so both stay consistent with their own processing.
  Value *minValsX = cutoffVals;
  Value *minValsY = RoundUpTessFactor(cutoffVals, partitionMode, hlslOP, Builder);
  // Max of the clamped result's two lanes.
  Value *clampRX = Builder.CreateExtractElement(clampR, (uint64_t)0);
  Value *clampRY = Builder.CreateExtractElement(clampR, 1);
  Value *maxValsX = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, clampRX, clampRY, hlslOP, Builder);
  // Max of the final result's two lanes.
  Value *finalRX = Builder.CreateExtractElement(finalR, (uint64_t)0);
  Value *finalRY = Builder.CreateExtractElement(finalR, 1);
  Value *maxValsY = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, finalRX, finalRY, hlslOP, Builder);
  // Don't go over our threshold ("final" one is rounded).
  Value * optionX = TrivialDxilBinaryOperation(DXIL::OpCode::FMin, maxValsX, minValsX, hlslOP, Builder);
  Value * optionY = TrivialDxilBinaryOperation(DXIL::OpCode::FMin, maxValsY, minValsY, hlslOP, Builder);
  Value *clampL = SplatToVector(optionX, clampR->getType(), Builder);
  Value *finalL = SplatToVector(optionY, finalR->getType(), Builder);
  // Lane-wise: where the clamped result is below the cutoff, substitute the
  // option value; otherwise keep the original lane.
  cutoffVals = ConstantVector::getSplat(2, cutoffVals);
  Value *lt = Builder.CreateFCmpOLT(clampedResult, cutoffVals);
  *pClampedResult = Builder.CreateSelect(lt, clampL, clampR);
  *pFinalResult = Builder.CreateSelect(lt, finalL, finalR);
}
// Lower the ProcessXXXTessFactors{Avg,Max,Min} hull-shader intrinsic family.
// Computes the rounded edge tess factors and the unrounded/rounded inside
// factors and stores them through the intrinsic's output pointers.
// Returns nullptr: the HL call itself produces no value.
Value *TranslateProcessTessFactors(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
hlsl::OP *hlslOP = &helper.hlslOP;
// Get partition mode; only hull shaders carry tessellator properties.
DXASSERT_NOMSG(helper.functionProps);
DXASSERT(helper.functionProps->shaderKind == ShaderModel::Kind::Hull, "must be hull shader");
DXIL::TessellatorPartitioning partition = helper.functionProps->ShaderProps.HS.partition;
IRBuilder<> Builder(CI);
// Reduction used to collapse edge factors into the inside factor.
// NumOpCodes is a sentinel meaning "average".
DXIL::OpCode tessFactorOp = DXIL::OpCode::NumOpCodes;
switch (IOP) {
case IntrinsicOp::IOP_Process2DQuadTessFactorsMax:
case IntrinsicOp::IOP_ProcessQuadTessFactorsMax:
case IntrinsicOp::IOP_ProcessTriTessFactorsMax:
tessFactorOp = DXIL::OpCode::FMax;
break;
case IntrinsicOp::IOP_Process2DQuadTessFactorsMin:
case IntrinsicOp::IOP_ProcessQuadTessFactorsMin:
case IntrinsicOp::IOP_ProcessTriTessFactorsMin:
tessFactorOp = DXIL::OpCode::FMin;
break;
default:
// Default is Avg.
break;
}
Value *rawEdgeFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRawEdgeFactor);
Value *insideScale = CI->getArgOperand(HLOperandIndex::kProcessTessFactorInsideScale);
// Clamp to [0.0f..1.0f], NaN->0.0f.
Value *scales = CleanupTessFactorScale(insideScale, hlslOP, Builder);
// Do partitioning-specific clamping.
Value *clamped = ClampTessFactor(rawEdgeFactor, partition, hlslOP, Builder);
// Round up for integer/pow2 partitioning.
Value *rounded = RoundUpTessFactor(clamped, partition, hlslOP, Builder);
// Store the rounded edge factors through the output pointer.
Value *roundedEdgeFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedEdgeFactor);
Builder.CreateStore(rounded, roundedEdgeFactor);
// Clamp to [1.0f..Inf], NaN->1.0f.
bool isQuad = false;
Value *clean = CleanupTessFactor(rawEdgeFactor, hlslOP, Builder);
Value *factors = nullptr;
// Reduce the cleaned edge factors to the topology's inside factor(s).
switch (IOP) {
case IntrinsicOp::IOP_Process2DQuadTessFactorsAvg:
case IntrinsicOp::IOP_Process2DQuadTessFactorsMax:
case IntrinsicOp::IOP_Process2DQuadTessFactorsMin:
factors = Apply2DQuadTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
break;
case IntrinsicOp::IOP_ProcessQuadTessFactorsAvg:
case IntrinsicOp::IOP_ProcessQuadTessFactorsMax:
case IntrinsicOp::IOP_ProcessQuadTessFactorsMin:
factors = ApplyQuadTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
isQuad = true;
break;
case IntrinsicOp::IOP_ProcessTriTessFactorsAvg:
case IntrinsicOp::IOP_ProcessTriTessFactorsMax:
case IntrinsicOp::IOP_ProcessTriTessFactorsMin:
factors = ApplyTriTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
break;
default:
DXASSERT(0, "invalid opcode for ProcessTessFactor");
break;
}
// Scale the inside factor(s); splat first if the scale is a vector and the
// reduced factor is scalar.
Value *scaledI = nullptr;
if (scales->getType() == factors->getType())
scaledI = Builder.CreateFMul(factors, scales);
else {
Value *vecFactors = SplatToVector(factors, scales->getType(), Builder);
scaledI = Builder.CreateFMul(vecFactors, scales);
}
// Do partitioning-specific clamping.
Value *clampedI = ClampTessFactor(scaledI, partition, hlslOP, Builder);
// Round up for integer/pow2 partitioning.
Value *roundedI = RoundUpTessFactor(clampedI, partition, hlslOP, Builder);
Value *finalI = roundedI;
// Fractional-odd partitioning needs extra handling for small factors.
if (partition == DXIL::TessellatorPartitioning::FractionalOdd) {
// If not max, set to AVG.
if (tessFactorOp != DXIL::OpCode::FMax)
tessFactorOp = DXIL::OpCode::NumOpCodes;
bool b2D = false;
Value *avgFactorsI = nullptr;
switch (IOP) {
case IntrinsicOp::IOP_Process2DQuadTessFactorsAvg:
case IntrinsicOp::IOP_Process2DQuadTessFactorsMax:
case IntrinsicOp::IOP_Process2DQuadTessFactorsMin:
avgFactorsI = Apply2DQuadTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
b2D = true;
break;
case IntrinsicOp::IOP_ProcessQuadTessFactorsAvg:
case IntrinsicOp::IOP_ProcessQuadTessFactorsMax:
case IntrinsicOp::IOP_ProcessQuadTessFactorsMin:
avgFactorsI = ApplyQuadTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
break;
case IntrinsicOp::IOP_ProcessTriTessFactorsAvg:
case IntrinsicOp::IOP_ProcessTriTessFactorsMax:
case IntrinsicOp::IOP_ProcessTriTessFactorsMin:
avgFactorsI = ApplyTriTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
break;
default:
DXASSERT(0, "invalid opcode for ProcessTessFactor");
break;
}
finalI =
ResolveSmallValue(/*inout*/&clampedI, roundedI, avgFactorsI, /*cutoff*/ 3.0,
partition, hlslOP, Builder);
if (b2D)
ResolveQuadAxes(/*inout*/&finalI, /*inout*/&clampedI, /*cutoff*/3.0, partition, hlslOP, Builder);
}
// Write the unrounded inside factor; quads reduce to a single channel that
// must be splatted back to the output's vector width.
Value *unroundedInsideFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorUnRoundedInsideFactor);
Type *outFactorTy = unroundedInsideFactor->getType()->getPointerElementType();
if (outFactorTy != clampedI->getType()) {
DXASSERT(isQuad, "quad only write one channel of out factor");
(void)isQuad;
clampedI = Builder.CreateExtractElement(clampedI, (uint64_t)0);
// Splat clampedI to float2.
clampedI = SplatToVector(clampedI, outFactorTy, Builder);
}
Builder.CreateStore(clampedI, unroundedInsideFactor);
// Write the rounded inside factor, with the same quad splat handling.
Value *roundedInsideFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedInsideFactor);
if (outFactorTy != finalI->getType()) {
DXASSERT(isQuad, "quad only write one channel of out factor");
finalI = Builder.CreateExtractElement(finalI, (uint64_t)0);
// Splat finalI to float2.
finalI = SplatToVector(finalI, outFactorTy, Builder);
}
Builder.CreateStore(finalI, roundedInsideFactor);
return nullptr;
}
  4207. }
  4208. // Ray Tracing.
  4209. namespace {
  4210. Value *TranslateReportIntersection(CallInst *CI, IntrinsicOp IOP,
  4211. OP::OpCode opcode,
  4212. HLOperationLowerHelper &helper,
  4213. HLObjectOperationLowerHelper *pObjHelper,
  4214. bool &Translated) {
  4215. hlsl::OP *hlslOP = &helper.hlslOP;
  4216. Value *THit = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  4217. Value *HitKind = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  4218. Value *Attr = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  4219. Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
  4220. Type *Ty = Attr->getType();
  4221. Function *F = hlslOP->GetOpFunc(opcode, Ty);
  4222. IRBuilder<> Builder(CI);
  4223. return Builder.CreateCall(F, {opArg, THit, HitKind, Attr});
  4224. }
  4225. Value *TranslateCallShader(CallInst *CI, IntrinsicOp IOP,
  4226. OP::OpCode opcode,
  4227. HLOperationLowerHelper &helper,
  4228. HLObjectOperationLowerHelper *pObjHelper,
  4229. bool &Translated) {
  4230. hlsl::OP *hlslOP = &helper.hlslOP;
  4231. Value *ShaderIndex = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  4232. Value *Parameter = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  4233. Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
  4234. Type *Ty = Parameter->getType();
  4235. Function *F = hlslOP->GetOpFunc(opcode, Ty);
  4236. IRBuilder<> Builder(CI);
  4237. return Builder.CreateCall(F, {opArg, ShaderIndex, Parameter});
  4238. }
  4239. Value *TranslateTraceRay(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4240. HLOperationLowerHelper &helper,
  4241. HLObjectOperationLowerHelper *pObjHelper,
  4242. bool &Translated) {
  4243. hlsl::OP *hlslOP = &helper.hlslOP;
  4244. Value *rayDesc = CI->getArgOperand(HLOperandIndex::kTraceRayRayDescOpIdx);
  4245. Value *payLoad = CI->getArgOperand(HLOperandIndex::kTraceRayPayLoadOpIdx);
  4246. Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
  4247. Value *Args[DXIL::OperandIndex::kTraceRayNumOp];
  4248. Args[0] = opArg;
  4249. for (unsigned i = 1; i < HLOperandIndex::kTraceRayRayDescOpIdx; i++) {
  4250. Args[i] = CI->getArgOperand(i);
  4251. }
  4252. IRBuilder<> Builder(CI);
  4253. // struct RayDesc
  4254. //{
  4255. // float3 Origin;
  4256. // float TMin;
  4257. // float3 Direction;
  4258. // float TMax;
  4259. //};
  4260. Value *zeroIdx = hlslOP->GetU32Const(0);
  4261. Value *origin = Builder.CreateGEP(rayDesc, {zeroIdx, zeroIdx});
  4262. origin = Builder.CreateLoad(origin);
  4263. unsigned index = DXIL::OperandIndex::kTraceRayRayDescOpIdx;
  4264. Args[index++] = Builder.CreateExtractElement(origin, (uint64_t)0);
  4265. Args[index++] = Builder.CreateExtractElement(origin, 1);
  4266. Args[index++] = Builder.CreateExtractElement(origin, 2);
  4267. Value *tmin = Builder.CreateGEP(rayDesc, {zeroIdx, hlslOP->GetU32Const(1)});
  4268. tmin = Builder.CreateLoad(tmin);
  4269. Args[index++] = tmin;
  4270. Value *direction = Builder.CreateGEP(rayDesc, {zeroIdx, hlslOP->GetU32Const(2)});
  4271. direction = Builder.CreateLoad(direction);
  4272. Args[index++] = Builder.CreateExtractElement(direction, (uint64_t)0);
  4273. Args[index++] = Builder.CreateExtractElement(direction, 1);
  4274. Args[index++] = Builder.CreateExtractElement(direction, 2);
  4275. Value *tmax = Builder.CreateGEP(rayDesc, {zeroIdx, hlslOP->GetU32Const(3)});
  4276. tmax = Builder.CreateLoad(tmax);
  4277. Args[index++] = tmax;
  4278. Args[DXIL::OperandIndex::kTraceRayPayloadOpIdx] = payLoad;
  4279. Type *Ty = payLoad->getType();
  4280. Function *F = hlslOP->GetOpFunc(opcode, Ty);
  4281. return Builder.CreateCall(F, Args);
  4282. }
  4283. // RayQuery methods
  4284. Value *TranslateAllocateRayQuery(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4285. HLOperationLowerHelper &helper,
  4286. HLObjectOperationLowerHelper *pObjHelper,
  4287. bool &Translated) {
  4288. hlsl::OP *hlslOP = &helper.hlslOP;
  4289. Value *refArgs[] = {nullptr, CI->getOperand(1)};
  4290. return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP);
  4291. }
  4292. Value *TranslateTraceRayInline(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4293. HLOperationLowerHelper &helper,
  4294. HLObjectOperationLowerHelper *pObjHelper,
  4295. bool &Translated) {
  4296. hlsl::OP *hlslOP = &helper.hlslOP;
  4297. Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
  4298. Value *Args[DXIL::OperandIndex::kTraceRayInlineNumOp];
  4299. Args[0] = opArg;
  4300. for (unsigned i = 1; i < HLOperandIndex::kTraceRayInlineRayDescOpIdx; i++) {
  4301. Args[i] = CI->getArgOperand(i);
  4302. }
  4303. IRBuilder<> Builder(CI);
  4304. unsigned hlIndex = HLOperandIndex::kTraceRayInlineRayDescOpIdx;
  4305. unsigned index = DXIL::OperandIndex::kTraceRayInlineRayDescOpIdx;
  4306. // struct RayDesc
  4307. //{
  4308. // float3 Origin;
  4309. Value *origin = CI->getArgOperand(hlIndex++);
  4310. Args[index++] = Builder.CreateExtractElement(origin, (uint64_t)0);
  4311. Args[index++] = Builder.CreateExtractElement(origin, 1);
  4312. Args[index++] = Builder.CreateExtractElement(origin, 2);
  4313. // float TMin;
  4314. Args[index++] = CI->getArgOperand(hlIndex++);
  4315. // float3 Direction;
  4316. Value *direction = CI->getArgOperand(hlIndex++);
  4317. Args[index++] = Builder.CreateExtractElement(direction, (uint64_t)0);
  4318. Args[index++] = Builder.CreateExtractElement(direction, 1);
  4319. Args[index++] = Builder.CreateExtractElement(direction, 2);
  4320. // float TMax;
  4321. Args[index++] = CI->getArgOperand(hlIndex++);
  4322. //};
  4323. DXASSERT_NOMSG(index == DXIL::OperandIndex::kTraceRayInlineNumOp);
  4324. Function *F = hlslOP->GetOpFunc(opcode, Builder.getVoidTy());
  4325. return Builder.CreateCall(F, Args);
  4326. }
  4327. Value *TranslateCommitProceduralPrimitiveHit(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4328. HLOperationLowerHelper &helper,
  4329. HLObjectOperationLowerHelper *pObjHelper,
  4330. bool &Translated) {
  4331. hlsl::OP *hlslOP = &helper.hlslOP;
  4332. Value *THit = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  4333. Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
  4334. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  4335. Value *Args[] = {opArg, handle, THit};
  4336. IRBuilder<> Builder(CI);
  4337. Function *F = hlslOP->GetOpFunc(opcode, Builder.getVoidTy());
  4338. return Builder.CreateCall(F, Args);
  4339. }
  4340. Value *TranslateGenericRayQueryMethod(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4341. HLOperationLowerHelper &helper,
  4342. HLObjectOperationLowerHelper *pObjHelper,
  4343. bool &Translated) {
  4344. hlsl::OP *hlslOP = &helper.hlslOP;
  4345. Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
  4346. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  4347. IRBuilder<> Builder(CI);
  4348. Function *F = hlslOP->GetOpFunc(opcode, CI->getType());
  4349. return Builder.CreateCall(F, {opArg, handle});
  4350. }
  4351. Value *TranslateRayQueryMatrix3x4Operation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4352. HLOperationLowerHelper &helper,
  4353. HLObjectOperationLowerHelper *pObjHelper,
  4354. bool &Translated) {
  4355. hlsl::OP *hlslOP = &helper.hlslOP;
  4356. VectorType *Ty = cast<VectorType>(CI->getType());
  4357. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  4358. uint32_t rVals[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2};
  4359. Constant *rows = ConstantDataVector::get(CI->getContext(), rVals);
  4360. uint8_t cVals[] = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3};
  4361. Constant *cols = ConstantDataVector::get(CI->getContext(), cVals);
  4362. Value *retVal =
  4363. TrivialDxilOperation(opcode, {nullptr, handle, rows, cols}, Ty, CI, hlslOP);
  4364. return retVal;
  4365. }
  4366. Value *TranslateRayQueryTransposedMatrix3x4Operation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4367. HLOperationLowerHelper &helper,
  4368. HLObjectOperationLowerHelper *pObjHelper,
  4369. bool &Translated) {
  4370. hlsl::OP *hlslOP = &helper.hlslOP;
  4371. VectorType *Ty = cast<VectorType>(CI->getType());
  4372. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  4373. uint32_t rVals[] = { 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2 };
  4374. Constant *rows = ConstantDataVector::get(CI->getContext(), rVals);
  4375. uint8_t cVals[] = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 };
  4376. Constant *cols = ConstantDataVector::get(CI->getContext(), cVals);
  4377. Value *retVal =
  4378. TrivialDxilOperation(opcode, {nullptr, handle, rows, cols}, Ty, CI, hlslOP);
  4379. return retVal;
  4380. }
  4381. Value *TranslateRayQueryFloat2Getter(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4382. HLOperationLowerHelper &helper,
  4383. HLObjectOperationLowerHelper *pObjHelper,
  4384. bool &Translated) {
  4385. hlsl::OP *hlslOP = &helper.hlslOP;
  4386. VectorType *Ty = cast<VectorType>(CI->getType());
  4387. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  4388. uint8_t elementVals[] = {0, 1};
  4389. Constant *element = ConstantDataVector::get(CI->getContext(), elementVals);
  4390. Value *retVal =
  4391. TrivialDxilOperation(opcode, {nullptr, handle, element}, Ty, CI, hlslOP);
  4392. return retVal;
  4393. }
  4394. Value *TranslateRayQueryFloat3Getter(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4395. HLOperationLowerHelper &helper,
  4396. HLObjectOperationLowerHelper *pObjHelper,
  4397. bool &Translated) {
  4398. hlsl::OP *hlslOP = &helper.hlslOP;
  4399. VectorType *Ty = cast<VectorType>(CI->getType());
  4400. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  4401. uint8_t elementVals[] = {0, 1, 2};
  4402. Constant *element = ConstantDataVector::get(CI->getContext(), elementVals);
  4403. Value *retVal =
  4404. TrivialDxilOperation(opcode, {nullptr, handle, element}, Ty, CI, hlslOP);
  4405. return retVal;
  4406. }
  4407. Value *TranslateNoArgVectorOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4408. HLOperationLowerHelper &helper,
  4409. HLObjectOperationLowerHelper *pObjHelper,
  4410. bool &Translated) {
  4411. hlsl::OP *hlslOP = &helper.hlslOP;
  4412. VectorType *Ty = cast<VectorType>(CI->getType());
  4413. uint8_t vals[] = {0,1,2,3};
  4414. Constant *src = ConstantDataVector::get(CI->getContext(), vals);
  4415. Value *retVal = TrivialDxilOperation(opcode, {nullptr, src}, Ty, CI, hlslOP);
  4416. return retVal;
  4417. }
  4418. Value *TranslateNoArgMatrix3x4Operation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4419. HLOperationLowerHelper &helper,
  4420. HLObjectOperationLowerHelper *pObjHelper,
  4421. bool &Translated) {
  4422. hlsl::OP *hlslOP = &helper.hlslOP;
  4423. VectorType *Ty = cast<VectorType>(CI->getType());
  4424. uint32_t rVals[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2};
  4425. Constant *rows = ConstantDataVector::get(CI->getContext(), rVals);
  4426. uint8_t cVals[] = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3};
  4427. Constant *cols = ConstantDataVector::get(CI->getContext(), cVals);
  4428. Value *retVal =
  4429. TrivialDxilOperation(opcode, {nullptr, rows, cols}, Ty, CI, hlslOP);
  4430. return retVal;
  4431. }
  4432. Value *TranslateNoArgTransposedMatrix3x4Operation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4433. HLOperationLowerHelper &helper,
  4434. HLObjectOperationLowerHelper *pObjHelper,
  4435. bool &Translated) {
  4436. hlsl::OP *hlslOP = &helper.hlslOP;
  4437. VectorType *Ty = cast<VectorType>(CI->getType());
  4438. uint32_t rVals[] = { 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2 };
  4439. Constant *rows = ConstantDataVector::get(CI->getContext(), rVals);
  4440. uint8_t cVals[] = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 };
  4441. Constant *cols = ConstantDataVector::get(CI->getContext(), cVals);
  4442. Value *retVal =
  4443. TrivialDxilOperation(opcode, { nullptr, rows, cols }, Ty, CI, hlslOP);
  4444. return retVal;
  4445. }
  4446. Value *TranslateNoArgNoReturnPreserveOutput(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4447. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  4448. Instruction *pResult = cast<Instruction>(
  4449. TrivialNoArgOperation(CI, IOP, opcode, helper, pObjHelper, Translated));
  4450. // HL intrinsic must have had a return injected just after the call.
  4451. // SROA_Parameter_HLSL will copy from alloca to output just before each return.
  4452. // Now move call after the copy and just before the return.
  4453. if (isa<ReturnInst>(pResult->getNextNode()))
  4454. return pResult;
  4455. ReturnInst *RetI = cast<ReturnInst>(pResult->getParent()->getTerminator());
  4456. pResult->removeFromParent();
  4457. pResult->insertBefore(RetI);
  4458. return pResult;
  4459. }
  4460. // Special half dot2 with accumulate to float
  4461. Value *TranslateDot2Add(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4462. HLOperationLowerHelper &helper,
  4463. HLObjectOperationLowerHelper *pObjHelper,
  4464. bool &Translated) {
  4465. hlsl::OP *hlslOP = &helper.hlslOP;
  4466. Value *src0 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  4467. const unsigned vecSize = 2;
  4468. DXASSERT(src0->getType()->isVectorTy() &&
  4469. vecSize == src0->getType()->getVectorNumElements() &&
  4470. src0->getType()->getScalarType()->isHalfTy(),
  4471. "otherwise, unexpected input dimension or component type");
  4472. Value *src1 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  4473. DXASSERT(src0->getType() == src1->getType(),
  4474. "otherwise, mismatched argument types");
  4475. Value *accArg = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  4476. Type *accTy = accArg->getType();
  4477. DXASSERT(!accTy->isVectorTy() && accTy->isFloatTy(),
  4478. "otherwise, unexpected accumulator type");
  4479. IRBuilder<> Builder(CI);
  4480. Function *dxilFunc = hlslOP->GetOpFunc(opcode, accTy);
  4481. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  4482. SmallVector<Value *, 6> args;
  4483. args.emplace_back(opArg);
  4484. args.emplace_back(accArg);
  4485. for (unsigned i = 0; i < vecSize; i++)
  4486. args.emplace_back(Builder.CreateExtractElement(src0, i));
  4487. for (unsigned i = 0; i < vecSize; i++)
  4488. args.emplace_back(Builder.CreateExtractElement(src1, i));
  4489. return Builder.CreateCall(dxilFunc, args);
  4490. }
  4491. Value *TranslateDot4AddPacked(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4492. HLOperationLowerHelper &helper,
  4493. HLObjectOperationLowerHelper *pObjHelper,
  4494. bool &Translated) {
  4495. hlsl::OP *hlslOP = &helper.hlslOP;
  4496. Value *src0 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  4497. DXASSERT(
  4498. !src0->getType()->isVectorTy() && src0->getType()->isIntegerTy(32),
  4499. "otherwise, unexpected vector support in high level intrinsic tempalte");
  4500. Value *src1 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  4501. DXASSERT(src0->getType() == src1->getType(), "otherwise, mismatched argument types");
  4502. Value *accArg = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  4503. Type *accTy = accArg->getType();
  4504. DXASSERT(!accTy->isVectorTy() && accTy->isIntegerTy(32),
  4505. "otherwise, unexpected vector support in high level intrinsic tempalte");
  4506. IRBuilder<> Builder(CI);
  4507. Function *dxilFunc = hlslOP->GetOpFunc(opcode, accTy);
  4508. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  4509. return Builder.CreateCall(dxilFunc, { opArg, accArg, src0, src1 });
  4510. }
  4511. } // namespace
  4512. // Resource Handle.
  4513. namespace {
  4514. Value *TranslateGetHandleFromHeap(CallInst *CI, IntrinsicOp IOP,
  4515. DXIL::OpCode opcode,
  4516. HLOperationLowerHelper &helper,
  4517. HLObjectOperationLowerHelper *pObjHelper,
  4518. bool &Translated) {
  4519. hlsl::OP &hlslOP = helper.hlslOP;
  4520. Function *dxilFunc = hlslOP.GetOpFunc(opcode, helper.voidTy);
  4521. IRBuilder<> Builder(CI);
  4522. Value *opArg = ConstantInt::get(helper.i32Ty, (unsigned)opcode);
  4523. return Builder.CreateCall(
  4524. dxilFunc, {opArg, CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx),
  4525. // TODO: update nonUniformIndex later.
  4526. Builder.getInt1(false)});
  4527. }
  4528. }
  4529. // Lower table.
  4530. namespace {
  4531. Value *EmptyLower(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode,
  4532. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  4533. Translated = false;
  4534. dxilutil::EmitErrorOnInstruction(CI, "Unsupported intrinsic.");
  4535. return nullptr;
  4536. }
  4537. // SPIRV change starts
  4538. #ifdef ENABLE_SPIRV_CODEGEN
  4539. Value *UnsupportedVulkanIntrinsic(CallInst *CI, IntrinsicOp IOP,
  4540. DXIL::OpCode opcode,
  4541. HLOperationLowerHelper &helper,
  4542. HLObjectOperationLowerHelper *pObjHelper,
  4543. bool &Translated) {
  4544. Translated = false;
  4545. dxilutil::EmitErrorOnInstruction(CI, "Unsupported Vulkan intrinsic.");
  4546. return nullptr;
  4547. }
  4548. #endif // ENABLE_SPIRV_CODEGEN
  4549. // SPIRV change ends
  4550. Value *StreamOutputLower(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode,
  4551. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  4552. // Translated in DxilGenerationPass::GenerateStreamOutputOperation.
  4553. // Do nothing here.
  4554. // Mark not translated.
  4555. Translated = false;
  4556. return nullptr;
  4557. }
  4558. // This table has to match IntrinsicOp orders
  4559. IntrinsicLower gLowerTable[] = {
  4560. {IntrinsicOp::IOP_AcceptHitAndEndSearch, TranslateNoArgNoReturnPreserveOutput, DXIL::OpCode::AcceptHitAndEndSearch},
  4561. {IntrinsicOp::IOP_AddUint64, TranslateAddUint64, DXIL::OpCode::UAddc},
  4562. {IntrinsicOp::IOP_AllMemoryBarrier, TrivialBarrier, DXIL::OpCode::Barrier},
  4563. {IntrinsicOp::IOP_AllMemoryBarrierWithGroupSync, TrivialBarrier, DXIL::OpCode::Barrier},
  4564. {IntrinsicOp::IOP_AllocateRayQuery, TranslateAllocateRayQuery, DXIL::OpCode::AllocateRayQuery},
  4565. {IntrinsicOp::IOP_CallShader, TranslateCallShader, DXIL::OpCode::CallShader},
  4566. {IntrinsicOp::IOP_CheckAccessFullyMapped, TranslateCheckAccess, DXIL::OpCode::CheckAccessFullyMapped},
  4567. {IntrinsicOp::IOP_CreateResourceFromHeap, TranslateGetHandleFromHeap, DXIL::OpCode::CreateHandleFromHeap},
  4568. {IntrinsicOp::IOP_D3DCOLORtoUBYTE4, TranslateD3DColorToUByte4, DXIL::OpCode::NumOpCodes},
  4569. {IntrinsicOp::IOP_DeviceMemoryBarrier, TrivialBarrier, DXIL::OpCode::Barrier},
  4570. {IntrinsicOp::IOP_DeviceMemoryBarrierWithGroupSync, TrivialBarrier, DXIL::OpCode::Barrier},
  4571. {IntrinsicOp::IOP_DispatchMesh, TrivialDispatchMesh, DXIL::OpCode::DispatchMesh },
  4572. {IntrinsicOp::IOP_DispatchRaysDimensions, TranslateNoArgVectorOperation, DXIL::OpCode::DispatchRaysDimensions},
  4573. {IntrinsicOp::IOP_DispatchRaysIndex, TranslateNoArgVectorOperation, DXIL::OpCode::DispatchRaysIndex},
  4574. {IntrinsicOp::IOP_EvaluateAttributeAtSample, TranslateEvalSample, DXIL::OpCode::NumOpCodes},
  4575. {IntrinsicOp::IOP_EvaluateAttributeCentroid, TranslateEvalCentroid, DXIL::OpCode::EvalCentroid},
  4576. {IntrinsicOp::IOP_EvaluateAttributeSnapped, TranslateEvalSnapped, DXIL::OpCode::NumOpCodes},
  4577. {IntrinsicOp::IOP_GeometryIndex, TrivialNoArgWithRetOperation, DXIL::OpCode::GeometryIndex},
  4578. {IntrinsicOp::IOP_GetAttributeAtVertex, TranslateGetAttributeAtVertex, DXIL::OpCode::AttributeAtVertex},
  4579. {IntrinsicOp::IOP_GetRenderTargetSampleCount, TrivialNoArgOperation, DXIL::OpCode::RenderTargetGetSampleCount},
  4580. {IntrinsicOp::IOP_GetRenderTargetSamplePosition, TranslateGetRTSamplePos, DXIL::OpCode::NumOpCodes},
  4581. {IntrinsicOp::IOP_GroupMemoryBarrier, TrivialBarrier, DXIL::OpCode::Barrier},
  4582. {IntrinsicOp::IOP_GroupMemoryBarrierWithGroupSync, TrivialBarrier, DXIL::OpCode::Barrier},
  4583. {IntrinsicOp::IOP_HitKind, TrivialNoArgWithRetOperation, DXIL::OpCode::HitKind},
  4584. {IntrinsicOp::IOP_IgnoreHit, TranslateNoArgNoReturnPreserveOutput, DXIL::OpCode::IgnoreHit},
  4585. {IntrinsicOp::IOP_InstanceID, TrivialNoArgWithRetOperation, DXIL::OpCode::InstanceID},
  4586. {IntrinsicOp::IOP_InstanceIndex, TrivialNoArgWithRetOperation, DXIL::OpCode::InstanceIndex},
  4587. {IntrinsicOp::IOP_InterlockedAdd, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4588. {IntrinsicOp::IOP_InterlockedAnd, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4589. {IntrinsicOp::IOP_InterlockedCompareExchange, TranslateIopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
  4590. {IntrinsicOp::IOP_InterlockedCompareStore, TranslateIopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
  4591. {IntrinsicOp::IOP_InterlockedExchange, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4592. {IntrinsicOp::IOP_InterlockedMax, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4593. {IntrinsicOp::IOP_InterlockedMin, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4594. {IntrinsicOp::IOP_InterlockedOr, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4595. {IntrinsicOp::IOP_InterlockedXor, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4596. {IntrinsicOp::IOP_NonUniformResourceIndex, TranslateNonUniformResourceIndex, DXIL::OpCode::NumOpCodes},
  4597. {IntrinsicOp::IOP_ObjectRayDirection, TranslateNoArgVectorOperation, DXIL::OpCode::ObjectRayDirection},
  4598. {IntrinsicOp::IOP_ObjectRayOrigin, TranslateNoArgVectorOperation, DXIL::OpCode::ObjectRayOrigin},
  4599. {IntrinsicOp::IOP_ObjectToWorld, TranslateNoArgMatrix3x4Operation, DXIL::OpCode::ObjectToWorld},
  4600. {IntrinsicOp::IOP_ObjectToWorld3x4, TranslateNoArgMatrix3x4Operation, DXIL::OpCode::ObjectToWorld},
  4601. {IntrinsicOp::IOP_ObjectToWorld4x3, TranslateNoArgTransposedMatrix3x4Operation, DXIL::OpCode::ObjectToWorld},
  4602. {IntrinsicOp::IOP_PrimitiveIndex, TrivialNoArgWithRetOperation, DXIL::OpCode::PrimitiveIndex},
  4603. {IntrinsicOp::IOP_Process2DQuadTessFactorsAvg, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
  4604. {IntrinsicOp::IOP_Process2DQuadTessFactorsMax, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
  4605. {IntrinsicOp::IOP_Process2DQuadTessFactorsMin, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
  4606. {IntrinsicOp::IOP_ProcessIsolineTessFactors, TranslateProcessIsolineTessFactors, DXIL::OpCode::NumOpCodes},
  4607. {IntrinsicOp::IOP_ProcessQuadTessFactorsAvg, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
  4608. {IntrinsicOp::IOP_ProcessQuadTessFactorsMax, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
  4609. {IntrinsicOp::IOP_ProcessQuadTessFactorsMin, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
  4610. {IntrinsicOp::IOP_ProcessTriTessFactorsAvg, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
  4611. {IntrinsicOp::IOP_ProcessTriTessFactorsMax, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
  4612. {IntrinsicOp::IOP_ProcessTriTessFactorsMin, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
  4613. {IntrinsicOp::IOP_QuadReadAcrossDiagonal, TranslateQuadReadAcross, DXIL::OpCode::QuadOp},
  4614. {IntrinsicOp::IOP_QuadReadAcrossX, TranslateQuadReadAcross, DXIL::OpCode::QuadOp},
  4615. {IntrinsicOp::IOP_QuadReadAcrossY, TranslateQuadReadAcross, DXIL::OpCode::QuadOp},
  4616. {IntrinsicOp::IOP_QuadReadLaneAt, TranslateQuadReadLaneAt, DXIL::OpCode::NumOpCodes},
  4617. {IntrinsicOp::IOP_RayFlags, TrivialNoArgWithRetOperation, DXIL::OpCode::RayFlags},
  4618. {IntrinsicOp::IOP_RayTCurrent, TrivialNoArgWithRetOperation, DXIL::OpCode::RayTCurrent},
  4619. {IntrinsicOp::IOP_RayTMin, TrivialNoArgWithRetOperation, DXIL::OpCode::RayTMin},
  4620. {IntrinsicOp::IOP_ReportHit, TranslateReportIntersection, DXIL::OpCode::ReportHit},
  4621. {IntrinsicOp::IOP_SetMeshOutputCounts, TrivialSetMeshOutputCounts, DXIL::OpCode::SetMeshOutputCounts},
  4622. {IntrinsicOp::IOP_TraceRay, TranslateTraceRay, DXIL::OpCode::TraceRay},
  4623. {IntrinsicOp::IOP_WaveActiveAllEqual, TranslateWaveAllEqual, DXIL::OpCode::WaveActiveAllEqual},
  4624. {IntrinsicOp::IOP_WaveActiveAllTrue, TranslateWaveA2B, DXIL::OpCode::WaveAllTrue},
  4625. {IntrinsicOp::IOP_WaveActiveAnyTrue, TranslateWaveA2B, DXIL::OpCode::WaveAnyTrue},
  4626. {IntrinsicOp::IOP_WaveActiveBallot, TranslateWaveBallot, DXIL::OpCode::WaveActiveBallot},
  4627. {IntrinsicOp::IOP_WaveActiveBitAnd, TranslateWaveA2A, DXIL::OpCode::WaveActiveBit},
  4628. {IntrinsicOp::IOP_WaveActiveBitOr, TranslateWaveA2A, DXIL::OpCode::WaveActiveBit},
  4629. {IntrinsicOp::IOP_WaveActiveBitXor, TranslateWaveA2A, DXIL::OpCode::WaveActiveBit},
  4630. {IntrinsicOp::IOP_WaveActiveCountBits, TranslateWaveA2B, DXIL::OpCode::WaveAllBitCount},
  4631. {IntrinsicOp::IOP_WaveActiveMax, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp},
  4632. {IntrinsicOp::IOP_WaveActiveMin, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp},
  4633. {IntrinsicOp::IOP_WaveActiveProduct, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp},
  4634. {IntrinsicOp::IOP_WaveActiveSum, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp},
  4635. {IntrinsicOp::IOP_WaveGetLaneCount, TranslateWaveToVal, DXIL::OpCode::WaveGetLaneCount},
  4636. {IntrinsicOp::IOP_WaveGetLaneIndex, TranslateWaveToVal, DXIL::OpCode::WaveGetLaneIndex},
  4637. {IntrinsicOp::IOP_WaveIsFirstLane, TranslateWaveToVal, DXIL::OpCode::WaveIsFirstLane},
  4638. {IntrinsicOp::IOP_WaveMatch, TranslateWaveMatch, DXIL::OpCode::WaveMatch},
  4639. {IntrinsicOp::IOP_WaveMultiPrefixBitAnd, TranslateWaveMultiPrefix, DXIL::OpCode::WaveMultiPrefixOp},
  4640. {IntrinsicOp::IOP_WaveMultiPrefixBitOr, TranslateWaveMultiPrefix, DXIL::OpCode::WaveMultiPrefixOp},
  4641. {IntrinsicOp::IOP_WaveMultiPrefixBitXor, TranslateWaveMultiPrefix, DXIL::OpCode::WaveMultiPrefixOp},
  4642. {IntrinsicOp::IOP_WaveMultiPrefixCountBits, TranslateWaveMultiPrefixBitCount, DXIL::OpCode::WaveMultiPrefixBitCount},
  4643. {IntrinsicOp::IOP_WaveMultiPrefixProduct, TranslateWaveMultiPrefix, DXIL::OpCode::WaveMultiPrefixOp},
  4644. {IntrinsicOp::IOP_WaveMultiPrefixSum, TranslateWaveMultiPrefix, DXIL::OpCode::WaveMultiPrefixOp},
  4645. {IntrinsicOp::IOP_WavePrefixCountBits, TranslateWaveA2B, DXIL::OpCode::WavePrefixBitCount},
  4646. {IntrinsicOp::IOP_WavePrefixProduct, TranslateWaveA2A, DXIL::OpCode::WavePrefixOp},
  4647. {IntrinsicOp::IOP_WavePrefixSum, TranslateWaveA2A, DXIL::OpCode::WavePrefixOp},
  4648. {IntrinsicOp::IOP_WaveReadLaneAt, TranslateWaveReadLaneAt, DXIL::OpCode::WaveReadLaneAt},
  4649. {IntrinsicOp::IOP_WaveReadLaneFirst, TranslateWaveReadLaneFirst, DXIL::OpCode::WaveReadLaneFirst},
  4650. {IntrinsicOp::IOP_WorldRayDirection, TranslateNoArgVectorOperation, DXIL::OpCode::WorldRayDirection},
  4651. {IntrinsicOp::IOP_WorldRayOrigin, TranslateNoArgVectorOperation, DXIL::OpCode::WorldRayOrigin},
  4652. {IntrinsicOp::IOP_WorldToObject, TranslateNoArgMatrix3x4Operation, DXIL::OpCode::WorldToObject},
  4653. {IntrinsicOp::IOP_WorldToObject3x4, TranslateNoArgMatrix3x4Operation, DXIL::OpCode::WorldToObject},
  4654. {IntrinsicOp::IOP_WorldToObject4x3, TranslateNoArgTransposedMatrix3x4Operation, DXIL::OpCode::WorldToObject},
  4655. {IntrinsicOp::IOP_abort, EmptyLower, DXIL::OpCode::NumOpCodes},
  4656. {IntrinsicOp::IOP_abs, TranslateAbs, DXIL::OpCode::NumOpCodes},
  4657. {IntrinsicOp::IOP_acos, TrivialUnaryOperation, DXIL::OpCode::Acos},
  4658. {IntrinsicOp::IOP_all, TranslateAll, DXIL::OpCode::NumOpCodes},
  4659. {IntrinsicOp::IOP_any, TranslateAny, DXIL::OpCode::NumOpCodes},
  4660. {IntrinsicOp::IOP_asdouble, TranslateAsDouble, DXIL::OpCode::MakeDouble},
  4661. {IntrinsicOp::IOP_asfloat, TranslateBitcast, DXIL::OpCode::NumOpCodes},
  4662. {IntrinsicOp::IOP_asfloat16, TranslateBitcast, DXIL::OpCode::NumOpCodes},
  4663. {IntrinsicOp::IOP_asin, TrivialUnaryOperation, DXIL::OpCode::Asin},
  4664. {IntrinsicOp::IOP_asint, TranslateBitcast, DXIL::OpCode::NumOpCodes},
  4665. {IntrinsicOp::IOP_asint16, TranslateBitcast, DXIL::OpCode::NumOpCodes},
  4666. {IntrinsicOp::IOP_asuint, TranslateAsUint, DXIL::OpCode::SplitDouble},
  4667. {IntrinsicOp::IOP_asuint16, TranslateAsUint, DXIL::OpCode::NumOpCodes},
  4668. {IntrinsicOp::IOP_atan, TrivialUnaryOperation, DXIL::OpCode::Atan},
  4669. {IntrinsicOp::IOP_atan2, TranslateAtan2, DXIL::OpCode::NumOpCodes},
  4670. {IntrinsicOp::IOP_ceil, TrivialUnaryOperation, DXIL::OpCode::Round_pi},
  4671. {IntrinsicOp::IOP_clamp, TranslateClamp, DXIL::OpCode::NumOpCodes},
  4672. {IntrinsicOp::IOP_clip, TranslateClip, DXIL::OpCode::NumOpCodes},
  4673. {IntrinsicOp::IOP_cos, TrivialUnaryOperation, DXIL::OpCode::Cos},
  4674. {IntrinsicOp::IOP_cosh, TrivialUnaryOperation, DXIL::OpCode::Hcos},
  4675. {IntrinsicOp::IOP_countbits, TrivialUnaryOperation, DXIL::OpCode::Countbits},
  4676. {IntrinsicOp::IOP_cross, TranslateCross, DXIL::OpCode::NumOpCodes},
  4677. {IntrinsicOp::IOP_ddx, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseX},
  4678. {IntrinsicOp::IOP_ddx_coarse, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseX},
  4679. {IntrinsicOp::IOP_ddx_fine, TrivialUnaryOperation, DXIL::OpCode::DerivFineX},
  4680. {IntrinsicOp::IOP_ddy, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseY},
  4681. {IntrinsicOp::IOP_ddy_coarse, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseY},
  4682. {IntrinsicOp::IOP_ddy_fine, TrivialUnaryOperation, DXIL::OpCode::DerivFineY},
  4683. {IntrinsicOp::IOP_degrees, TranslateDegrees, DXIL::OpCode::NumOpCodes},
  4684. {IntrinsicOp::IOP_determinant, EmptyLower, DXIL::OpCode::NumOpCodes},
  4685. {IntrinsicOp::IOP_distance, TranslateDistance, DXIL::OpCode::NumOpCodes},
  4686. {IntrinsicOp::IOP_dot, TranslateDot, DXIL::OpCode::NumOpCodes},
  4687. {IntrinsicOp::IOP_dot2add, TranslateDot2Add, DXIL::OpCode::Dot2AddHalf},
  4688. {IntrinsicOp::IOP_dot4add_i8packed, TranslateDot4AddPacked, DXIL::OpCode::Dot4AddI8Packed},
  4689. {IntrinsicOp::IOP_dot4add_u8packed, TranslateDot4AddPacked, DXIL::OpCode::Dot4AddU8Packed},
  4690. {IntrinsicOp::IOP_dst, TranslateDst, DXIL::OpCode::NumOpCodes},
  4691. {IntrinsicOp::IOP_exp, TranslateExp, DXIL::OpCode::NumOpCodes},
  4692. {IntrinsicOp::IOP_exp2, TrivialUnaryOperation, DXIL::OpCode::Exp},
  4693. {IntrinsicOp::IOP_f16tof32, TranslateF16ToF32, DXIL::OpCode::LegacyF16ToF32},
  4694. {IntrinsicOp::IOP_f32tof16, TranslateF32ToF16, DXIL::OpCode::LegacyF32ToF16},
  4695. {IntrinsicOp::IOP_faceforward, TranslateFaceforward, DXIL::OpCode::NumOpCodes},
  4696. {IntrinsicOp::IOP_firstbithigh, TranslateFirstbitHi, DXIL::OpCode::FirstbitSHi},
  4697. {IntrinsicOp::IOP_firstbitlow, TranslateFirstbitLo, DXIL::OpCode::FirstbitLo},
  4698. {IntrinsicOp::IOP_floor, TrivialUnaryOperation, DXIL::OpCode::Round_ni},
  4699. {IntrinsicOp::IOP_fma, TrivialTrinaryOperation, DXIL::OpCode::Fma},
  4700. {IntrinsicOp::IOP_fmod, TranslateFMod, DXIL::OpCode::NumOpCodes},
  4701. {IntrinsicOp::IOP_frac, TrivialUnaryOperation, DXIL::OpCode::Frc},
  4702. {IntrinsicOp::IOP_frexp, TranslateFrexp, DXIL::OpCode::NumOpCodes},
  4703. {IntrinsicOp::IOP_fwidth, TranslateFWidth, DXIL::OpCode::NumOpCodes},
  4704. {IntrinsicOp::IOP_isfinite, TrivialIsSpecialFloat, DXIL::OpCode::IsFinite},
  4705. {IntrinsicOp::IOP_isinf, TrivialIsSpecialFloat, DXIL::OpCode::IsInf},
  4706. {IntrinsicOp::IOP_isnan, TrivialIsSpecialFloat, DXIL::OpCode::IsNaN},
  4707. {IntrinsicOp::IOP_ldexp, TranslateLdExp, DXIL::OpCode::NumOpCodes},
  4708. {IntrinsicOp::IOP_length, TranslateLength, DXIL::OpCode::NumOpCodes},
  4709. {IntrinsicOp::IOP_lerp, TranslateLerp, DXIL::OpCode::NumOpCodes},
  4710. {IntrinsicOp::IOP_lit, TranslateLit, DXIL::OpCode::NumOpCodes},
  4711. {IntrinsicOp::IOP_log, TranslateLog, DXIL::OpCode::NumOpCodes},
  4712. {IntrinsicOp::IOP_log10, TranslateLog10, DXIL::OpCode::NumOpCodes},
  4713. {IntrinsicOp::IOP_log2, TrivialUnaryOperation, DXIL::OpCode::Log},
  4714. {IntrinsicOp::IOP_mad, TranslateFUITrinary, DXIL::OpCode::IMad},
  4715. {IntrinsicOp::IOP_max, TranslateFUIBinary, DXIL::OpCode::IMax},
  4716. {IntrinsicOp::IOP_min, TranslateFUIBinary, DXIL::OpCode::IMin},
  4717. {IntrinsicOp::IOP_modf, TranslateModF, DXIL::OpCode::NumOpCodes},
  4718. {IntrinsicOp::IOP_msad4, TranslateMSad4, DXIL::OpCode::NumOpCodes},
  4719. {IntrinsicOp::IOP_mul, TranslateMul, DXIL::OpCode::NumOpCodes},
  4720. {IntrinsicOp::IOP_normalize, TranslateNormalize, DXIL::OpCode::NumOpCodes},
  4721. {IntrinsicOp::IOP_pow, TranslatePow, DXIL::OpCode::NumOpCodes},
  4722. {IntrinsicOp::IOP_printf, TranslatePrintf, DXIL::OpCode::NumOpCodes},
  4723. {IntrinsicOp::IOP_radians, TranslateRadians, DXIL::OpCode::NumOpCodes},
  4724. {IntrinsicOp::IOP_rcp, TranslateRCP, DXIL::OpCode::NumOpCodes},
  4725. {IntrinsicOp::IOP_reflect, TranslateReflect, DXIL::OpCode::NumOpCodes},
  4726. {IntrinsicOp::IOP_refract, TranslateRefract, DXIL::OpCode::NumOpCodes},
  4727. {IntrinsicOp::IOP_reversebits, TrivialUnaryOperation, DXIL::OpCode::Bfrev},
  4728. {IntrinsicOp::IOP_round, TrivialUnaryOperation, DXIL::OpCode::Round_ne},
  4729. {IntrinsicOp::IOP_rsqrt, TrivialUnaryOperation, DXIL::OpCode::Rsqrt},
  4730. {IntrinsicOp::IOP_saturate, TrivialUnaryOperation, DXIL::OpCode::Saturate},
  4731. {IntrinsicOp::IOP_sign, TranslateSign, DXIL::OpCode::NumOpCodes},
  4732. {IntrinsicOp::IOP_sin, TrivialUnaryOperation, DXIL::OpCode::Sin},
  4733. {IntrinsicOp::IOP_sincos, EmptyLower, DXIL::OpCode::NumOpCodes},
  4734. {IntrinsicOp::IOP_sinh, TrivialUnaryOperation, DXIL::OpCode::Hsin},
  4735. {IntrinsicOp::IOP_smoothstep, TranslateSmoothStep, DXIL::OpCode::NumOpCodes},
  4736. {IntrinsicOp::IOP_source_mark, EmptyLower, DXIL::OpCode::NumOpCodes},
  4737. {IntrinsicOp::IOP_sqrt, TrivialUnaryOperation, DXIL::OpCode::Sqrt},
  4738. {IntrinsicOp::IOP_step, TranslateStep, DXIL::OpCode::NumOpCodes},
  4739. {IntrinsicOp::IOP_tan, TrivialUnaryOperation, DXIL::OpCode::Tan},
  4740. {IntrinsicOp::IOP_tanh, TrivialUnaryOperation, DXIL::OpCode::Htan},
  4741. {IntrinsicOp::IOP_tex1D, EmptyLower, DXIL::OpCode::NumOpCodes},
  4742. {IntrinsicOp::IOP_tex1Dbias, EmptyLower, DXIL::OpCode::NumOpCodes},
  4743. {IntrinsicOp::IOP_tex1Dgrad, EmptyLower, DXIL::OpCode::NumOpCodes},
  4744. {IntrinsicOp::IOP_tex1Dlod, EmptyLower, DXIL::OpCode::NumOpCodes},
  4745. {IntrinsicOp::IOP_tex1Dproj, EmptyLower, DXIL::OpCode::NumOpCodes},
  4746. {IntrinsicOp::IOP_tex2D, EmptyLower, DXIL::OpCode::NumOpCodes},
  4747. {IntrinsicOp::IOP_tex2Dbias, EmptyLower, DXIL::OpCode::NumOpCodes},
  4748. {IntrinsicOp::IOP_tex2Dgrad, EmptyLower, DXIL::OpCode::NumOpCodes},
  4749. {IntrinsicOp::IOP_tex2Dlod, EmptyLower, DXIL::OpCode::NumOpCodes},
  4750. {IntrinsicOp::IOP_tex2Dproj, EmptyLower, DXIL::OpCode::NumOpCodes},
  4751. {IntrinsicOp::IOP_tex3D, EmptyLower, DXIL::OpCode::NumOpCodes},
  4752. {IntrinsicOp::IOP_tex3Dbias, EmptyLower, DXIL::OpCode::NumOpCodes},
  4753. {IntrinsicOp::IOP_tex3Dgrad, EmptyLower, DXIL::OpCode::NumOpCodes},
  4754. {IntrinsicOp::IOP_tex3Dlod, EmptyLower, DXIL::OpCode::NumOpCodes},
  4755. {IntrinsicOp::IOP_tex3Dproj, EmptyLower, DXIL::OpCode::NumOpCodes},
  4756. {IntrinsicOp::IOP_texCUBE, EmptyLower, DXIL::OpCode::NumOpCodes},
  4757. {IntrinsicOp::IOP_texCUBEbias, EmptyLower, DXIL::OpCode::NumOpCodes},
  4758. {IntrinsicOp::IOP_texCUBEgrad, EmptyLower, DXIL::OpCode::NumOpCodes},
  4759. {IntrinsicOp::IOP_texCUBElod, EmptyLower, DXIL::OpCode::NumOpCodes},
  4760. {IntrinsicOp::IOP_texCUBEproj, EmptyLower, DXIL::OpCode::NumOpCodes},
  4761. {IntrinsicOp::IOP_transpose, EmptyLower, DXIL::OpCode::NumOpCodes},
  4762. {IntrinsicOp::IOP_trunc, TrivialUnaryOperation, DXIL::OpCode::Round_z},
  4763. {IntrinsicOp::MOP_Append, StreamOutputLower, DXIL::OpCode::EmitStream},
  4764. {IntrinsicOp::MOP_RestartStrip, StreamOutputLower, DXIL::OpCode::CutStream},
  4765. {IntrinsicOp::MOP_CalculateLevelOfDetail, TranslateCalculateLOD, DXIL::OpCode::NumOpCodes},
  4766. {IntrinsicOp::MOP_CalculateLevelOfDetailUnclamped, TranslateCalculateLOD, DXIL::OpCode::NumOpCodes},
  4767. {IntrinsicOp::MOP_GetDimensions, TranslateGetDimensions, DXIL::OpCode::NumOpCodes},
  4768. {IntrinsicOp::MOP_Load, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
  4769. {IntrinsicOp::MOP_Sample, TranslateSample, DXIL::OpCode::Sample},
  4770. {IntrinsicOp::MOP_SampleBias, TranslateSample, DXIL::OpCode::SampleBias},
  4771. {IntrinsicOp::MOP_SampleCmp, TranslateSample, DXIL::OpCode::SampleCmp},
  4772. {IntrinsicOp::MOP_SampleCmpLevelZero, TranslateSample, DXIL::OpCode::SampleCmpLevelZero},
  4773. {IntrinsicOp::MOP_SampleGrad, TranslateSample, DXIL::OpCode::SampleGrad},
  4774. {IntrinsicOp::MOP_SampleLevel, TranslateSample, DXIL::OpCode::SampleLevel},
  4775. {IntrinsicOp::MOP_Gather, TranslateGather, DXIL::OpCode::TextureGather},
  4776. {IntrinsicOp::MOP_GatherAlpha, TranslateGather, DXIL::OpCode::TextureGather},
  4777. {IntrinsicOp::MOP_GatherBlue, TranslateGather, DXIL::OpCode::TextureGather},
  4778. {IntrinsicOp::MOP_GatherCmp, TranslateGather, DXIL::OpCode::TextureGatherCmp},
  4779. {IntrinsicOp::MOP_GatherCmpAlpha, TranslateGather, DXIL::OpCode::TextureGatherCmp},
  4780. {IntrinsicOp::MOP_GatherCmpBlue, TranslateGather, DXIL::OpCode::TextureGatherCmp},
  4781. {IntrinsicOp::MOP_GatherCmpGreen, TranslateGather, DXIL::OpCode::TextureGatherCmp},
  4782. {IntrinsicOp::MOP_GatherCmpRed, TranslateGather, DXIL::OpCode::TextureGatherCmp},
  4783. {IntrinsicOp::MOP_GatherGreen, TranslateGather, DXIL::OpCode::TextureGather},
  4784. {IntrinsicOp::MOP_GatherRed, TranslateGather, DXIL::OpCode::TextureGather},
  4785. {IntrinsicOp::MOP_GetSamplePosition, TranslateGetSamplePosition, DXIL::OpCode::NumOpCodes},
  4786. {IntrinsicOp::MOP_Load2, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
  4787. {IntrinsicOp::MOP_Load3, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
  4788. {IntrinsicOp::MOP_Load4, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
  4789. {IntrinsicOp::MOP_InterlockedAdd, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4790. {IntrinsicOp::MOP_InterlockedAnd, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4791. {IntrinsicOp::MOP_InterlockedCompareExchange, TranslateMopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
  4792. {IntrinsicOp::MOP_InterlockedCompareStore, TranslateMopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
  4793. {IntrinsicOp::MOP_InterlockedExchange, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4794. {IntrinsicOp::MOP_InterlockedMax, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4795. {IntrinsicOp::MOP_InterlockedMin, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4796. {IntrinsicOp::MOP_InterlockedOr, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4797. {IntrinsicOp::MOP_InterlockedXor, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4798. {IntrinsicOp::MOP_Store, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
  4799. {IntrinsicOp::MOP_Store2, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
  4800. {IntrinsicOp::MOP_Store3, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
  4801. {IntrinsicOp::MOP_Store4, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
  4802. {IntrinsicOp::MOP_DecrementCounter, GenerateUpdateCounter, DXIL::OpCode::NumOpCodes},
  4803. {IntrinsicOp::MOP_IncrementCounter, GenerateUpdateCounter, DXIL::OpCode::NumOpCodes},
  4804. {IntrinsicOp::MOP_Consume, EmptyLower, DXIL::OpCode::NumOpCodes},
  4805. {IntrinsicOp::MOP_WriteSamplerFeedback, TranslateWriteSamplerFeedback, DXIL::OpCode::WriteSamplerFeedback},
  4806. {IntrinsicOp::MOP_WriteSamplerFeedbackBias, TranslateWriteSamplerFeedback, DXIL::OpCode::WriteSamplerFeedbackBias},
  4807. {IntrinsicOp::MOP_WriteSamplerFeedbackGrad, TranslateWriteSamplerFeedback, DXIL::OpCode::WriteSamplerFeedbackGrad},
  4808. {IntrinsicOp::MOP_WriteSamplerFeedbackLevel, TranslateWriteSamplerFeedback, DXIL::OpCode::WriteSamplerFeedbackLevel},
  4809. {IntrinsicOp::MOP_Abort, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_Abort},
  4810. {IntrinsicOp::MOP_CandidateGeometryIndex, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CandidateGeometryIndex},
  4811. {IntrinsicOp::MOP_CandidateInstanceContributionToHitGroupIndex, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CandidateInstanceContributionToHitGroupIndex},
  4812. {IntrinsicOp::MOP_CandidateInstanceID, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CandidateInstanceID},
  4813. {IntrinsicOp::MOP_CandidateInstanceIndex, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CandidateInstanceIndex},
  4814. {IntrinsicOp::MOP_CandidateObjectRayDirection, TranslateRayQueryFloat3Getter, DXIL::OpCode::RayQuery_CandidateObjectRayDirection},
  4815. {IntrinsicOp::MOP_CandidateObjectRayOrigin, TranslateRayQueryFloat3Getter, DXIL::OpCode::RayQuery_CandidateObjectRayOrigin},
  4816. {IntrinsicOp::MOP_CandidateObjectToWorld3x4, TranslateRayQueryMatrix3x4Operation, DXIL::OpCode::RayQuery_CandidateObjectToWorld3x4},
  4817. {IntrinsicOp::MOP_CandidateObjectToWorld4x3, TranslateRayQueryTransposedMatrix3x4Operation, DXIL::OpCode::RayQuery_CandidateObjectToWorld3x4},
  4818. {IntrinsicOp::MOP_CandidatePrimitiveIndex, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CandidatePrimitiveIndex},
  4819. {IntrinsicOp::MOP_CandidateProceduralPrimitiveNonOpaque, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CandidateProceduralPrimitiveNonOpaque},
  4820. {IntrinsicOp::MOP_CandidateTriangleBarycentrics, TranslateRayQueryFloat2Getter, DXIL::OpCode::RayQuery_CandidateTriangleBarycentrics},
  4821. {IntrinsicOp::MOP_CandidateTriangleFrontFace, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CandidateTriangleFrontFace},
  4822. {IntrinsicOp::MOP_CandidateTriangleRayT, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CandidateTriangleRayT},
  4823. {IntrinsicOp::MOP_CandidateType, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CandidateType},
  4824. {IntrinsicOp::MOP_CandidateWorldToObject3x4, TranslateRayQueryMatrix3x4Operation, DXIL::OpCode::RayQuery_CandidateWorldToObject3x4},
  4825. {IntrinsicOp::MOP_CandidateWorldToObject4x3, TranslateRayQueryTransposedMatrix3x4Operation, DXIL::OpCode::RayQuery_CandidateWorldToObject3x4},
  4826. {IntrinsicOp::MOP_CommitNonOpaqueTriangleHit, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CommitNonOpaqueTriangleHit},
  4827. {IntrinsicOp::MOP_CommitProceduralPrimitiveHit, TranslateCommitProceduralPrimitiveHit, DXIL::OpCode::RayQuery_CommitProceduralPrimitiveHit},
  4828. {IntrinsicOp::MOP_CommittedGeometryIndex, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CommittedGeometryIndex},
  4829. {IntrinsicOp::MOP_CommittedInstanceContributionToHitGroupIndex, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CommittedInstanceContributionToHitGroupIndex},
  4830. {IntrinsicOp::MOP_CommittedInstanceID, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CommittedInstanceID},
  4831. {IntrinsicOp::MOP_CommittedInstanceIndex, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CommittedInstanceIndex},
  4832. {IntrinsicOp::MOP_CommittedObjectRayDirection, TranslateRayQueryFloat3Getter, DXIL::OpCode::RayQuery_CommittedObjectRayDirection},
  4833. {IntrinsicOp::MOP_CommittedObjectRayOrigin, TranslateRayQueryFloat3Getter, DXIL::OpCode::RayQuery_CommittedObjectRayOrigin},
  4834. {IntrinsicOp::MOP_CommittedObjectToWorld3x4, TranslateRayQueryMatrix3x4Operation, DXIL::OpCode::RayQuery_CommittedObjectToWorld3x4},
  4835. {IntrinsicOp::MOP_CommittedObjectToWorld4x3, TranslateRayQueryTransposedMatrix3x4Operation, DXIL::OpCode::RayQuery_CommittedObjectToWorld3x4},
  4836. {IntrinsicOp::MOP_CommittedPrimitiveIndex, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CommittedPrimitiveIndex},
  4837. {IntrinsicOp::MOP_CommittedRayT, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CommittedRayT},
  4838. {IntrinsicOp::MOP_CommittedStatus, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CommittedStatus},
  4839. {IntrinsicOp::MOP_CommittedTriangleBarycentrics, TranslateRayQueryFloat2Getter, DXIL::OpCode::RayQuery_CommittedTriangleBarycentrics},
  4840. {IntrinsicOp::MOP_CommittedTriangleFrontFace, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CommittedTriangleFrontFace},
  4841. {IntrinsicOp::MOP_CommittedWorldToObject3x4, TranslateRayQueryMatrix3x4Operation, DXIL::OpCode::RayQuery_CommittedWorldToObject3x4},
  4842. {IntrinsicOp::MOP_CommittedWorldToObject4x3, TranslateRayQueryTransposedMatrix3x4Operation, DXIL::OpCode::RayQuery_CommittedWorldToObject3x4},
  4843. {IntrinsicOp::MOP_Proceed, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_Proceed},
  4844. {IntrinsicOp::MOP_RayFlags, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_RayFlags},
  4845. {IntrinsicOp::MOP_RayTMin, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_RayTMin},
  4846. {IntrinsicOp::MOP_TraceRayInline, TranslateTraceRayInline, DXIL::OpCode::RayQuery_TraceRayInline},
  4847. {IntrinsicOp::MOP_WorldRayDirection, TranslateRayQueryFloat3Getter, DXIL::OpCode::RayQuery_WorldRayDirection},
  4848. {IntrinsicOp::MOP_WorldRayOrigin, TranslateRayQueryFloat3Getter, DXIL::OpCode::RayQuery_WorldRayOrigin},
  4849. // SPIRV change starts
  4850. #ifdef ENABLE_SPIRV_CODEGEN
  4851. {IntrinsicOp::MOP_SubpassLoad, UnsupportedVulkanIntrinsic, DXIL::OpCode::NumOpCodes},
  4852. #endif // ENABLE_SPIRV_CODEGEN
  4853. // SPIRV change ends
// Manually added part.
  4855. { IntrinsicOp::IOP_InterlockedUMax, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes },
  4856. { IntrinsicOp::IOP_InterlockedUMin, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes },
  4857. { IntrinsicOp::IOP_WaveActiveUMax, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp },
  4858. { IntrinsicOp::IOP_WaveActiveUMin, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp },
  4859. { IntrinsicOp::IOP_WaveActiveUProduct, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp },
  4860. { IntrinsicOp::IOP_WaveActiveUSum, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp },
  4861. { IntrinsicOp::IOP_WaveMultiPrefixUProduct, TranslateWaveMultiPrefix, DXIL::OpCode::WaveMultiPrefixOp },
  4862. { IntrinsicOp::IOP_WaveMultiPrefixUSum, TranslateWaveMultiPrefix, DXIL::OpCode::WaveMultiPrefixOp },
  4863. { IntrinsicOp::IOP_WavePrefixUProduct, TranslateWaveA2A, DXIL::OpCode::WavePrefixOp },
  4864. { IntrinsicOp::IOP_WavePrefixUSum, TranslateWaveA2A, DXIL::OpCode::WavePrefixOp },
  4865. { IntrinsicOp::IOP_uabs, TranslateUAbs, DXIL::OpCode::NumOpCodes },
  4866. { IntrinsicOp::IOP_uclamp, TranslateClamp, DXIL::OpCode::NumOpCodes },
  4867. { IntrinsicOp::IOP_ufirstbithigh, TranslateFirstbitHi, DXIL::OpCode::FirstbitHi },
  4868. { IntrinsicOp::IOP_umad, TranslateFUITrinary, DXIL::OpCode::UMad},
  4869. { IntrinsicOp::IOP_umax, TranslateFUIBinary, DXIL::OpCode::UMax},
  4870. { IntrinsicOp::IOP_umin, TranslateFUIBinary, DXIL::OpCode::UMin },
  4871. { IntrinsicOp::IOP_umul, TranslateMul, DXIL::OpCode::UMul },
  4872. { IntrinsicOp::IOP_usign, TranslateUSign, DXIL::OpCode::UMax },
  4873. { IntrinsicOp::MOP_InterlockedUMax, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes },
  4874. { IntrinsicOp::MOP_InterlockedUMin, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes },
  4875. };
  4876. }
// Compile-time guard: gLowerTable must contain exactly one entry per
// IntrinsicOp, since TranslateBuiltinIntrinsic indexes it by raw opcode.
static_assert(sizeof(gLowerTable) / sizeof(gLowerTable[0]) == static_cast<size_t>(IntrinsicOp::Num_Intrinsics),
              "Intrinsic lowering table must be updated to account for new intrinsics.");
  4879. static void TranslateBuiltinIntrinsic(CallInst *CI,
  4880. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  4881. unsigned opcode = hlsl::GetHLOpcode(CI);
  4882. const IntrinsicLower &lower = gLowerTable[opcode];
  4883. Value *Result =
  4884. lower.LowerFunc(CI, lower.IntriOpcode, lower.DxilOpcode, helper, pObjHelper, Translated);
  4885. if (Result)
  4886. CI->replaceAllUsesWith(Result);
  4887. }
  4888. // SharedMem.
  4889. namespace {
  4890. bool IsSharedMemPtr(Value *Ptr) {
  4891. return Ptr->getType()->getPointerAddressSpace() == DXIL::kTGSMAddrSpace;
  4892. }
  4893. bool IsLocalVariablePtr(Value *Ptr) {
  4894. while (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
  4895. Ptr = GEP->getPointerOperand();
  4896. }
  4897. bool isAlloca = isa<AllocaInst>(Ptr);
  4898. if (isAlloca) return true;
  4899. GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr);
  4900. if (!GV) return false;
  4901. return GV->getLinkage() == GlobalValue::LinkageTypes::InternalLinkage;
  4902. }
  4903. }
  4904. // Constant buffer.
  4905. namespace {
  4906. unsigned GetEltTypeByteSizeForConstBuf(Type *EltType, const DataLayout &DL) {
  4907. DXASSERT(EltType->isIntegerTy() || EltType->isFloatingPointTy(),
  4908. "not an element type");
  4909. // TODO: Use real size after change constant buffer into linear layout.
  4910. if (DL.getTypeSizeInBits(EltType) <= 32) {
  4911. // Constant buffer is 4 bytes align.
  4912. return 4;
  4913. } else
  4914. return 8;
  4915. }
  4916. Value *GenerateCBLoad(Value *handle, Value *offset, Type *EltTy, OP *hlslOP,
  4917. IRBuilder<> &Builder) {
  4918. Constant *OpArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoad);
  4919. DXASSERT(!EltTy->isIntegerTy(1), "Bools should not be loaded as their register representation.");
  4920. // Align to 8 bytes for now.
  4921. Constant *align = hlslOP->GetU32Const(8);
  4922. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoad, EltTy);
  4923. return Builder.CreateCall(CBLoad, {OpArg, handle, offset, align});
  4924. }
  4925. Value *TranslateConstBufMatLd(Type *matType, Value *handle, Value *offset,
  4926. bool colMajor, OP *OP, const DataLayout &DL,
  4927. IRBuilder<> &Builder) {
  4928. HLMatrixType MatTy = HLMatrixType::cast(matType);
  4929. Type *EltTy = MatTy.getElementTypeForMem();
  4930. unsigned matSize = MatTy.getNumElements();
  4931. std::vector<Value *> elts(matSize);
  4932. Value *EltByteSize = ConstantInt::get(
  4933. offset->getType(), GetEltTypeByteSizeForConstBuf(EltTy, DL));
  4934. // TODO: use real size after change constant buffer into linear layout.
  4935. Value *baseOffset = offset;
  4936. for (unsigned i = 0; i < matSize; i++) {
  4937. elts[i] = GenerateCBLoad(handle, baseOffset, EltTy, OP, Builder);
  4938. baseOffset = Builder.CreateAdd(baseOffset, EltByteSize);
  4939. }
  4940. Value* Vec = HLMatrixLower::BuildVector(EltTy, elts, Builder);
  4941. Vec = MatTy.emitLoweredMemToReg(Vec, Builder);
  4942. return Vec;
  4943. }
  4944. void TranslateCBGep(GetElementPtrInst *GEP, Value *handle, Value *baseOffset,
  4945. hlsl::OP *hlslOP, IRBuilder<> &Builder,
  4946. DxilFieldAnnotation *prevFieldAnnotation,
  4947. const DataLayout &DL, DxilTypeSystem &dxilTypeSys,
  4948. HLObjectOperationLowerHelper *pObjHelper);
  4949. Value *GenerateVecEltFromGEP(Value *ldData, GetElementPtrInst *GEP,
  4950. IRBuilder<> &Builder, bool bInsertLdNextToGEP) {
  4951. DXASSERT(GEP->getNumIndices() == 2, "must have 2 level");
  4952. Value *baseIdx = (GEP->idx_begin())->get();
  4953. Value *zeroIdx = Builder.getInt32(0);
  4954. DXASSERT_LOCALVAR(baseIdx && zeroIdx, baseIdx == zeroIdx,
  4955. "base index must be 0");
  4956. Value *idx = (GEP->idx_begin() + 1)->get();
  4957. if (dyn_cast<ConstantInt>(idx)) {
  4958. return Builder.CreateExtractElement(ldData, idx);
  4959. } else {
  4960. // Dynamic indexing.
  4961. // Copy vec to array.
  4962. Type *Ty = ldData->getType();
  4963. Type *EltTy = Ty->getVectorElementType();
  4964. unsigned vecSize = Ty->getVectorNumElements();
  4965. ArrayType *AT = ArrayType::get(EltTy, vecSize);
  4966. IRBuilder<> AllocaBuilder(
  4967. GEP->getParent()->getParent()->getEntryBlock().getFirstInsertionPt());
  4968. Value *tempArray = AllocaBuilder.CreateAlloca(AT);
  4969. Value *zero = Builder.getInt32(0);
  4970. for (unsigned int i = 0; i < vecSize; i++) {
  4971. Value *Elt = Builder.CreateExtractElement(ldData, Builder.getInt32(i));
  4972. Value *Ptr =
  4973. Builder.CreateInBoundsGEP(tempArray, {zero, Builder.getInt32(i)});
  4974. Builder.CreateStore(Elt, Ptr);
  4975. }
  4976. // Load from temp array.
  4977. if (bInsertLdNextToGEP) {
  4978. // Insert the new GEP just before the old and to-be-deleted GEP
  4979. Builder.SetInsertPoint(GEP);
  4980. }
  4981. Value *EltGEP = Builder.CreateInBoundsGEP(tempArray, {zero, idx});
  4982. return Builder.CreateLoad(EltGEP);
  4983. }
  4984. }
  4985. void TranslateResourceInCB(LoadInst *LI,
  4986. HLObjectOperationLowerHelper *pObjHelper,
  4987. GlobalVariable *CbGV) {
  4988. if (LI->user_empty()) {
  4989. LI->eraseFromParent();
  4990. return;
  4991. }
  4992. GetElementPtrInst *Ptr = cast<GetElementPtrInst>(LI->getPointerOperand());
  4993. CallInst *CI = cast<CallInst>(LI->user_back());
  4994. CallInst *Anno = cast<CallInst>(CI->user_back());
  4995. DxilResourceProperties RP = pObjHelper->GetResPropsFromAnnotateHandle(Anno);
  4996. Value *ResPtr = pObjHelper->GetOrCreateResourceForCbPtr(Ptr, CbGV, RP);
  4997. // Lower Ptr to GV base Ptr.
  4998. Value *GvPtr = pObjHelper->LowerCbResourcePtr(Ptr, ResPtr);
  4999. IRBuilder<> Builder(LI);
  5000. Value *GvLd = Builder.CreateLoad(GvPtr);
  5001. LI->replaceAllUsesWith(GvLd);
  5002. LI->eraseFromParent();
  5003. }
// Lower one user of a cbuffer address (handle + byte offset) into DXIL
// cbuffer loads. Handles matrix loads, matrix subscripts, scalar/vector
// loads (including resources embedded in the cbuffer), and recurses into
// GEPs via TranslateCBGep. The lowered instruction replaces 'user', which
// is erased here along with its rewritten users where applicable.
void TranslateCBAddressUser(Instruction *user, Value *handle, Value *baseOffset,
                            hlsl::OP *hlslOP,
                            DxilFieldAnnotation *prevFieldAnnotation,
                            DxilTypeSystem &dxilTypeSys, const DataLayout &DL,
                            HLObjectOperationLowerHelper *pObjHelper) {
  IRBuilder<> Builder(user);
  if (CallInst *CI = dyn_cast<CallInst>(user)) {
    HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
    unsigned opcode = GetHLOpcode(CI);
    if (group == HLOpcodeGroup::HLMatLoadStore) {
      // Matrix load from the cbuffer; stores on cbuffers are illegal.
      HLMatLoadStoreOpcode matOp = static_cast<HLMatLoadStoreOpcode>(opcode);
      bool colMajor = matOp == HLMatLoadStoreOpcode::ColMatLoad;
      DXASSERT(matOp == HLMatLoadStoreOpcode::ColMatLoad ||
                   matOp == HLMatLoadStoreOpcode::RowMatLoad,
               "No store on cbuffer");
      Type *matType = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx)
                          ->getType()
                          ->getPointerElementType();
      Value *newLd = TranslateConstBufMatLd(matType, handle, baseOffset,
                                            colMajor, hlslOP, DL, Builder);
      CI->replaceAllUsesWith(newLd);
      CI->eraseFromParent();
    } else if (group == HLOpcodeGroup::HLSubscript) {
      // Matrix subscript / element access: compute a per-element byte offset
      // for every result element, then load each element separately.
      HLSubscriptOpcode subOp = static_cast<HLSubscriptOpcode>(opcode);
      Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx);
      HLMatrixType MatTy =
          HLMatrixType::cast(basePtr->getType()->getPointerElementType());
      Type *EltTy = MatTy.getElementTypeForReg();
      Value *EltByteSize = ConstantInt::get(
          baseOffset->getType(), GetEltTypeByteSizeForConstBuf(EltTy, DL));
      Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx);
      Type *resultType = CI->getType()->getPointerElementType();
      unsigned resultSize = 1;
      if (resultType->isVectorTy())
        resultSize = resultType->getVectorNumElements();
      DXASSERT(resultSize <= 16, "up to 4x4 elements in vector or matrix");
      _Analysis_assume_(resultSize <= 16);
      // Byte offset of each result element within the cbuffer.
      Value *idxList[16];
      switch (subOp) {
      case HLSubscriptOpcode::ColMatSubscript:
      case HLSubscriptOpcode::RowMatSubscript: {
        // Subscript form: one index operand per result element.
        // NOTE: the inner 'idx' intentionally shadows the outer one above.
        for (unsigned i = 0; i < resultSize; i++) {
          Value *idx =
              CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx + i);
          Value *offset = Builder.CreateMul(idx, EltByteSize);
          idxList[i] = Builder.CreateAdd(baseOffset, offset);
        }
      } break;
      case HLSubscriptOpcode::RowMatElement:
      case HLSubscriptOpcode::ColMatElement: {
        // Element form: indices come packed in one constant aggregate.
        Constant *EltIdxs = cast<Constant>(idx);
        for (unsigned i = 0; i < resultSize; i++) {
          Value *offset =
              Builder.CreateMul(EltIdxs->getAggregateElement(i), EltByteSize);
          idxList[i] = Builder.CreateAdd(baseOffset, offset);
        }
      } break;
      default:
        DXASSERT(0, "invalid operation on const buffer");
        break;
      }
      // Load each element and assemble the scalar or vector result.
      Value *ldData = UndefValue::get(resultType);
      if (resultType->isVectorTy()) {
        for (unsigned i = 0; i < resultSize; i++) {
          Value *eltData =
              GenerateCBLoad(handle, idxList[i], EltTy, hlslOP, Builder);
          ldData = Builder.CreateInsertElement(ldData, eltData, i);
        }
      } else {
        ldData = GenerateCBLoad(handle, idxList[0], EltTy, hlslOP, Builder);
      }
      // Rewrite the subscript's users; advance the iterator before erasing.
      for (auto U = CI->user_begin(); U != CI->user_end();) {
        Value *subsUser = *(U++);
        if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(subsUser)) {
          // GEP into the subscript result: extract the addressed element,
          // then replace every load through the GEP with it.
          Value *subData = GenerateVecEltFromGEP(ldData, GEP, Builder,
                                                 /*bInsertLdNextToGEP*/ true);
          for (auto gepU = GEP->user_begin(); gepU != GEP->user_end();) {
            Value *gepUser = *(gepU++);
            // Must be a load here.
            LoadInst *ldUser = cast<LoadInst>(gepUser);
            ldUser->replaceAllUsesWith(subData);
            ldUser->eraseFromParent();
          }
          GEP->eraseFromParent();
        } else {
          // Must be a load here.
          LoadInst *ldUser = cast<LoadInst>(subsUser);
          ldUser->replaceAllUsesWith(ldData);
          ldUser->eraseFromParent();
        }
      }
      CI->eraseFromParent();
    } else {
      DXASSERT(0, "not implemented yet");
    }
  } else if (LoadInst *ldInst = dyn_cast<LoadInst>(user)) {
    Type *Ty = ldInst->getType();
    Type *EltTy = Ty->getScalarType();
    // Resource inside cbuffer is lowered after GenerateDxilOperations.
    if (dxilutil::IsHLSLObjectType(Ty)) {
      CallInst *CI = cast<CallInst>(handle);
      GlobalVariable *CbGV = cast<GlobalVariable>(
          CI->getArgOperand(HLOperandIndex::kCreateHandleResourceOpIdx));
      TranslateResourceInCB(ldInst, pObjHelper, CbGV);
      return;
    }
    DXASSERT(!Ty->isAggregateType(), "should be flat in previous pass");
    unsigned EltByteSize = GetEltTypeByteSizeForConstBuf(EltTy, DL);
    Value *newLd = GenerateCBLoad(handle, baseOffset, EltTy, hlslOP, Builder);
    if (Ty->isVectorTy()) {
      // Load each lane at increasing byte offsets and build the vector.
      Value *result = UndefValue::get(Ty);
      result = Builder.CreateInsertElement(result, newLd, (uint64_t)0);
      // Update offset by 4 bytes.
      Value *offset =
          Builder.CreateAdd(baseOffset, hlslOP->GetU32Const(EltByteSize));
      for (unsigned i = 1; i < Ty->getVectorNumElements(); i++) {
        Value *elt = GenerateCBLoad(handle, offset, EltTy, hlslOP, Builder);
        result = Builder.CreateInsertElement(result, elt, i);
        // Update offset by 4 bytes.
        offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(EltByteSize));
      }
      newLd = result;
    }
    ldInst->replaceAllUsesWith(newLd);
    ldInst->eraseFromParent();
  } else {
    // Must be GEP here: recurse to lower the addressed sub-object.
    GetElementPtrInst *GEP = cast<GetElementPtrInst>(user);
    TranslateCBGep(GEP, handle, baseOffset, hlslOP, Builder,
                   prevFieldAnnotation, DL, dxilTypeSys, pObjHelper);
    GEP->eraseFromParent();
  }
}
// Translate a GEP into a (non-legacy layout) cbuffer pointer into a byte
// offset, then lower every user of the GEP against that accumulated offset.
// prevFieldAnnotation is the annotation of the field the incoming pointer
// already addresses (null at the top level); it supplies cbuffer layout info
// for array/field sizing below.
void TranslateCBGep(GetElementPtrInst *GEP, Value *handle, Value *baseOffset,
                    hlsl::OP *hlslOP, IRBuilder<> &Builder,
                    DxilFieldAnnotation *prevFieldAnnotation,
                    const DataLayout &DL, DxilTypeSystem &dxilTypeSys,
                    HLObjectOperationLowerHelper *pObjHelper) {
  SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());
  Value *offset = baseOffset;
  // update offset
  DxilFieldAnnotation *fieldAnnotation = prevFieldAnnotation;
  gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP);
  for (; GEPIt != E; GEPIt++) {
    Value *idx = GEPIt.getOperand();
    unsigned immIdx = 0;
    bool bImmIdx = false;
    if (Constant *constIdx = dyn_cast<Constant>(idx)) {
      immIdx = constIdx->getUniqueInteger().getLimitedValue();
      bImmIdx = true;
    }
    if (GEPIt->isPointerTy()) {
      // Leading pointer index: steps over whole objects of the pointee type.
      Type *EltTy = GEPIt->getPointerElementType();
      unsigned size = 0;
      if (StructType *ST = dyn_cast<StructType>(EltTy)) {
        // Struct size comes from its cbuffer layout annotation, not DataLayout.
        DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST);
        size = annotation->GetCBufferSize();
      } else {
        DXASSERT(fieldAnnotation, "must be a field");
        if (ArrayType *AT = dyn_cast<ArrayType>(EltTy)) {
          unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize(
              *fieldAnnotation, EltTy, dxilTypeSys);
          // Decide the nested array size.
          unsigned nestedArraySize = 1;
          Type *EltTy = AT->getArrayElementType();
          // support multi level of array
          while (EltTy->isArrayTy()) {
            ArrayType *EltAT = cast<ArrayType>(EltTy);
            nestedArraySize *= EltAT->getNumElements();
            EltTy = EltAT->getElementType();
          }
          // Align to 4 * 4 bytes: every array element begins on a 16-byte
          // cbuffer register boundary.
          unsigned alignedSize = (EltSize + 15) & 0xfffffff0;
          size = nestedArraySize * alignedSize;
        } else {
          size = DL.getTypeAllocSize(EltTy);
        }
      }
      // Align to 4 * 4 bytes.
      size = (size + 15) & 0xfffffff0;
      if (bImmIdx) {
        // Constant index: fold the multiply at compile time.
        unsigned tempOffset = size * immIdx;
        offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(tempOffset));
      } else {
        // Dynamic index: emit mul + add.
        Value *tempOffset = Builder.CreateMul(idx, hlslOP->GetU32Const(size));
        offset = Builder.CreateAdd(offset, tempOffset);
      }
    } else if (GEPIt->isStructTy()) {
      // Struct field: add the field's cbuffer offset and remember its
      // annotation for deeper GEP levels.
      StructType *ST = cast<StructType>(*GEPIt);
      DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST);
      fieldAnnotation = &annotation->GetFieldAnnotation(immIdx);
      unsigned structOffset = fieldAnnotation->GetCBufferOffset();
      offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(structOffset));
    } else if (GEPIt->isArrayTy()) {
      DXASSERT(fieldAnnotation != nullptr, "must a field");
      unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize(
          *fieldAnnotation, *GEPIt, dxilTypeSys);
      // Decide the nested array size.
      unsigned nestedArraySize = 1;
      Type *EltTy = GEPIt->getArrayElementType();
      // support multi level of array
      while (EltTy->isArrayTy()) {
        ArrayType *EltAT = cast<ArrayType>(EltTy);
        nestedArraySize *= EltAT->getNumElements();
        EltTy = EltAT->getElementType();
      }
      // Align to 4 * 4 bytes.
      unsigned alignedSize = (EltSize + 15) & 0xfffffff0;
      unsigned size = nestedArraySize * alignedSize;
      if (bImmIdx) {
        unsigned tempOffset = size * immIdx;
        offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(tempOffset));
      } else {
        Value *tempOffset = Builder.CreateMul(idx, hlslOP->GetU32Const(size));
        offset = Builder.CreateAdd(offset, tempOffset);
      }
    } else if (GEPIt->isVectorTy()) {
      // Vector component: scale by element byte size (no register alignment).
      unsigned size = DL.getTypeAllocSize(GEPIt->getVectorElementType());
      if (bImmIdx) {
        unsigned tempOffset = size * immIdx;
        offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(tempOffset));
      } else {
        Value *tempOffset = Builder.CreateMul(idx, hlslOP->GetU32Const(size));
        offset = Builder.CreateAdd(offset, tempOffset);
      }
    } else {
      // A scalar can only appear as the last indexed type.
      gep_type_iterator temp = GEPIt;
      temp++;
      DXASSERT(temp == E, "scalar type must be the last");
    }
  }
  // Lower every user at the computed offset. Advance the iterator before
  // translating because the user is erased during lowering.
  for (auto U = GEP->user_begin(); U != GEP->user_end();) {
    Instruction *user = cast<Instruction>(*(U++));
    TranslateCBAddressUser(user, handle, offset, hlslOP, fieldAnnotation,
                           dxilTypeSys, DL, pObjHelper);
  }
}
  5240. void TranslateCBOperations(Value *handle, Value *ptr, Value *offset, OP *hlslOP,
  5241. DxilTypeSystem &dxilTypeSys, const DataLayout &DL,
  5242. HLObjectOperationLowerHelper *pObjHelper) {
  5243. auto User = ptr->user_begin();
  5244. auto UserE = ptr->user_end();
  5245. for (; User != UserE;) {
  5246. // Must be Instruction.
  5247. Instruction *I = cast<Instruction>(*(User++));
  5248. TranslateCBAddressUser(I, handle, offset, hlslOP,
  5249. /*prevFieldAnnotation*/ nullptr, dxilTypeSys, DL,
  5250. pObjHelper);
  5251. }
  5252. }
  5253. Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx,
  5254. unsigned channelOffset, Type *EltTy, OP *hlslOP,
  5255. IRBuilder<> &Builder) {
  5256. Constant *OpArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoadLegacy);
  5257. DXASSERT(!EltTy->isIntegerTy(1), "Bools should not be loaded as their register representation.");
  5258. Type *doubleTy = Type::getDoubleTy(EltTy->getContext());
  5259. Type *halfTy = Type::getHalfTy(EltTy->getContext());
  5260. Type *i64Ty = Type::getInt64Ty(EltTy->getContext());
  5261. Type *i16Ty = Type::getInt16Ty(EltTy->getContext());
  5262. bool is64 = (EltTy == doubleTy) | (EltTy == i64Ty);
  5263. bool is16 = (EltTy == halfTy || EltTy == i16Ty) && !hlslOP->UseMinPrecision();
  5264. DXASSERT_LOCALVAR(is16, (is16 && channelOffset < 8) || channelOffset < 4,
  5265. "legacy cbuffer don't across 16 bytes register.");
  5266. if (is64) {
  5267. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
  5268. Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
  5269. DXASSERT((channelOffset&1)==0,"channel offset must be even for double");
  5270. unsigned eltIdx = channelOffset>>1;
  5271. Value *Result = Builder.CreateExtractValue(loadLegacy, eltIdx);
  5272. return Result;
  5273. } else {
  5274. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
  5275. Value *loadLegacy = Builder.CreateCall(CBLoad, { OpArg, handle, legacyIdx });
  5276. return Builder.CreateExtractValue(loadLegacy, channelOffset);
  5277. }
  5278. }
  5279. Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx,
  5280. unsigned channelOffset, Type *EltTy,
  5281. unsigned vecSize, OP *hlslOP,
  5282. IRBuilder<> &Builder) {
  5283. Constant *OpArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoadLegacy);
  5284. DXASSERT(!EltTy->isIntegerTy(1), "Bools should not be loaded as their register representation.");
  5285. Type *doubleTy = Type::getDoubleTy(EltTy->getContext());
  5286. Type *i64Ty = Type::getInt64Ty(EltTy->getContext());
  5287. Type *halfTy = Type::getHalfTy(EltTy->getContext());
  5288. Type *shortTy = Type::getInt16Ty(EltTy->getContext());
  5289. bool is64 = (EltTy == doubleTy) | (EltTy == i64Ty);
  5290. bool is16 = (EltTy == shortTy || EltTy == halfTy) && !hlslOP->UseMinPrecision();
  5291. DXASSERT((is16 && channelOffset + vecSize <= 8) ||
  5292. (channelOffset + vecSize) <= 4,
  5293. "legacy cbuffer don't across 16 bytes register.");
  5294. if (is16) {
  5295. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
  5296. Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
  5297. Value *Result = UndefValue::get(VectorType::get(EltTy, vecSize));
  5298. for (unsigned i = 0; i < vecSize; ++i) {
  5299. Value *NewElt = Builder.CreateExtractValue(loadLegacy, channelOffset + i);
  5300. Result = Builder.CreateInsertElement(Result, NewElt, i);
  5301. }
  5302. return Result;
  5303. } else if (is64) {
  5304. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
  5305. Value *loadLegacy = Builder.CreateCall(CBLoad, { OpArg, handle, legacyIdx });
  5306. Value *Result = UndefValue::get(VectorType::get(EltTy, vecSize));
  5307. unsigned smallVecSize = 2;
  5308. if (vecSize < smallVecSize)
  5309. smallVecSize = vecSize;
  5310. for (unsigned i = 0; i < smallVecSize; ++i) {
  5311. Value *NewElt = Builder.CreateExtractValue(loadLegacy, channelOffset+i);
  5312. Result = Builder.CreateInsertElement(Result, NewElt, i);
  5313. }
  5314. if (vecSize > 2) {
  5315. // Got to next cb register.
  5316. legacyIdx = Builder.CreateAdd(legacyIdx, hlslOP->GetU32Const(1));
  5317. Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
  5318. for (unsigned i = 2; i < vecSize; ++i) {
  5319. Value *NewElt =
  5320. Builder.CreateExtractValue(loadLegacy, i-2);
  5321. Result = Builder.CreateInsertElement(Result, NewElt, i);
  5322. }
  5323. }
  5324. return Result;
  5325. } else {
  5326. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
  5327. Value *loadLegacy = Builder.CreateCall(CBLoad, { OpArg, handle, legacyIdx });
  5328. Value *Result = UndefValue::get(VectorType::get(EltTy, vecSize));
  5329. for (unsigned i = 0; i < vecSize; ++i) {
  5330. Value *NewElt = Builder.CreateExtractValue(loadLegacy, channelOffset + i);
  5331. Result = Builder.CreateInsertElement(Result, NewElt, i);
  5332. }
  5333. return Result;
  5334. }
  5335. }
  5336. Value *TranslateConstBufMatLdLegacy(HLMatrixType MatTy, Value *handle,
  5337. Value *legacyIdx, bool colMajor, OP *OP,
  5338. bool memElemRepr, const DataLayout &DL,
  5339. IRBuilder<> &Builder) {
  5340. Type *EltTy = MatTy.getElementTypeForMem();
  5341. unsigned matSize = MatTy.getNumElements();
  5342. std::vector<Value *> elts(matSize);
  5343. unsigned EltByteSize = GetEltTypeByteSizeForConstBuf(EltTy, DL);
  5344. if (colMajor) {
  5345. unsigned colByteSize = 4 * EltByteSize;
  5346. unsigned colRegSize = (colByteSize + 15) >> 4;
  5347. for (unsigned c = 0; c < MatTy.getNumColumns(); c++) {
  5348. Value *col = GenerateCBLoadLegacy(handle, legacyIdx, /*channelOffset*/ 0,
  5349. EltTy, MatTy.getNumRows(), OP, Builder);
  5350. for (unsigned r = 0; r < MatTy.getNumRows(); r++) {
  5351. unsigned matIdx = MatTy.getColumnMajorIndex(r, c);
  5352. elts[matIdx] = Builder.CreateExtractElement(col, r);
  5353. }
  5354. // Update offset for a column.
  5355. legacyIdx = Builder.CreateAdd(legacyIdx, OP->GetU32Const(colRegSize));
  5356. }
  5357. } else {
  5358. unsigned rowByteSize = 4 * EltByteSize;
  5359. unsigned rowRegSize = (rowByteSize + 15) >> 4;
  5360. for (unsigned r = 0; r < MatTy.getNumRows(); r++) {
  5361. Value *row = GenerateCBLoadLegacy(handle, legacyIdx, /*channelOffset*/ 0,
  5362. EltTy, MatTy.getNumColumns(), OP, Builder);
  5363. for (unsigned c = 0; c < MatTy.getNumColumns(); c++) {
  5364. unsigned matIdx = MatTy.getRowMajorIndex(r, c);
  5365. elts[matIdx] = Builder.CreateExtractElement(row, c);
  5366. }
  5367. // Update offset for a row.
  5368. legacyIdx = Builder.CreateAdd(legacyIdx, OP->GetU32Const(rowRegSize));
  5369. }
  5370. }
  5371. Value *Vec = HLMatrixLower::BuildVector(EltTy, elts, Builder);
  5372. if (!memElemRepr)
  5373. Vec = MatTy.emitLoweredMemToReg(Vec, Builder);
  5374. return Vec;
  5375. }
  5376. void TranslateCBGepLegacy(GetElementPtrInst *GEP, Value *handle,
  5377. Value *legacyIdx, unsigned channelOffset,
  5378. hlsl::OP *hlslOP, IRBuilder<> &Builder,
  5379. DxilFieldAnnotation *prevFieldAnnotation,
  5380. const DataLayout &DL, DxilTypeSystem &dxilTypeSys,
  5381. HLObjectOperationLowerHelper *pObjHelper);
// Lower one user of a legacy-layout cbuffer pointer. legacyIdx is the 16-byte
// register index; channelOffset is the component within that register (32-bit
// channels, or 16-bit channels for native 16-bit types). Handles matrix
// loads, matrix subscripts, scalar/vector loads, bitcasts, and nested GEPs;
// each handled user is erased after replacement.
void TranslateCBAddressUserLegacy(Instruction *user, Value *handle,
                                  Value *legacyIdx, unsigned channelOffset,
                                  hlsl::OP *hlslOP,
                                  DxilFieldAnnotation *prevFieldAnnotation,
                                  DxilTypeSystem &dxilTypeSys,
                                  const DataLayout &DL,
                                  HLObjectOperationLowerHelper *pObjHelper) {
  IRBuilder<> Builder(user);
  if (CallInst *CI = dyn_cast<CallInst>(user)) {
    HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
    unsigned opcode = GetHLOpcode(CI);
    if (group == HLOpcodeGroup::HLMatLoadStore) {
      // Whole-matrix load: cbuffers are read-only, so only loads are legal.
      HLMatLoadStoreOpcode matOp = static_cast<HLMatLoadStoreOpcode>(opcode);
      bool colMajor = matOp == HLMatLoadStoreOpcode::ColMatLoad;
      DXASSERT(matOp == HLMatLoadStoreOpcode::ColMatLoad ||
                   matOp == HLMatLoadStoreOpcode::RowMatLoad,
               "No store on cbuffer");
      HLMatrixType MatTy = HLMatrixType::cast(
          CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx)
              ->getType()->getPointerElementType());
      // This will replace a call, so we should use the register representation
      // of elements.
      Value *newLd = TranslateConstBufMatLdLegacy(
          MatTy, handle, legacyIdx, colMajor, hlslOP, /*memElemRepr*/ false,
          DL, Builder);
      CI->replaceAllUsesWith(newLd);
      dxilutil::TryScatterDebugValueToVectorElements(newLd);
      CI->eraseFromParent();
    } else if (group == HLOpcodeGroup::HLSubscript) {
      // Matrix subscript or element access on a cbuffer matrix.
      HLSubscriptOpcode subOp = static_cast<HLSubscriptOpcode>(opcode);
      Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx);
      HLMatrixType MatTy =
          HLMatrixType::cast(basePtr->getType()->getPointerElementType());
      Type *EltTy = MatTy.getElementTypeForReg();
      Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx);
      Type *resultType = CI->getType()->getPointerElementType();
      unsigned resultSize = 1;
      if (resultType->isVectorTy())
        resultSize = resultType->getVectorNumElements();
      DXASSERT(resultSize <= 16, "up to 4x4 elements in vector or matrix");
      _Analysis_assume_(resultSize <= 16);
      Value *idxList[16];
      bool colMajor = subOp == HLSubscriptOpcode::ColMatSubscript ||
                      subOp == HLSubscriptOpcode::ColMatElement;
      // Constant indices (including splat-zero and constant vectors) allow
      // loading the whole matrix and selecting elements statically.
      bool dynamicIndexing = !isa<ConstantInt>(idx) &&
                             !isa<ConstantAggregateZero>(idx) &&
                             !isa<ConstantDataSequential>(idx);
      Value *ldData = UndefValue::get(resultType);
      if (!dynamicIndexing) {
        // This will replace a load or GEP, so we should use the memory
        // representation of elements.
        Value *matLd = TranslateConstBufMatLdLegacy(
            MatTy, handle, legacyIdx, colMajor, hlslOP, /*memElemRepr*/ true,
            DL, Builder);
        // The matLd is keep original layout, just use the idx calc in
        // EmitHLSLMatrixElement and EmitHLSLMatrixSubscript.
        switch (subOp) {
        case HLSubscriptOpcode::RowMatSubscript:
        case HLSubscriptOpcode::ColMatSubscript: {
          // Subscript: one index operand per result element.
          for (unsigned i = 0; i < resultSize; i++) {
            idxList[i] =
                CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx + i);
          }
        } break;
        case HLSubscriptOpcode::RowMatElement:
        case HLSubscriptOpcode::ColMatElement: {
          // Element access: indices are packed in one constant aggregate.
          Constant *EltIdxs = cast<Constant>(idx);
          for (unsigned i = 0; i < resultSize; i++) {
            idxList[i] = EltIdxs->getAggregateElement(i);
          }
        } break;
        default:
          DXASSERT(0, "invalid operation on const buffer");
          break;
        }
        if (resultType->isVectorTy()) {
          for (unsigned i = 0; i < resultSize; i++) {
            Value *eltData = Builder.CreateExtractElement(matLd, idxList[i]);
            ldData = Builder.CreateInsertElement(ldData, eltData, i);
          }
        } else {
          Value *eltData = Builder.CreateExtractElement(matLd, idxList[0]);
          ldData = eltData;
        }
      } else {
        // Must be matSub here.
        Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx);
        if (colMajor) {
          // idx is c * row + r.
          // For first col, c is 0, so idx is r.
          Value *one = Builder.getInt32(1);
          // row.x = c[0].[idx]
          // row.y = c[1].[idx]
          // row.z = c[2].[idx]
          // row.w = c[3].[idx]
          Value *Elts[4];
          ArrayType *AT = ArrayType::get(EltTy, MatTy.getNumColumns());
          // Spill each column to a stack array so the dynamic row index can
          // select a lane; allocas go in the entry block.
          IRBuilder<> AllocaBuilder(user->getParent()
                                        ->getParent()
                                        ->getEntryBlock()
                                        .getFirstInsertionPt());
          Value *tempArray = AllocaBuilder.CreateAlloca(AT);
          Value *zero = AllocaBuilder.getInt32(0);
          Value *cbufIdx = legacyIdx;
          for (unsigned int c = 0; c < MatTy.getNumColumns(); c++) {
            Value *ColVal =
                GenerateCBLoadLegacy(handle, cbufIdx, /*channelOffset*/ 0,
                                     EltTy, MatTy.getNumRows(), hlslOP, Builder);
            // Convert ColVal to array for indexing.
            for (unsigned int r = 0; r < MatTy.getNumRows(); r++) {
              Value *Elt =
                  Builder.CreateExtractElement(ColVal, Builder.getInt32(r));
              Value *Ptr = Builder.CreateInBoundsGEP(
                  tempArray, {zero, Builder.getInt32(r)});
              Builder.CreateStore(Elt, Ptr);
            }
            Value *Ptr = Builder.CreateInBoundsGEP(tempArray, {zero, idx});
            Elts[c] = Builder.CreateLoad(Ptr);
            // Update cbufIdx.
            cbufIdx = Builder.CreateAdd(cbufIdx, one);
          }
          if (resultType->isVectorTy()) {
            for (unsigned int c = 0; c < MatTy.getNumColumns(); c++) {
              ldData = Builder.CreateInsertElement(ldData, Elts[c], c);
            }
          } else {
            ldData = Elts[0];
          }
        } else {
          // idx is r * col + c;
          // r = idx / col;
          Value *cCol = ConstantInt::get(idx->getType(), MatTy.getNumColumns());
          idx = Builder.CreateUDiv(idx, cCol);
          idx = Builder.CreateAdd(idx, legacyIdx);
          // Just return a row; 'col' is the number of columns in the row.
          ldData = GenerateCBLoadLegacy(handle, idx, /*channelOffset*/ 0, EltTy,
                                        MatTy.getNumColumns(), hlslOP, Builder);
        }
        if (!resultType->isVectorTy()) {
          ldData = Builder.CreateExtractElement(ldData, Builder.getInt32(0));
        }
      }
      // Replace each user of the subscript call: GEPs select a sub-element of
      // the loaded data; plain loads take the whole value.
      for (auto U = CI->user_begin(); U != CI->user_end();) {
        Value *subsUser = *(U++);
        if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(subsUser)) {
          Value *subData = GenerateVecEltFromGEP(ldData, GEP, Builder,
                                                 /*bInsertLdNextToGEP*/ true);
          for (auto gepU = GEP->user_begin(); gepU != GEP->user_end();) {
            Value *gepUser = *(gepU++);
            // Must be load here;
            LoadInst *ldUser = cast<LoadInst>(gepUser);
            ldUser->replaceAllUsesWith(subData);
            ldUser->eraseFromParent();
          }
          GEP->eraseFromParent();
        } else {
          // Must be load here.
          LoadInst *ldUser = cast<LoadInst>(subsUser);
          ldUser->replaceAllUsesWith(ldData);
          ldUser->eraseFromParent();
        }
      }
      CI->eraseFromParent();
    } else if (group == HLOpcodeGroup::HLIntrinsic) {
      // FIXME: This case is hit when using built-in structures in constant
      // buffers passed directly to an intrinsic, such as:
      // RayDesc from cbuffer passed to TraceRay.
      DXASSERT(0, "not implemented yet");
    } else {
      DXASSERT(0, "not implemented yet");
    }
  } else if (LoadInst *ldInst = dyn_cast<LoadInst>(user)) {
    Type *Ty = ldInst->getType();
    Type *EltTy = Ty->getScalarType();
    // Resource inside cbuffer is lowered after GenerateDxilOperations.
    if (dxilutil::IsHLSLObjectType(Ty)) {
      CallInst *CI = cast<CallInst>(handle);
      GlobalVariable *CbGV = cast<GlobalVariable>(
          CI->getArgOperand(HLOperandIndex::kCreateHandleResourceOpIdx));
      TranslateResourceInCB(ldInst, pObjHelper, CbGV);
      return;
    }
    DXASSERT(!Ty->isAggregateType(), "should be flat in previous pass");
    // Scalar or vector load: lower directly to CBufferLoadLegacy.
    Value *newLd = nullptr;
    if (Ty->isVectorTy())
      newLd = GenerateCBLoadLegacy(handle, legacyIdx, channelOffset, EltTy,
                                   Ty->getVectorNumElements(), hlslOP, Builder);
    else
      newLd = GenerateCBLoadLegacy(handle, legacyIdx, channelOffset, EltTy,
                                   hlslOP, Builder);
    ldInst->replaceAllUsesWith(newLd);
    dxilutil::TryScatterDebugValueToVectorElements(newLd);
    ldInst->eraseFromParent();
  } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(user)) {
    // Bitcast does not change the address; recurse into its users with the
    // same register/channel position.
    for (auto it = BCI->user_begin(); it != BCI->user_end();) {
      Instruction *I = cast<Instruction>(*it++);
      TranslateCBAddressUserLegacy(I,
                                   handle, legacyIdx, channelOffset, hlslOP,
                                   prevFieldAnnotation, dxilTypeSys,
                                   DL, pObjHelper);
    }
    BCI->eraseFromParent();
  } else {
    // Must be GEP here
    GetElementPtrInst *GEP = cast<GetElementPtrInst>(user);
    TranslateCBGepLegacy(GEP, handle, legacyIdx, channelOffset, hlslOP, Builder,
                         prevFieldAnnotation, DL, dxilTypeSys, pObjHelper);
    GEP->eraseFromParent();
  }
}
// Translate a GEP on a legacy-layout cbuffer pointer into an updated
// (register index, channel) pair, then lower every user of the GEP at that
// position. 'channel' counts 32-bit components within a 16-byte register
// (16-bit components for native 16-bit fields).
void TranslateCBGepLegacy(GetElementPtrInst *GEP, Value *handle,
                          Value *legacyIndex, unsigned channel,
                          hlsl::OP *hlslOP, IRBuilder<> &Builder,
                          DxilFieldAnnotation *prevFieldAnnotation,
                          const DataLayout &DL, DxilTypeSystem &dxilTypeSys,
                          HLObjectOperationLowerHelper *pObjHelper) {
  SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());
  // update offset
  DxilFieldAnnotation *fieldAnnotation = prevFieldAnnotation;
  gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP);
  for (; GEPIt != E; GEPIt++) {
    Value *idx = GEPIt.getOperand();
    unsigned immIdx = 0;
    bool bImmIdx = false;
    if (Constant *constIdx = dyn_cast<Constant>(idx)) {
      immIdx = constIdx->getUniqueInteger().getLimitedValue();
      bImmIdx = true;
    }
    if (GEPIt->isPointerTy()) {
      // Leading pointer index: steps over whole objects of the pointee type.
      Type *EltTy = GEPIt->getPointerElementType();
      unsigned size = 0;
      if (StructType *ST = dyn_cast<StructType>(EltTy)) {
        // Struct size comes from its cbuffer layout annotation.
        DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST);
        size = annotation->GetCBufferSize();
      } else {
        DXASSERT(fieldAnnotation, "must be a field");
        if (ArrayType *AT = dyn_cast<ArrayType>(EltTy)) {
          unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize(
              *fieldAnnotation, EltTy, dxilTypeSys);
          // Decide the nested array size.
          unsigned nestedArraySize = 1;
          Type *EltTy = AT->getArrayElementType();
          // support multi level of array
          while (EltTy->isArrayTy()) {
            ArrayType *EltAT = cast<ArrayType>(EltTy);
            nestedArraySize *= EltAT->getNumElements();
            EltTy = EltAT->getElementType();
          }
          // Align to 4 * 4 bytes.
          unsigned alignedSize = (EltSize + 15) & 0xfffffff0;
          size = nestedArraySize * alignedSize;
        } else {
          size = DL.getTypeAllocSize(EltTy);
        }
      }
      // Skip 0 idx.
      if (bImmIdx && immIdx == 0)
        continue;
      // Align to 4 * 4 bytes.
      size = (size + 15) & 0xfffffff0;
      // Take this as array idxing: convert byte size to a register count
      // (divide by 16) and advance the register index.
      if (bImmIdx) {
        unsigned tempOffset = size * immIdx;
        unsigned idxInc = tempOffset >> 4;
        legacyIndex = Builder.CreateAdd(legacyIndex, hlslOP->GetU32Const(idxInc));
      } else {
        Value *idxInc = Builder.CreateMul(idx, hlslOP->GetU32Const(size >> 4));
        legacyIndex = Builder.CreateAdd(legacyIndex, idxInc);
      }
      // Array always start from x channel.
      channel = 0;
    } else if (GEPIt->isStructTy()) {
      // Struct field: fold the field's cbuffer offset into (register,
      // channel). Channel width is 2 bytes for native 16-bit fields (8 per
      // register), otherwise 4 bytes (4 per register).
      StructType *ST = cast<StructType>(*GEPIt);
      DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST);
      fieldAnnotation = &annotation->GetFieldAnnotation(immIdx);
      unsigned idxInc = 0;
      unsigned structOffset = 0;
      if (fieldAnnotation->GetCompType().Is16Bit() &&
          !hlslOP->UseMinPrecision()) {
        structOffset = fieldAnnotation->GetCBufferOffset() >> 1;
        channel += structOffset;
        idxInc = channel >> 3;
        channel = channel & 0x7;
      }
      else {
        structOffset = fieldAnnotation->GetCBufferOffset() >> 2;
        channel += structOffset;
        idxInc = channel >> 2;
        channel = channel & 0x3;
      }
      if (idxInc)
        legacyIndex = Builder.CreateAdd(legacyIndex, hlslOP->GetU32Const(idxInc));
    } else if (GEPIt->isArrayTy()) {
      DXASSERT(fieldAnnotation != nullptr, "must a field");
      unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize(
          *fieldAnnotation, *GEPIt, dxilTypeSys);
      // Decide the nested array size.
      unsigned nestedArraySize = 1;
      Type *EltTy = GEPIt->getArrayElementType();
      // support multi level of array
      while (EltTy->isArrayTy()) {
        ArrayType *EltAT = cast<ArrayType>(EltTy);
        nestedArraySize *= EltAT->getNumElements();
        EltTy = EltAT->getElementType();
      }
      // Align to 4 * 4 bytes: each array element starts a new register.
      unsigned alignedSize = (EltSize + 15) & 0xfffffff0;
      unsigned size = nestedArraySize * alignedSize;
      if (bImmIdx) {
        unsigned tempOffset = size * immIdx;
        unsigned idxInc = tempOffset >> 4;
        legacyIndex = Builder.CreateAdd(legacyIndex, hlslOP->GetU32Const(idxInc));
      } else {
        Value *idxInc = Builder.CreateMul(idx, hlslOP->GetU32Const(size >> 4));
        legacyIndex = Builder.CreateAdd(legacyIndex, idxInc);
      }
      // Array always start from x channel.
      channel = 0;
    } else if (GEPIt->isVectorTy()) {
      unsigned size = DL.getTypeAllocSize(GEPIt->getVectorElementType());
      // Indexing on vector.
      if (bImmIdx) {
        unsigned tempOffset = size * immIdx;
        if (size == 2) { // 16-bit types
          unsigned channelInc = tempOffset >> 1;
          DXASSERT((channel + channelInc) <= 8, "vector should not cross cb register (8x16bit)");
          channel += channelInc;
          if (channel == 8) {
            // Get to another row.
            // Update index and channel.
            channel = 0;
            legacyIndex = Builder.CreateAdd(legacyIndex, Builder.getInt32(1));
          }
        }
        else {
          unsigned channelInc = tempOffset >> 2;
          DXASSERT((channel + channelInc) <= 4, "vector should not cross cb register (8x32bit)");
          channel += channelInc;
          if (channel == 4) {
            // Get to another row.
            // Update index and channel.
            channel = 0;
            legacyIndex = Builder.CreateAdd(legacyIndex, Builder.getInt32(1));
          }
        }
      } else {
        // Dynamic vector component: load the whole register, spill it to a
        // stack array, and replace the GEP with a GEP into that array.
        Type *EltTy = GEPIt->getVectorElementType();
        // Load the whole register.
        Value *newLd = GenerateCBLoadLegacy(handle, legacyIndex,
                                            /*channelOffset*/ 0, EltTy,
                                            /*vecSize*/ 4, hlslOP, Builder);
        // Copy to array.
        IRBuilder<> AllocaBuilder(GEP->getParent()->getParent()->getEntryBlock().getFirstInsertionPt());
        Value *tempArray = AllocaBuilder.CreateAlloca(ArrayType::get(EltTy, 4));
        Value *zeroIdx = hlslOP->GetU32Const(0);
        for (unsigned i = 0; i < 4; i++) {
          Value *Elt = Builder.CreateExtractElement(newLd, i);
          Value *EltGEP = Builder.CreateInBoundsGEP(tempArray, {zeroIdx, hlslOP->GetU32Const(i)});
          Builder.CreateStore(Elt, EltGEP);
        }
        // Make sure this is the end of GEP.
        gep_type_iterator temp = GEPIt;
        temp++;
        DXASSERT(temp == E, "scalar type must be the last");
        // Replace the GEP with array GEP; users are untouched, so skip the
        // user-lowering loop below.
        Value *ArrayGEP = Builder.CreateInBoundsGEP(tempArray, {zeroIdx, idx});
        GEP->replaceAllUsesWith(ArrayGEP);
        return;
      }
    } else {
      // A scalar can only appear as the last indexed type.
      gep_type_iterator temp = GEPIt;
      temp++;
      DXASSERT(temp == E, "scalar type must be the last");
    }
  }
  // Lower every user of the GEP at the computed (register, channel) position.
  // Advance the iterator before translating since the user is erased.
  for (auto U = GEP->user_begin(); U != GEP->user_end();) {
    Instruction *user = cast<Instruction>(*(U++));
    TranslateCBAddressUserLegacy(user, handle, legacyIndex, channel, hlslOP, fieldAnnotation,
                                 dxilTypeSys, DL, pObjHelper);
  }
}
  5758. void TranslateCBOperationsLegacy(Value *handle, Value *ptr, OP *hlslOP,
  5759. DxilTypeSystem &dxilTypeSys,
  5760. const DataLayout &DL,
  5761. HLObjectOperationLowerHelper *pObjHelper) {
  5762. auto User = ptr->user_begin();
  5763. auto UserE = ptr->user_end();
  5764. Value *zeroIdx = hlslOP->GetU32Const(0);
  5765. for (; User != UserE;) {
  5766. // Must be Instruction.
  5767. Instruction *I = cast<Instruction>(*(User++));
  5768. TranslateCBAddressUserLegacy(
  5769. I, handle, zeroIdx, /*channelOffset*/ 0, hlslOP,
  5770. /*prevFieldAnnotation*/ nullptr, dxilTypeSys, DL, pObjHelper);
  5771. }
  5772. }
  5773. }
  5774. // Structured buffer.
  5775. namespace {
  5776. // Calculate offset.
  5777. Value *GEPIdxToOffset(GetElementPtrInst *GEP, IRBuilder<> &Builder,
  5778. hlsl::OP *OP, const DataLayout &DL) {
  5779. SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());
  5780. Value *addr = nullptr;
  5781. // update offset
  5782. if (GEP->hasAllConstantIndices()) {
  5783. unsigned gepOffset =
  5784. DL.getIndexedOffset(GEP->getPointerOperandType(), Indices);
  5785. addr = OP->GetU32Const(gepOffset);
  5786. } else {
  5787. Value *offset = OP->GetU32Const(0);
  5788. gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP);
  5789. for (; GEPIt != E; GEPIt++) {
  5790. Value *idx = GEPIt.getOperand();
  5791. unsigned immIdx = 0;
  5792. if (llvm::Constant *constIdx = dyn_cast<llvm::Constant>(idx)) {
  5793. immIdx = constIdx->getUniqueInteger().getLimitedValue();
  5794. if (immIdx == 0) {
  5795. continue;
  5796. }
  5797. }
  5798. if (GEPIt->isPointerTy() || GEPIt->isArrayTy() || GEPIt->isVectorTy()) {
  5799. unsigned size = DL.getTypeAllocSize(GEPIt->getSequentialElementType());
  5800. if (immIdx) {
  5801. unsigned tempOffset = size * immIdx;
  5802. offset = Builder.CreateAdd(offset, OP->GetU32Const(tempOffset));
  5803. } else {
  5804. Value *tempOffset = Builder.CreateMul(idx, OP->GetU32Const(size));
  5805. offset = Builder.CreateAdd(offset, tempOffset);
  5806. }
  5807. } else if (GEPIt->isStructTy()) {
  5808. const StructLayout *Layout = DL.getStructLayout(cast<StructType>(*GEPIt));
  5809. unsigned structOffset = Layout->getElementOffset(immIdx);
  5810. offset = Builder.CreateAdd(offset, OP->GetU32Const(structOffset));
  5811. } else {
  5812. gep_type_iterator temp = GEPIt;
  5813. temp++;
  5814. DXASSERT(temp == E, "scalar type must be the last");
  5815. }
  5816. };
  5817. addr = offset;
  5818. }
  5819. // TODO: x4 for byte address
  5820. return addr;
  5821. }
// Load a value from a typed buffer at an offset.
// Typed buffers do not directly support reading at offsets
// because the whole value (e.g. float4) must be read at once.
// If we are provided a non-zero offset, we need to simulate it
// by returning the correct elements.
using ResRetValueArray = std::array<Value*, 4>;
  5828. static ResRetValueArray GenerateTypedBufferLoad(
  5829. Value *Handle, Type *BufferElemTy, Value *ElemIdx, Value *StatusPtr,
  5830. OP* HlslOP, IRBuilder<> &Builder) {
  5831. OP::OpCode OpCode = OP::OpCode::BufferLoad;
  5832. Value* LoadArgs[] = { HlslOP->GetU32Const((unsigned)OpCode), Handle, ElemIdx, UndefValue::get(Builder.getInt32Ty()) };
  5833. Function* LoadFunc = HlslOP->GetOpFunc(OpCode, BufferElemTy);
  5834. Value* Load = Builder.CreateCall(LoadFunc, LoadArgs, OP::GetOpCodeName(OpCode));
  5835. ResRetValueArray ResultValues;
  5836. for (unsigned i = 0; i < ResultValues.size(); ++i) {
  5837. ResultValues[i] = cast<ExtractValueInst>(Builder.CreateExtractValue(Load, { i }));
  5838. }
  5839. UpdateStatus(Load, StatusPtr, Builder, HlslOP);
  5840. return ResultValues;
  5841. }
  5842. static AllocaInst* SpillValuesToArrayAlloca(ArrayRef<Value*> Values, IRBuilder<>& Builder) {
  5843. DXASSERT_NOMSG(!Values.empty());
  5844. IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Builder.GetInsertPoint()));
  5845. AllocaInst* ArrayAlloca = AllocaBuilder.CreateAlloca(ArrayType::get(Values[0]->getType(), Values.size()));
  5846. for (unsigned i = 0; i < Values.size(); ++i) {
  5847. Value* ArrayElemPtr = Builder.CreateGEP(ArrayAlloca, { Builder.getInt32(0), Builder.getInt32(i) });
  5848. Builder.CreateStore(Values[i], ArrayElemPtr);
  5849. }
  5850. return ArrayAlloca;
  5851. }
  5852. static Value* ExtractFromTypedBufferLoad(const ResRetValueArray& ResRet,
  5853. Type* ResultTy, Value* Offset, IRBuilder<>& Builder) {
  5854. unsigned ElemCount = ResultTy->isVectorTy() ? ResultTy->getVectorNumElements() : 1;
  5855. DXASSERT_NOMSG(ElemCount < ResRet.size());
  5856. unsigned ElemSizeInBytes = ResRet[0]->getType()->getScalarSizeInBits() / 8;
  5857. SmallVector<Value*, 4> Elems;
  5858. if (ConstantInt *OffsetAsConstantInt = dyn_cast<ConstantInt>(Offset)) {
  5859. // Get all elements to be returned
  5860. uint64_t FirstElemOffset = OffsetAsConstantInt->getLimitedValue();
  5861. DXASSERT_NOMSG(FirstElemOffset % ElemSizeInBytes == 0);
  5862. uint64_t FirstElemIdx = FirstElemOffset / ElemSizeInBytes;
  5863. DXASSERT_NOMSG(FirstElemIdx <= ResRet.size() - ElemCount);
  5864. for (unsigned ElemIdx = 0; ElemIdx < ElemCount; ++ElemIdx) {
  5865. Elems.emplace_back(ResRet[std::min<size_t>(FirstElemIdx + ElemIdx, ResRet.size() - 1)]);
  5866. }
  5867. }
  5868. else {
  5869. Value* ArrayAlloca = SpillValuesToArrayAlloca(
  5870. ArrayRef<Value*>(ResRet.data(), ResRet.size()), Builder);
  5871. // Get all elements to be returned through dynamic indices
  5872. Value *FirstElemIdx = Builder.CreateUDiv(Offset, Builder.getInt32(ElemSizeInBytes));
  5873. for (unsigned i = 0; i < ElemCount; ++i) {
  5874. Value *ElemIdx = Builder.CreateAdd(FirstElemIdx, Builder.getInt32(i));
  5875. Value* ElemPtr = Builder.CreateGEP(ArrayAlloca, { Builder.getInt32(0), ElemIdx });
  5876. Elems.emplace_back(Builder.CreateLoad(ElemPtr));
  5877. }
  5878. }
  5879. return ScalarizeElements(ResultTy, Elems, Builder);
  5880. }
  5881. Value *GenerateStructBufLd(Value *handle, Value *bufIdx, Value *offset,
  5882. Value *status, Type *EltTy,
  5883. MutableArrayRef<Value *> resultElts, hlsl::OP *OP,
  5884. IRBuilder<> &Builder, unsigned NumComponents, Constant *alignment) {
  5885. OP::OpCode opcode = OP::OpCode::RawBufferLoad;
  5886. DXASSERT(resultElts.size() <= 4,
  5887. "buffer load cannot load more than 4 values");
  5888. if (bufIdx == nullptr) {
  5889. // This is actually a byte address buffer load with a struct template type.
  5890. // The call takes only one coordinates for the offset.
  5891. bufIdx = offset;
  5892. offset = UndefValue::get(offset->getType());
  5893. }
  5894. Function *dxilF = OP->GetOpFunc(opcode, EltTy);
  5895. Constant *mask = GetRawBufferMaskForETy(EltTy, NumComponents, OP);
  5896. Value *Args[] = {OP->GetU32Const((unsigned)opcode),
  5897. handle,
  5898. bufIdx,
  5899. offset,
  5900. mask,
  5901. alignment};
  5902. Value *Ld = Builder.CreateCall(dxilF, Args, OP::GetOpCodeName(opcode));
  5903. for (unsigned i = 0; i < resultElts.size(); i++) {
  5904. resultElts[i] = Builder.CreateExtractValue(Ld, i);
  5905. }
  5906. // status
  5907. UpdateStatus(Ld, status, Builder, OP);
  5908. return Ld;
  5909. }
  5910. void GenerateStructBufSt(Value *handle, Value *bufIdx, Value *offset,
  5911. Type *EltTy, hlsl::OP *OP, IRBuilder<> &Builder,
  5912. ArrayRef<Value *> vals, uint8_t mask, Constant *alignment) {
  5913. OP::OpCode opcode = OP::OpCode::RawBufferStore;
  5914. DXASSERT(vals.size() == 4, "buffer store need 4 values");
  5915. Value *Args[] = {OP->GetU32Const((unsigned)opcode),
  5916. handle,
  5917. bufIdx,
  5918. offset,
  5919. vals[0],
  5920. vals[1],
  5921. vals[2],
  5922. vals[3],
  5923. OP->GetU8Const(mask),
  5924. alignment};
  5925. Function *dxilF = OP->GetOpFunc(opcode, EltTy);
  5926. Builder.CreateCall(dxilF, Args);
  5927. }
  5928. Value *TranslateStructBufMatLd(Type *matType, IRBuilder<> &Builder,
  5929. Value *handle, hlsl::OP *OP, Value *status,
  5930. Value *bufIdx, Value *baseOffset,
  5931. const DataLayout &DL) {
  5932. HLMatrixType MatTy = HLMatrixType::cast(matType);
  5933. Type *EltTy = MatTy.getElementTypeForMem();
  5934. unsigned EltSize = DL.getTypeAllocSize(EltTy);
  5935. Constant* alignment = OP->GetI32Const(EltSize);
  5936. Value *offset = baseOffset;
  5937. if (baseOffset == nullptr)
  5938. offset = OP->GetU32Const(0);
  5939. unsigned matSize = MatTy.getNumElements();
  5940. std::vector<Value *> elts(matSize);
  5941. unsigned rest = (matSize % 4);
  5942. if (rest) {
  5943. Value *ResultElts[4];
  5944. GenerateStructBufLd(handle, bufIdx, offset, status, EltTy, ResultElts, OP, Builder, 3, alignment);
  5945. for (unsigned i = 0; i < rest; i++)
  5946. elts[i] = ResultElts[i];
  5947. offset = Builder.CreateAdd(offset, OP->GetU32Const(EltSize * rest));
  5948. }
  5949. for (unsigned i = rest; i < matSize; i += 4) {
  5950. Value *ResultElts[4];
  5951. GenerateStructBufLd(handle, bufIdx, offset, status, EltTy, ResultElts, OP, Builder, 4, alignment);
  5952. elts[i] = ResultElts[0];
  5953. elts[i + 1] = ResultElts[1];
  5954. elts[i + 2] = ResultElts[2];
  5955. elts[i + 3] = ResultElts[3];
  5956. // Update offset by 4*4bytes.
  5957. offset = Builder.CreateAdd(offset, OP->GetU32Const(4 * EltSize));
  5958. }
  5959. Value *Vec = HLMatrixLower::BuildVector(EltTy, elts, Builder);
  5960. Vec = MatTy.emitLoweredMemToReg(Vec, Builder);
  5961. return Vec;
  5962. }
  5963. void TranslateStructBufMatSt(Type *matType, IRBuilder<> &Builder, Value *handle,
  5964. hlsl::OP *OP, Value *bufIdx, Value *baseOffset,
  5965. Value *val, const DataLayout &DL) {
  5966. HLMatrixType MatTy = HLMatrixType::cast(matType);
  5967. Type *EltTy = MatTy.getElementTypeForMem();
  5968. val = MatTy.emitLoweredRegToMem(val, Builder);
  5969. unsigned EltSize = DL.getTypeAllocSize(EltTy);
  5970. Constant *Alignment = OP->GetI32Const(EltSize);
  5971. Value *offset = baseOffset;
  5972. if (baseOffset == nullptr)
  5973. offset = OP->GetU32Const(0);
  5974. unsigned matSize = MatTy.getNumElements();
  5975. Value *undefElt = UndefValue::get(EltTy);
  5976. unsigned storeSize = matSize;
  5977. if (matSize % 4) {
  5978. storeSize = matSize + 4 - (matSize & 3);
  5979. }
  5980. std::vector<Value *> elts(storeSize, undefElt);
  5981. for (unsigned i = 0; i < matSize; i++)
  5982. elts[i] = Builder.CreateExtractElement(val, i);
  5983. for (unsigned i = 0; i < matSize; i += 4) {
  5984. uint8_t mask = 0;
  5985. for (unsigned j = 0; j < 4 && (i+j) < matSize; j++) {
  5986. if (elts[i+j] != undefElt)
  5987. mask |= (1<<j);
  5988. }
  5989. GenerateStructBufSt(handle, bufIdx, offset, EltTy, OP, Builder,
  5990. {elts[i], elts[i + 1], elts[i + 2], elts[i + 3]}, mask,
  5991. Alignment);
  5992. // Update offset by 4*4bytes.
  5993. offset = Builder.CreateAdd(offset, OP->GetU32Const(4 * EltSize));
  5994. }
  5995. }
  5996. void TranslateStructBufMatLdSt(CallInst *CI, Value *handle, hlsl::OP *OP,
  5997. Value *status, Value *bufIdx,
  5998. Value *baseOffset, const DataLayout &DL) {
  5999. IRBuilder<> Builder(CI);
  6000. HLOpcodeGroup group = hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction());
  6001. unsigned opcode = GetHLOpcode(CI);
  6002. DXASSERT_LOCALVAR(group, group == HLOpcodeGroup::HLMatLoadStore,
  6003. "only translate matrix loadStore here.");
  6004. HLMatLoadStoreOpcode matOp = static_cast<HLMatLoadStoreOpcode>(opcode);
  6005. // Due to the current way the initial codegen generates matrix
  6006. // orientation casts, the in-register vector matrix has already been
  6007. // reordered based on the destination's row or column-major packing orientation.
  6008. switch (matOp) {
  6009. case HLMatLoadStoreOpcode::RowMatLoad:
  6010. case HLMatLoadStoreOpcode::ColMatLoad: {
  6011. Value *ptr = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx);
  6012. Value *NewLd = TranslateStructBufMatLd(
  6013. ptr->getType()->getPointerElementType(), Builder, handle, OP, status,
  6014. bufIdx, baseOffset, DL);
  6015. CI->replaceAllUsesWith(NewLd);
  6016. } break;
  6017. case HLMatLoadStoreOpcode::RowMatStore:
  6018. case HLMatLoadStoreOpcode::ColMatStore: {
  6019. Value *ptr = CI->getArgOperand(HLOperandIndex::kMatStoreDstPtrOpIdx);
  6020. Value *val = CI->getArgOperand(HLOperandIndex::kMatStoreValOpIdx);
  6021. TranslateStructBufMatSt(ptr->getType()->getPointerElementType(), Builder,
  6022. handle, OP, bufIdx, baseOffset, val,
  6023. DL);
  6024. } break;
  6025. }
  6026. CI->eraseFromParent();
  6027. }
  6028. void TranslateStructBufSubscriptUser(Instruction *user,
  6029. Value *handle, HLResource::Kind ResKind,
  6030. Value *bufIdx, Value *baseOffset, Value *status,
  6031. hlsl::OP *OP, const DataLayout &DL);
  6032. // For case like mat[i][j].
  6033. // IdxList is [i][0], [i][1], [i][2],[i][3].
  6034. // Idx is j.
  6035. // return [i][j] not mat[i][j] because resource ptr and temp ptr need different
  6036. // code gen.
  6037. static Value *LowerGEPOnMatIndexListToIndex(
  6038. llvm::GetElementPtrInst *GEP, ArrayRef<Value *> IdxList) {
  6039. IRBuilder<> Builder(GEP);
  6040. Value *zero = Builder.getInt32(0);
  6041. DXASSERT(GEP->getNumIndices() == 2, "must have 2 level");
  6042. Value *baseIdx = (GEP->idx_begin())->get();
  6043. DXASSERT_LOCALVAR(baseIdx, baseIdx == zero, "base index must be 0");
  6044. Value *Idx = (GEP->idx_begin() + 1)->get();
  6045. if (ConstantInt *immIdx = dyn_cast<ConstantInt>(Idx)) {
  6046. return IdxList[immIdx->getSExtValue()];
  6047. }
  6048. else {
  6049. IRBuilder<> AllocaBuilder(
  6050. GEP->getParent()->getParent()->getEntryBlock().getFirstInsertionPt());
  6051. unsigned size = IdxList.size();
  6052. // Store idxList to temp array.
  6053. ArrayType *AT = ArrayType::get(IdxList[0]->getType(), size);
  6054. Value *tempArray = AllocaBuilder.CreateAlloca(AT);
  6055. for (unsigned i = 0; i < size; i++) {
  6056. Value *EltPtr = Builder.CreateGEP(tempArray, { zero, Builder.getInt32(i) });
  6057. Builder.CreateStore(IdxList[i], EltPtr);
  6058. }
  6059. // Load the idx.
  6060. Value *GEPOffset = Builder.CreateGEP(tempArray, { zero, Idx });
  6061. return Builder.CreateLoad(GEPOffset);
  6062. }
  6063. }
  6064. // subscript operator for matrix of struct element.
// Lowers the subscript operator applied to a matrix member of a structured
// buffer element. Computes a per-component byte offset list (idxList) from
// the subscript kind, then rewrites every user of the subscript pointer:
//  - scalar result: forwarded to TranslateStructBufSubscriptUser directly
//  - GEP user (mat[i][j]): folded to a single offset, then users recursed
//  - store user: one masked single-component rawBufferStore per component
//  - load user: one rawBufferLoad per component, reassembled into a vector
// The subscript call itself is erased at the end.
void TranslateStructBufMatSubscript(CallInst *CI,
    Value *handle, HLResource::Kind ResKind,
    Value *bufIdx, Value *baseOffset, Value *status,
    hlsl::OP* hlslOP, const DataLayout &DL) {
  unsigned opcode = GetHLOpcode(CI);
  IRBuilder<> subBuilder(CI);
  HLSubscriptOpcode subOp = static_cast<HLSubscriptOpcode>(opcode);
  Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx);
  HLMatrixType MatTy =
      HLMatrixType::cast(basePtr->getType()->getPointerElementType());
  Type *EltTy = MatTy.getElementTypeForReg();
  Constant *alignment = hlslOP->GetI32Const(DL.getTypeAllocSize(EltTy));
  // Size in bytes of one matrix element as laid out in the buffer.
  Value *EltByteSize = ConstantInt::get(
      baseOffset->getType(), GetEltTypeByteSizeForConstBuf(EltTy, DL));
  Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx);
  Type *resultType = CI->getType()->getPointerElementType();
  unsigned resultSize = 1;
  if (resultType->isVectorTy())
    resultSize = resultType->getVectorNumElements();
  DXASSERT(resultSize <= 16, "up to 4x4 elements in vector or matrix");
  _Analysis_assume_(resultSize <= 16);
  // Byte offset (relative to the buffer element) of each addressed component.
  std::vector<Value *> idxList(resultSize);
  switch (subOp) {
  case HLSubscriptOpcode::ColMatSubscript:
  case HLSubscriptOpcode::RowMatSubscript: {
    // mat[i]: one dynamic index operand per returned component.
    for (unsigned i = 0; i < resultSize; i++) {
      Value *offset =
          CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx + i);
      offset = subBuilder.CreateMul(offset, EltByteSize);
      idxList[i] = subBuilder.CreateAdd(baseOffset, offset);
    }
  } break;
  case HLSubscriptOpcode::RowMatElement:
  case HLSubscriptOpcode::ColMatElement: {
    // mat._11-style element access: the indices are a constant aggregate.
    Constant *EltIdxs = cast<Constant>(idx);
    for (unsigned i = 0; i < resultSize; i++) {
      Value *offset =
          subBuilder.CreateMul(EltIdxs->getAggregateElement(i), EltByteSize);
      idxList[i] = subBuilder.CreateAdd(baseOffset, offset);
    }
  } break;
  default:
    DXASSERT(0, "invalid operation on const buffer");
    break;
  }

  Value *undefElt = UndefValue::get(EltTy);

  // Users are erased as they are handled, so advance the iterator first.
  for (auto U = CI->user_begin(); U != CI->user_end();) {
    Value *subsUser = *(U++);
    if (resultSize == 1) {
      TranslateStructBufSubscriptUser(cast<Instruction>(subsUser),
          handle, ResKind, bufIdx, idxList[0], status, hlslOP, DL);
      continue;
    }
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(subsUser)) {
      // mat[i][j]: fold the GEP into a single element offset, then lower
      // each of the GEP's users against that offset.
      Value *GEPOffset = LowerGEPOnMatIndexListToIndex(GEP, idxList);
      for (auto gepU = GEP->user_begin(); gepU != GEP->user_end();) {
        Instruction *gepUserInst = cast<Instruction>(*(gepU++));
        TranslateStructBufSubscriptUser(gepUserInst,
            handle, ResKind, bufIdx, GEPOffset, status, hlslOP, DL);
      }
      GEP->eraseFromParent();
    } else if (StoreInst *stUser = dyn_cast<StoreInst>(subsUser)) {
      // Store: write each component with a single-component (X) mask at its
      // own byte offset.
      IRBuilder<> stBuilder(stUser);
      Value *Val = stUser->getValueOperand();
      if (Val->getType()->isVectorTy()) {
        for (unsigned i = 0; i < resultSize; i++) {
          Value *EltVal = stBuilder.CreateExtractElement(Val, i);
          uint8_t mask = DXIL::kCompMask_X;
          GenerateStructBufSt(handle, bufIdx, idxList[i], EltTy, hlslOP,
                              stBuilder, {EltVal, undefElt, undefElt, undefElt},
                              mask, alignment);
        }
      } else {
        uint8_t mask = DXIL::kCompMask_X;
        GenerateStructBufSt(handle, bufIdx, idxList[0], EltTy, hlslOP,
                            stBuilder, {Val, undefElt, undefElt, undefElt},
                            mask, alignment);
      }
      stUser->eraseFromParent();
    } else {
      // Must be load here.
      LoadInst *ldUser = cast<LoadInst>(subsUser);
      IRBuilder<> ldBuilder(ldUser);
      Value *ldData = UndefValue::get(resultType);
      if (resultType->isVectorTy()) {
        // Load each component separately and insert into the result vector.
        for (unsigned i = 0; i < resultSize; i++) {
          Value *ResultElt;
          // TODO: This can be inefficient for row major matrix load
          GenerateStructBufLd(handle, bufIdx, idxList[i],
                              /*status*/ nullptr, EltTy, ResultElt, hlslOP,
                              ldBuilder, 1, alignment);
          ldData = ldBuilder.CreateInsertElement(ldData, ResultElt, i);
        }
      } else {
        GenerateStructBufLd(handle, bufIdx, idxList[0], /*status*/ nullptr,
                            EltTy, ldData, hlslOP, ldBuilder, 4, alignment);
      }
      ldUser->replaceAllUsesWith(ldData);
      ldUser->eraseFromParent();
    }
  }
  CI->eraseFromParent();
}
// Lowers a single user of a structured/raw/typed buffer subscript pointer.
// bufIdx is the element index (null for byte address buffers) and baseOffset
// is the byte offset within the element (null is treated as 0). Handles:
//  - HL intrinsic calls: Interlocked* atomics, matrix load/store, and
//    nested matrix subscripts
//  - direct loads/stores of scalars, vectors, and arrays of them
//  - bitcasts and GEPs, by recursing into their users
// Every instruction that is lowered is erased from the function.
void TranslateStructBufSubscriptUser(
    Instruction *user, Value *handle, HLResource::Kind ResKind,
    Value *bufIdx, Value *baseOffset, Value *status,
    hlsl::OP *OP, const DataLayout &DL) {
  IRBuilder<> Builder(user);
  if (CallInst *userCall = dyn_cast<CallInst>(user)) {
    HLOpcodeGroup group = // user call?
        hlsl::GetHLOpcodeGroupByName(userCall->getCalledFunction());
    unsigned opcode = GetHLOpcode(userCall);
    // For case element type of structure buffer is not structure type.
    if (baseOffset == nullptr)
      baseOffset = OP->GetU32Const(0);
    if (group == HLOpcodeGroup::HLIntrinsic) {
      IntrinsicOp IOP = static_cast<IntrinsicOp>(opcode);
      switch (IOP) {
      case IntrinsicOp::MOP_Load: {
        if (userCall->getType()->isPointerTy()) {
          // Struct will return pointers which like []
        } else {
          // Use builtin types on structuredBuffer.
        }
        DXASSERT(0, "not implement yet");
      } break;
      // Each Interlocked* intrinsic lowers to a dx.op.atomicBinOp with the
      // matching binary opcode, addressed by (handle, bufIdx, baseOffset).
      case IntrinsicOp::IOP_InterlockedAdd: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Add,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedAnd: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::And,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedExchange: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Exchange,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedMax: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMax,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedMin: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMin,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedUMax: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMax,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedUMin: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMin,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedOr: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Or,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedXor: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Xor,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedCompareStore:
      case IntrinsicOp::IOP_InterlockedCompareExchange: {
        // Compare/exchange uses the dedicated atomicCompareExchange op.
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicCompareExchange,
                            handle, bufIdx, baseOffset);
        TranslateAtomicCmpXChg(helper, Builder, OP);
      } break;
      default:
        DXASSERT(0, "invalid opcode");
        break;
      }
      userCall->eraseFromParent();
    } else if (group == HLOpcodeGroup::HLMatLoadStore)
      // Matrix load/store on a struct-buffer member.
      TranslateStructBufMatLdSt(userCall, handle, OP, status, bufIdx,
                                baseOffset, DL);
    else if (group == HLOpcodeGroup::HLSubscript) {
      // Nested subscript (matrix member subscript).
      TranslateStructBufMatSubscript(userCall,
          handle, ResKind, bufIdx, baseOffset, status, OP, DL);
    }
  } else if (isa<LoadInst>(user) || isa<StoreInst>(user)) {
    LoadInst *ldInst = dyn_cast<LoadInst>(user);
    StoreInst *stInst = dyn_cast<StoreInst>(user);
    Type *Ty = isa<LoadInst>(user) ? ldInst->getType()
                                   : stInst->getValueOperand()->getType();
    Type *pOverloadTy = Ty->getScalarType();
    Value *offset = baseOffset;
    unsigned arraySize = 1;
    Value *eltSize = nullptr;
    // Arrays of scalars/vectors are accessed one element at a time, stepping
    // the byte offset by the element's allocation size.
    if (pOverloadTy->isArrayTy()) {
      arraySize = pOverloadTy->getArrayNumElements();
      eltSize = OP->GetU32Const(
          DL.getTypeAllocSize(pOverloadTy->getArrayElementType()));
      pOverloadTy = pOverloadTy->getArrayElementType()->getScalarType();
    }
    if (ldInst) {
      // Loads one scalar/vector element at the given byte offset.
      auto LdElement = [=](Value *offset, IRBuilder<> &Builder) -> Value * {
        unsigned numComponents = 0;
        if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
          numComponents = VTy->getNumElements();
        }
        else {
          numComponents = 1;
        }
        Constant *alignment =
            OP->GetI32Const(DL.getTypeAllocSize(Ty->getScalarType()));
        if (ResKind == HLResource::Kind::TypedBuffer) {
          // Typed buffer cannot have offsets, they must be loaded all at once
          ResRetValueArray ResRet = GenerateTypedBufferLoad(
              handle, pOverloadTy, bufIdx, status, OP, Builder);
          return ExtractFromTypedBufferLoad(ResRet, Ty, offset, Builder);
        }
        else {
          Value* ResultElts[4];
          GenerateStructBufLd(handle, bufIdx, offset, status, pOverloadTy,
                              ResultElts, OP, Builder, numComponents, alignment);
          return ScalarizeElements(Ty, ResultElts, Builder);
        }
      };
      Value *newLd = LdElement(offset, Builder);
      if (arraySize > 1) {
        // Rebuild the array value element by element via insertvalue.
        newLd =
            Builder.CreateInsertValue(UndefValue::get(Ty), newLd, (uint64_t)0);
        for (unsigned i = 1; i < arraySize; i++) {
          offset = Builder.CreateAdd(offset, eltSize);
          Value *eltLd = LdElement(offset, Builder);
          newLd = Builder.CreateInsertValue(newLd, eltLd, i);
        }
      }
      ldInst->replaceAllUsesWith(newLd);
    } else {
      Value *val = stInst->getValueOperand();
      // Stores one scalar/vector element at the given byte offset with a
      // component mask matching the value's width.
      auto StElement = [&](Value *offset, Value *val, IRBuilder<> &Builder) {
        Value *undefVal = llvm::UndefValue::get(pOverloadTy);
        Value *vals[] = {undefVal, undefVal, undefVal, undefVal};
        uint8_t mask = 0;
        if (Ty->isVectorTy()) {
          unsigned vectorNumElements = Ty->getVectorNumElements();
          DXASSERT(vectorNumElements <= 4, "up to 4 elements in vector");
          _Analysis_assume_(vectorNumElements <= 4);
          for (unsigned i = 0; i < vectorNumElements; i++) {
            vals[i] = Builder.CreateExtractElement(val, i);
            mask |= (1<<i);
          }
        } else {
          vals[0] = val;
          mask = DXIL::kCompMask_X;
        }
        Constant *alignment =
            OP->GetI32Const(DL.getTypeAllocSize(Ty->getScalarType()));
        GenerateStructBufSt(handle, bufIdx, offset, pOverloadTy, OP, Builder,
                            vals, mask, alignment);
      };
      if (arraySize > 1)
        val = Builder.CreateExtractValue(val, 0);
      StElement(offset, val, Builder);
      if (arraySize > 1) {
        // Store the remaining array elements at increasing offsets.
        val = stInst->getValueOperand();
        for (unsigned i = 1; i < arraySize; i++) {
          offset = Builder.CreateAdd(offset, eltSize);
          Value *eltVal = Builder.CreateExtractValue(val, i);
          StElement(offset, eltVal, Builder);
        }
      }
    }
    user->eraseFromParent();
  } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(user)) {
    // Recurse users
    for (auto U = BCI->user_begin(); U != BCI->user_end();) {
      Value *BCIUser = *(U++);
      TranslateStructBufSubscriptUser(cast<Instruction>(BCIUser),
          handle, ResKind, bufIdx, baseOffset, status, OP, DL);
    }
    BCI->eraseFromParent();
  } else {
    // should only used by GEP
    GetElementPtrInst *GEP = cast<GetElementPtrInst>(user);
    Type *Ty = GEP->getType()->getPointerElementType();
    // Fold the GEP's indices into a byte offset and lower its users against
    // baseOffset + that offset.
    Value *offset = GEPIdxToOffset(GEP, Builder, OP, DL);
    DXASSERT_LOCALVAR(Ty, offset->getType() == Type::getInt32Ty(Ty->getContext()),
                      "else bitness is wrong");
    offset = Builder.CreateAdd(offset, baseOffset);
    for (auto U = GEP->user_begin(); U != GEP->user_end();) {
      Value *GEPUser = *(U++);
      TranslateStructBufSubscriptUser(cast<Instruction>(GEPUser),
          handle, ResKind, bufIdx, offset, status, OP, DL);
    }
    // delete the inst
    GEP->eraseFromParent();
  }
}
  6373. void TranslateStructBufSubscript(CallInst *CI, Value *handle, Value *status,
  6374. hlsl::OP *OP, HLResource::Kind ResKind, const DataLayout &DL) {
  6375. Value *subscriptIndex = CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx);
  6376. Value* bufIdx = nullptr;
  6377. Value *offset = nullptr;
  6378. if (ResKind == HLResource::Kind::RawBuffer) {
  6379. offset = subscriptIndex;
  6380. }
  6381. else {
  6382. // StructuredBuffer, TypedBuffer, etc.
  6383. bufIdx = subscriptIndex;
  6384. offset = OP->GetU32Const(0);
  6385. }
  6386. for (auto U = CI->user_begin(); U != CI->user_end();) {
  6387. Value *user = *(U++);
  6388. TranslateStructBufSubscriptUser(cast<Instruction>(user),
  6389. handle, ResKind, bufIdx, offset, status, OP, DL);
  6390. }
  6391. }
  6392. }
  6393. // HLSubscript.
  6394. namespace {
  6395. Value *TranslateTypedBufLoad(CallInst *CI, DXIL::ResourceKind RK,
  6396. DXIL::ResourceClass RC, Value *handle,
  6397. LoadInst *ldInst, IRBuilder<> &Builder,
  6398. hlsl::OP *hlslOP, const DataLayout &DL) {
  6399. ResLoadHelper ldHelper(CI, RK, RC, handle, IntrinsicOp::MOP_Load, /*bForSubscript*/ true);
  6400. // Default sampleIdx for 2DMS textures.
  6401. if (RK == DxilResource::Kind::Texture2DMS ||
  6402. RK == DxilResource::Kind::Texture2DMSArray)
  6403. ldHelper.mipLevel = hlslOP->GetU32Const(0);
  6404. // use ldInst as retVal
  6405. ldHelper.retVal = ldInst;
  6406. TranslateLoad(ldHelper, RK, Builder, hlslOP, DL);
  6407. // delete the ld
  6408. ldInst->eraseFromParent();
  6409. return ldHelper.retVal;
  6410. }
// Replaces element EltIdx of VecVal with EltVal and returns the updated
// vector value. A constant index becomes a single insertelement. A dynamic
// index is lowered as a switch over all lanes (one insertelement per case)
// merged by a phi in the continuation block, since there is no dynamic
// vector-insert instruction; out-of-range indices take the default edge and
// leave the vector unchanged. Note: InsertPt may end up in a new block.
Value *UpdateVectorElt(Value *VecVal, Value *EltVal, Value *EltIdx,
                       unsigned vectorSize, Instruction *InsertPt) {
  IRBuilder<> Builder(InsertPt);
  if (ConstantInt *CEltIdx = dyn_cast<ConstantInt>(EltIdx)) {
    VecVal =
        Builder.CreateInsertElement(VecVal, EltVal, CEltIdx->getLimitedValue());
  } else {
    // Split at the insert point; the tail block becomes the switch's merge
    // point, and the branch created by the split is replaced by the switch.
    BasicBlock *BB = InsertPt->getParent();
    BasicBlock *EndBB = BB->splitBasicBlock(InsertPt);
    TerminatorInst *TI = BB->getTerminator();
    IRBuilder<> SwitchBuilder(TI);
    LLVMContext &Ctx = InsertPt->getContext();
    SwitchInst *Switch = SwitchBuilder.CreateSwitch(EltIdx, EndBB, vectorSize);
    TI->eraseFromParent();
    Function *F = EndBB->getParent();
    IRBuilder<> endSwitchBuilder(EndBB->begin());
    Type *Ty = VecVal->getType();
    // One incoming value per lane case plus one for the default edge.
    PHINode *VecPhi = endSwitchBuilder.CreatePHI(Ty, vectorSize + 1);
    for (unsigned i = 0; i < vectorSize; i++) {
      BasicBlock *CaseBB = BasicBlock::Create(Ctx, "case", F, EndBB);
      Switch->addCase(SwitchBuilder.getInt32(i), CaseBB);
      IRBuilder<> CaseBuilder(CaseBB);
      Value *CaseVal = CaseBuilder.CreateInsertElement(VecVal, EltVal, i);
      VecPhi->addIncoming(CaseVal, CaseBB);
      CaseBuilder.CreateBr(EndBB);
    }
    // Default edge: the vector is returned unmodified.
    VecPhi->addIncoming(VecVal, BB);
    VecVal = VecPhi;
  }
  return VecVal;
}
  6442. void TranslateDefaultSubscript(CallInst *CI, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  6443. Value *ptr = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx);
  6444. hlsl::OP *hlslOP = &helper.hlslOP;
  6445. // Resource ptr.
  6446. Value *handle = ptr;
  6447. DXIL::ResourceClass RC = pObjHelper->GetRC(handle);
  6448. DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
  6449. Type *Ty = CI->getType()->getPointerElementType();
  6450. for (auto It = CI->user_begin(); It != CI->user_end(); ) {
  6451. User *user = *(It++);
  6452. Instruction *I = cast<Instruction>(user);
  6453. IRBuilder<> Builder(I);
  6454. if (LoadInst *ldInst = dyn_cast<LoadInst>(user)) {
  6455. TranslateTypedBufLoad(CI, RK, RC, handle, ldInst, Builder, hlslOP, helper.dataLayout);
  6456. } else if (StoreInst *stInst = dyn_cast<StoreInst>(user)) {
  6457. Value *val = stInst->getValueOperand();
  6458. TranslateStore(RK, handle, val,
  6459. CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx),
  6460. Builder, hlslOP);
  6461. // delete the st
  6462. stInst->eraseFromParent();
  6463. } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(user)) {
  6464. // Must be vector type here.
  6465. unsigned vectorSize = Ty->getVectorNumElements();
  6466. DXASSERT_NOMSG(GEP->getNumIndices() == 2);
  6467. Use *GEPIdx = GEP->idx_begin();
  6468. GEPIdx++;
  6469. Value *EltIdx = *GEPIdx;
  6470. for (auto GEPIt = GEP->user_begin(); GEPIt != GEP->user_end();) {
  6471. User *GEPUser = *(GEPIt++);
  6472. if (StoreInst *SI = dyn_cast<StoreInst>(GEPUser)) {
  6473. IRBuilder<> StBuilder(SI);
  6474. // Generate Ld.
  6475. LoadInst *tmpLd = StBuilder.CreateLoad(CI);
  6476. Value *ldVal = TranslateTypedBufLoad(CI, RK, RC, handle, tmpLd, StBuilder,
  6477. hlslOP, helper.dataLayout);
  6478. // Update vector.
  6479. ldVal = UpdateVectorElt(ldVal, SI->getValueOperand(), EltIdx,
  6480. vectorSize, SI);
  6481. // Generate St.
  6482. // Reset insert point, UpdateVectorElt may move SI to different block.
  6483. StBuilder.SetInsertPoint(SI);
  6484. TranslateStore(RK, handle, ldVal,
  6485. CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx),
  6486. StBuilder, hlslOP);
  6487. SI->eraseFromParent();
  6488. continue;
  6489. }
  6490. if (LoadInst *LI = dyn_cast<LoadInst>(GEPUser)) {
  6491. IRBuilder<> LdBuilder(LI);
  6492. // Generate tmp vector load with vector type & translate it
  6493. LoadInst *tmpLd = LdBuilder.CreateLoad(CI);
  6494. Value *ldVal = TranslateTypedBufLoad(CI, RK, RC, handle, tmpLd, LdBuilder,
  6495. hlslOP, helper.dataLayout);
  6496. // get the single element
  6497. ldVal = GenerateVecEltFromGEP(ldVal, GEP, LdBuilder,
  6498. /*bInsertLdNextToGEP*/ false);
  6499. LI->replaceAllUsesWith(ldVal);
  6500. LI->eraseFromParent();
  6501. continue;
  6502. }
  6503. if (!isa<CallInst>(GEPUser)) {
  6504. // Invalid operations.
  6505. Translated = false;
  6506. dxilutil::EmitErrorOnInstruction(GEP, "Invalid operation on typed buffer.");
  6507. return;
  6508. }
  6509. CallInst *userCall = cast<CallInst>(GEPUser);
  6510. HLOpcodeGroup group =
  6511. hlsl::GetHLOpcodeGroupByName(userCall->getCalledFunction());
  6512. if (group != HLOpcodeGroup::HLIntrinsic) {
  6513. // Invalid operations.
  6514. Translated = false;
  6515. dxilutil::EmitErrorOnInstruction(userCall, "Invalid operation on typed buffer.");
  6516. return;
  6517. }
  6518. unsigned opcode = hlsl::GetHLOpcode(userCall);
  6519. IntrinsicOp IOP = static_cast<IntrinsicOp>(opcode);
  6520. switch (IOP) {
  6521. case IntrinsicOp::IOP_InterlockedAdd:
  6522. case IntrinsicOp::IOP_InterlockedAnd:
  6523. case IntrinsicOp::IOP_InterlockedExchange:
  6524. case IntrinsicOp::IOP_InterlockedMax:
  6525. case IntrinsicOp::IOP_InterlockedMin:
  6526. case IntrinsicOp::IOP_InterlockedUMax:
  6527. case IntrinsicOp::IOP_InterlockedUMin:
  6528. case IntrinsicOp::IOP_InterlockedOr:
  6529. case IntrinsicOp::IOP_InterlockedXor:
  6530. case IntrinsicOp::IOP_InterlockedCompareStore:
  6531. case IntrinsicOp::IOP_InterlockedCompareExchange: {
  6532. // Invalid operations.
  6533. Translated = false;
  6534. dxilutil::EmitErrorOnInstruction(
  6535. userCall, "Atomic operation on typed buffer is not supported.");
  6536. return;
  6537. } break;
  6538. default:
  6539. // Invalid operations.
  6540. Translated = false;
  6541. dxilutil::EmitErrorOnInstruction(userCall, "Invalid operation on typed buffer.");
  6542. return;
  6543. break;
  6544. }
  6545. }
  6546. GEP->eraseFromParent();
  6547. } else {
  6548. CallInst *userCall = cast<CallInst>(user);
  6549. HLOpcodeGroup group =
  6550. hlsl::GetHLOpcodeGroupByName(userCall->getCalledFunction());
  6551. unsigned opcode = hlsl::GetHLOpcode(userCall);
  6552. if (group == HLOpcodeGroup::HLIntrinsic) {
  6553. IntrinsicOp IOP = static_cast<IntrinsicOp>(opcode);
  6554. if (RC == DXIL::ResourceClass::SRV) {
  6555. // Invalid operations.
  6556. Translated = false;
  6557. switch (IOP) {
  6558. case IntrinsicOp::IOP_InterlockedAdd:
  6559. case IntrinsicOp::IOP_InterlockedAnd:
  6560. case IntrinsicOp::IOP_InterlockedExchange:
  6561. case IntrinsicOp::IOP_InterlockedMax:
  6562. case IntrinsicOp::IOP_InterlockedMin:
  6563. case IntrinsicOp::IOP_InterlockedUMax:
  6564. case IntrinsicOp::IOP_InterlockedUMin:
  6565. case IntrinsicOp::IOP_InterlockedOr:
  6566. case IntrinsicOp::IOP_InterlockedXor:
  6567. case IntrinsicOp::IOP_InterlockedCompareStore:
  6568. case IntrinsicOp::IOP_InterlockedCompareExchange: {
  6569. dxilutil::EmitErrorOnInstruction(
  6570. userCall, "Atomic operation targets must be groupshared on UAV.");
  6571. return;
  6572. } break;
  6573. default:
  6574. dxilutil::EmitErrorOnInstruction(userCall, "Invalid operation on typed buffer.");
  6575. return;
  6576. break;
  6577. }
  6578. }
  6579. switch (IOP) {
  6580. case IntrinsicOp::IOP_InterlockedAdd: {
  6581. ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedAdd);
  6582. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
  6583. helper.addr, /*offset*/ nullptr);
  6584. TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::Add,
  6585. Builder, hlslOP);
  6586. } break;
  6587. case IntrinsicOp::IOP_InterlockedAnd: {
  6588. ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedAnd);
  6589. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
  6590. helper.addr, /*offset*/ nullptr);
  6591. TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::And,
  6592. Builder, hlslOP);
  6593. } break;
  6594. case IntrinsicOp::IOP_InterlockedExchange: {
  6595. ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedExchange);
  6596. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
  6597. helper.addr, /*offset*/ nullptr);
  6598. TranslateAtomicBinaryOperation(
  6599. atomHelper, DXIL::AtomicBinOpCode::Exchange, Builder, hlslOP);
  6600. } break;
  6601. case IntrinsicOp::IOP_InterlockedMax: {
  6602. ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedMax);
  6603. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
  6604. helper.addr, /*offset*/ nullptr);
  6605. TranslateAtomicBinaryOperation(
  6606. atomHelper, DXIL::AtomicBinOpCode::IMax, Builder, hlslOP);
  6607. } break;
  6608. case IntrinsicOp::IOP_InterlockedMin: {
  6609. ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedMin);
  6610. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
  6611. helper.addr, /*offset*/ nullptr);
  6612. TranslateAtomicBinaryOperation(
  6613. atomHelper, DXIL::AtomicBinOpCode::IMin, Builder, hlslOP);
  6614. } break;
  6615. case IntrinsicOp::IOP_InterlockedUMax: {
  6616. ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedUMax);
  6617. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
  6618. helper.addr, /*offset*/ nullptr);
  6619. TranslateAtomicBinaryOperation(
  6620. atomHelper, DXIL::AtomicBinOpCode::UMax, Builder, hlslOP);
  6621. } break;
  6622. case IntrinsicOp::IOP_InterlockedUMin: {
  6623. ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedUMin);
  6624. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
  6625. helper.addr, /*offset*/ nullptr);
  6626. TranslateAtomicBinaryOperation(
  6627. atomHelper, DXIL::AtomicBinOpCode::UMin, Builder, hlslOP);
  6628. } break;
  6629. case IntrinsicOp::IOP_InterlockedOr: {
  6630. ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedOr);
  6631. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
  6632. helper.addr, /*offset*/ nullptr);
  6633. TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::Or,
  6634. Builder, hlslOP);
  6635. } break;
  6636. case IntrinsicOp::IOP_InterlockedXor: {
  6637. ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedXor);
  6638. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
  6639. helper.addr, /*offset*/ nullptr);
  6640. TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::Xor,
  6641. Builder, hlslOP);
  6642. } break;
  6643. case IntrinsicOp::IOP_InterlockedCompareStore:
  6644. case IntrinsicOp::IOP_InterlockedCompareExchange: {
  6645. ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedCompareExchange);
  6646. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicCompareExchange,
  6647. handle, helper.addr, /*offset*/ nullptr);
  6648. TranslateAtomicCmpXChg(atomHelper, Builder, hlslOP);
  6649. } break;
  6650. default:
  6651. DXASSERT(0, "invalid opcode");
  6652. break;
  6653. }
  6654. } else {
  6655. DXASSERT(0, "invalid group");
  6656. }
  6657. userCall->eraseFromParent();
  6658. }
  6659. }
  6660. }
// Lower one HL subscript call (operator[] on cbuffers, resources, mip slices,
// and matrices) into DXIL operations.
// CI         - the HL subscript call to lower.
// opcode     - which flavor of subscript this call represents.
// Translated - set to true when the call was fully lowered (caller may erase
//              CI); set to false when the access is invalid or must be handled
//              by a later pass, in which case CI is left in place.
void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode,
                          HLOperationLowerHelper &helper,
                          HLObjectOperationLowerHelper *pObjHelper,
                          bool &Translated) {
  if (CI->user_empty()) {
    // Dead subscript: nothing reads the result, report success so the caller
    // deletes the call.
    Translated = true;
    return;
  }
  hlsl::OP *hlslOP = &helper.hlslOP;

  Value *ptr = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx);
  if (opcode == HLSubscriptOpcode::CBufferSubscript) {
    // Collapse chained GEPs on the subscript result first so cbuffer offsets
    // are computed from a single GEP.
    HLModule::MergeGepUse(CI);
    // Resource ptr.
    Value *handle = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx);
    if (helper.bLegacyCBufferLoad)
      TranslateCBOperationsLegacy(handle, CI, hlslOP, helper.dxilTypeSys,
                                  helper.dataLayout, pObjHelper);
    else {
      TranslateCBOperations(handle, CI, /*offset*/ hlslOP->GetU32Const(0),
                            hlslOP, helper.dxilTypeSys,
                            CI->getModule()->getDataLayout(), pObjHelper);
    }
    Translated = true;
    return;
  } else if (opcode == HLSubscriptOpcode::DoubleSubscript) {
    // Double subscript (e.g. tex.mips[mip][coord]): the object operand is the
    // resource handle, and the mip level rides as an extra operand.
    // Resource ptr.
    Value *handle = ptr;
    DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
    Value *coord = CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx);
    Value *mipLevel =
        CI->getArgOperand(HLOperandIndex::kDoubleSubscriptMipLevelOpIdx);

    auto U = CI->user_begin();
    DXASSERT(CI->hasOneUse(), "subscript should only has one use");
    // TODO: support store.
    // The single user is assumed to be the load of the subscripted element.
    Instruction *ldInst = cast<Instruction>(*U);
    ResLoadHelper ldHelper(ldInst, handle, coord, mipLevel);
    IRBuilder<> Builder(CI);
    TranslateLoad(ldHelper, RK, Builder, hlslOP, helper.dataLayout);
    ldInst->eraseFromParent();
    Translated = true;
    return;
  } else {
    Type *HandleTy = hlslOP->GetHandleType();
    if (ptr->getType() == HandleTy) {
      // Resource ptr.
      Value *handle = ptr;
      DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
      if (RK == DxilResource::Kind::Invalid) {
        // Unknown resource kind: leave the call for diagnostics elsewhere.
        Translated = false;
        return;
      }
      Translated = true;
      Type *ObjTy = pObjHelper->GetResourceType(handle);
      Type *RetTy = ObjTy->getStructElementType(0);
      if (DXIL::IsStructuredBuffer(RK)) {
        TranslateStructBufSubscript(CI, handle, /*status*/ nullptr, hlslOP, RK,
                                    helper.dataLayout);
      } else if (RetTy->isAggregateType() &&
                 RK == DxilResource::Kind::TypedBuffer) {
        // Typed buffer with an aggregate element: lower through the
        // structured-buffer path first, then patch the emitted DXIL calls
        // below to match typed-buffer operand expectations.
        TranslateStructBufSubscript(CI, handle, /*status*/ nullptr, hlslOP, RK,
                                    helper.dataLayout);
        // Clear offset for typed buf.
        // Iterate a copy of the use iterator since RawBufferLoad patching
        // erases calls while we walk the handle's users.
        for (auto User = handle->user_begin(); User != handle->user_end();) {
          // NOTE: this CI intentionally shadows the outer subscript call; it
          // is one of the DXIL calls produced by the lowering above.
          CallInst *CI = cast<CallInst>(*(User++));
          // Skip not lowered HL functions.
          if (hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction()) !=
              HLOpcodeGroup::NotHL)
            continue;
          switch (hlslOP->GetDxilOpFuncCallInst(CI)) {
          case DXIL::OpCode::BufferLoad: {
            // Typed buffers take a single coordinate; undef the second.
            CI->setArgOperand(DXIL::OperandIndex::kBufferLoadCoord1OpIdx,
                              UndefValue::get(helper.i32Ty));
          } break;
          case DXIL::OpCode::BufferStore: {
            CI->setArgOperand(DXIL::OperandIndex::kBufferStoreCoord1OpIdx,
                              UndefValue::get(helper.i32Ty));
          } break;
          case DXIL::OpCode::AtomicBinOp: {
            CI->setArgOperand(DXIL::OperandIndex::kAtomicBinOpCoord1OpIdx,
                              UndefValue::get(helper.i32Ty));
          } break;
          case DXIL::OpCode::AtomicCompareExchange: {
            CI->setArgOperand(DXIL::OperandIndex::kAtomicCmpExchangeCoord1OpIdx,
                              UndefValue::get(helper.i32Ty));
          } break;
          case DXIL::OpCode::RawBufferLoad: {
            // Structured buffer inside a typed buffer must be converted to
            // typed buffer load. Typed buffer load is equivalent to raw
            // buffer load, except there is no mask.
            StructType *STy =
                cast<StructType>(CI->getFunctionType()->getReturnType());
            Type *ETy = STy->getElementType(0);

            SmallVector<Value *, 4> Args;
            Args.emplace_back(
                hlslOP->GetI32Const((unsigned)DXIL::OpCode::BufferLoad));
            Args.emplace_back(CI->getArgOperand(1));          // handle
            Args.emplace_back(CI->getArgOperand(2));          // index
            Args.emplace_back(UndefValue::get(helper.i32Ty)); // offset

            IRBuilder<> builder(CI);
            Function *newFunction =
                hlslOP->GetOpFunc(DXIL::OpCode::BufferLoad, ETy);
            CallInst *newCall = builder.CreateCall(newFunction, Args);
            CI->replaceAllUsesWith(newCall);
            CI->eraseFromParent();
          } break;
          default:
            DXASSERT(0, "Invalid operation on resource handle");
            break;
          }
        }
      } else {
        TranslateDefaultSubscript(CI, helper, pObjHelper, Translated);
      }
      return;
    }
  }

  // Remaining case: matrix subscript on a pointer.
  Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx);
  if (IsLocalVariablePtr(basePtr) || IsSharedMemPtr(basePtr)) {
    // Translate matrix into vector of array for share memory or local
    // variable should be done in HLMatrixLowerPass
    DXASSERT_NOMSG(0);
    Translated = true;
    return;
  }

  // Other case should be take care in TranslateStructBufSubscript or
  // TranslateCBOperations.
  Translated = false;
  return;
}
  6783. }
  6784. void TranslateSubscriptOperation(Function *F, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper) {
  6785. for (auto U = F->user_begin(); U != F->user_end();) {
  6786. Value *user = *(U++);
  6787. if (!isa<Instruction>(user))
  6788. continue;
  6789. // must be call inst
  6790. CallInst *CI = cast<CallInst>(user);
  6791. unsigned opcode = GetHLOpcode(CI);
  6792. bool Translated = true;
  6793. TranslateHLSubscript(
  6794. CI, static_cast<HLSubscriptOpcode>(opcode), helper, pObjHelper, Translated);
  6795. if (Translated) {
  6796. // delete the call
  6797. DXASSERT(CI->use_empty(),
  6798. "else TranslateHLSubscript didn't replace/erase uses");
  6799. CI->eraseFromParent();
  6800. }
  6801. }
  6802. }
  6803. // Create BitCast if ptr, otherwise, create alloca of new type, write to bitcast of alloca, and return load from alloca
  6804. // If bOrigAllocaTy is true: create alloca of old type instead, write to alloca, and return load from bitcast of alloca
  6805. static Instruction *BitCastValueOrPtr(Value* V, Instruction *Insert, Type *Ty, bool bOrigAllocaTy = false, const Twine &Name = "") {
  6806. IRBuilder<> Builder(Insert);
  6807. if (Ty->isPointerTy()) {
  6808. // If pointer, we can bitcast directly
  6809. return cast<Instruction>(Builder.CreateBitCast(V, Ty, Name));
  6810. } else {
  6811. // If value, we have to alloca, store to bitcast ptr, and load
  6812. IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Insert));
  6813. Type *allocaTy = bOrigAllocaTy ? V->getType() : Ty;
  6814. Type *otherTy = bOrigAllocaTy ? Ty : V->getType();
  6815. Instruction *allocaInst = AllocaBuilder.CreateAlloca(allocaTy);
  6816. Instruction *bitCast = cast<Instruction>(Builder.CreateBitCast(allocaInst, otherTy->getPointerTo()));
  6817. Builder.CreateStore(V, bOrigAllocaTy ? allocaInst : bitCast);
  6818. return Builder.CreateLoad(bOrigAllocaTy ? bitCast : allocaInst, Name);
  6819. }
  6820. }
  6821. static Instruction *CreateTransposeShuffle(IRBuilder<> &Builder, Value *vecVal, unsigned toRows, unsigned toCols) {
  6822. SmallVector<int, 16> castMask(toCols * toRows);
  6823. unsigned idx = 0;
  6824. for (unsigned r = 0; r < toRows; r++)
  6825. for (unsigned c = 0; c < toCols; c++)
  6826. castMask[idx++] = c * toRows + r;
  6827. return cast<Instruction>(
  6828. Builder.CreateShuffleVector(vecVal, vecVal, castMask));
  6829. }
// Lower all calls to one HL builtin function F according to its opcode group.
// HLIntrinsic calls map to DXIL operations; HLMatLoadStore and HLCast handle
// the matrix cases that survive into lib targets; HLSubscript dispatches to
// TranslateSubscriptOperation. Other groups are left untouched here.
void TranslateHLBuiltinOperation(Function *F, HLOperationLowerHelper &helper,
                                 hlsl::HLOpcodeGroup group,
                                 HLObjectOperationLowerHelper *pObjHelper) {
  if (group == HLOpcodeGroup::HLIntrinsic) {
    // map to dxil operations
    for (auto U = F->user_begin(); U != F->user_end();) {
      // Advance before lowering: the call may be erased below.
      Value *User = *(U++);
      if (!isa<Instruction>(User))
        continue;
      // must be call inst
      CallInst *CI = cast<CallInst>(User);

      // Keep the instruction to lower by other function.
      bool Translated = true;

      TranslateBuiltinIntrinsic(CI, helper, pObjHelper, Translated);

      if (Translated) {
        // delete the call
        DXASSERT(CI->use_empty(),
                 "else TranslateBuiltinIntrinsic didn't replace/erase uses");
        CI->eraseFromParent();
      }
    }
  } else {
    if (group == HLOpcodeGroup::HLMatLoadStore) {
      // Both ld/st use arg1 for the pointer.
      Type *PtrTy =
          F->getFunctionType()->getParamType(HLOperandIndex::kMatLoadPtrOpIdx);

      if (PtrTy->getPointerAddressSpace() == DXIL::kTGSMAddrSpace) {
        // Translate matrix into vector of array for shared memory
        // variable should be done in HLMatrixLowerPass.
        if (!F->user_empty())
          F->getContext().emitError("Fail to lower matrix load/store.");
      } else if (PtrTy->getPointerAddressSpace() == DXIL::kDefaultAddrSpace) {
        // Default address space may be function argument in lib target
        if (!F->user_empty()) {
          for (auto U = F->user_begin(); U != F->user_end();) {
            Value *User = *(U++);
            if (!isa<Instruction>(User))
              continue;
            // must be call inst
            CallInst *CI = cast<CallInst>(User);
            IRBuilder<> Builder(CI);
            HLMatLoadStoreOpcode opcode =
                static_cast<HLMatLoadStoreOpcode>(hlsl::GetHLOpcode(CI));
            switch (opcode) {
            case HLMatLoadStoreOpcode::ColMatStore:
            case HLMatLoadStoreOpcode::RowMatStore: {
              // Store: reinterpret the matrix pointer as the vector type and
              // emit a plain store of the vector value.
              Value *vecVal =
                  CI->getArgOperand(HLOperandIndex::kMatStoreValOpIdx);
              Value *matPtr =
                  CI->getArgOperand(HLOperandIndex::kMatStoreDstPtrOpIdx);
              Value *castPtr = Builder.CreateBitCast(
                  matPtr, vecVal->getType()->getPointerTo());
              Builder.CreateStore(vecVal, castPtr);
              CI->eraseFromParent();
            } break;
            case HLMatLoadStoreOpcode::ColMatLoad:
            case HLMatLoadStoreOpcode::RowMatLoad: {
              // Load: reinterpret the matrix pointer as the call's result
              // type and emit a plain load.
              Value *matPtr =
                  CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx);
              Value *castPtr =
                  Builder.CreateBitCast(matPtr, CI->getType()->getPointerTo());
              Value *vecVal = Builder.CreateLoad(castPtr);
              CI->replaceAllUsesWith(vecVal);
              CI->eraseFromParent();
            } break;
            }
          }
        }
      }
    } else if (group == HLOpcodeGroup::HLCast) {
      // HLCast may be used on matrix value function argument in lib target
      if (!F->user_empty()) {
        for (auto U = F->user_begin(); U != F->user_end();) {
          Value *User = *(U++);
          if (!isa<Instruction>(User))
            continue;
          // must be call inst
          CallInst *CI = cast<CallInst>(User);
          IRBuilder<> Builder(CI);
          HLCastOpcode opcode = static_cast<HLCastOpcode>(hlsl::GetHLOpcode(CI));
          bool bTranspose = false;
          bool bColDest = false;
          switch (opcode) {
          case HLCastOpcode::RowMatrixToColMatrix:
            bColDest = true;
            // Intentional fallthrough: row->col also needs a transpose.
          case HLCastOpcode::ColMatrixToRowMatrix:
            bTranspose = true;
            // Intentional fallthrough to the shared bitcast path.
          case HLCastOpcode::ColMatrixToVecCast:
          case HLCastOpcode::RowMatrixToVecCast: {
            Value *matVal = CI->getArgOperand(HLOperandIndex::kInitFirstArgOpIdx);
            // Reinterpret the matrix value (or pointer) as the vector type.
            Value *vecVal = BitCastValueOrPtr(matVal, CI, CI->getType(),
                                              /*bOrigAllocaTy*/ false,
                                              matVal->getName());
            if (bTranspose) {
              HLMatrixType MatTy = HLMatrixType::cast(matVal->getType());
              unsigned row = MatTy.getNumRows();
              unsigned col = MatTy.getNumColumns();
              if (bColDest)
                std::swap(row, col);
              vecVal = CreateTransposeShuffle(Builder, vecVal, row, col);
            }
            CI->replaceAllUsesWith(vecVal);
            CI->eraseFromParent();
          } break;
          }
        }
      }
    } else if (group == HLOpcodeGroup::HLSubscript) {
      TranslateSubscriptOperation(F, helper, pObjHelper);
    }
    // map to math function or llvm ir
  }
}
  6935. typedef std::unordered_map<llvm::Instruction *, llvm::Value *> HandleMap;
  6936. static void TranslateHLExtension(Function *F,
  6937. HLSLExtensionsCodegenHelper *helper,
  6938. OP& hlslOp) {
  6939. // Find all calls to the function F.
  6940. // Store the calls in a vector for now to be replaced the loop below.
  6941. // We use a two step "find then replace" to avoid removing uses while
  6942. // iterating.
  6943. SmallVector<CallInst *, 8> CallsToReplace;
  6944. for (User *U : F->users()) {
  6945. if (CallInst *CI = dyn_cast<CallInst>(U)) {
  6946. CallsToReplace.push_back(CI);
  6947. }
  6948. }
  6949. // Get the lowering strategy to use for this intrinsic.
  6950. llvm::StringRef LowerStrategy = GetHLLowerStrategy(F);
  6951. ExtensionLowering lower(LowerStrategy, helper, hlslOp);
  6952. // Replace all calls that were successfully translated.
  6953. for (CallInst *CI : CallsToReplace) {
  6954. Value *Result = lower.Translate(CI);
  6955. if (Result && Result != CI) {
  6956. CI->replaceAllUsesWith(Result);
  6957. CI->eraseFromParent();
  6958. }
  6959. }
  6960. }
  6961. namespace hlsl {
  6962. void TranslateBuiltinOperations(
  6963. HLModule &HLM, HLSLExtensionsCodegenHelper *extCodegenHelper,
  6964. std::unordered_set<LoadInst *> &UpdateCounterSet) {
  6965. HLOperationLowerHelper helper(HLM);
  6966. HLObjectOperationLowerHelper objHelper = {HLM, UpdateCounterSet};
  6967. Module *M = HLM.GetModule();
  6968. SmallVector<Function *, 4> NonUniformResourceIndexIntrinsics;
  6969. // generate dxil operation
  6970. for (iplist<Function>::iterator F : M->getFunctionList()) {
  6971. if (F->user_empty())
  6972. continue;
  6973. if (!F->isDeclaration()) {
  6974. continue;
  6975. }
  6976. hlsl::HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(F);
  6977. if (group == HLOpcodeGroup::NotHL) {
  6978. // Nothing to do.
  6979. continue;
  6980. }
  6981. if (group == HLOpcodeGroup::HLExtIntrinsic) {
  6982. TranslateHLExtension(F, extCodegenHelper, helper.hlslOP);
  6983. continue;
  6984. }
  6985. if (group == HLOpcodeGroup::HLIntrinsic) {
  6986. CallInst *CI = cast<CallInst>(*F->user_begin()); // must be call inst
  6987. unsigned opcode = hlsl::GetHLOpcode(CI);
  6988. if (opcode == (unsigned)IntrinsicOp::IOP_NonUniformResourceIndex) {
  6989. NonUniformResourceIndexIntrinsics.push_back(F);
  6990. continue;
  6991. }
  6992. }
  6993. TranslateHLBuiltinOperation(F, helper, group, &objHelper);
  6994. }
  6995. // Translate last so value placed in NonUniformSet is still valid.
  6996. if (!NonUniformResourceIndexIntrinsics.empty()) {
  6997. for (auto F : NonUniformResourceIndexIntrinsics) {
  6998. TranslateHLBuiltinOperation(F, helper, HLOpcodeGroup::HLIntrinsic, &objHelper);
  6999. }
  7000. }
  7001. }
  7002. }