spirv_hlsl.cpp
/*
 * Copyright 2016-2021 Robert Konrad
 * SPDX-License-Identifier: Apache-2.0 OR MIT
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * At your option, you may choose to accept this material under either:
 * 1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or
 * 2. The MIT License, found at <http://opensource.org/licenses/MIT>.
 */

#include "spirv_hlsl.hpp"
#include "GLSL.std.450.h"
#include <algorithm>
#include <assert.h>

using namespace spv;
using namespace SPIRV_CROSS_NAMESPACE;
using namespace std;

enum class ImageFormatNormalizedState
{
	None = 0,
	Unorm = 1,
	Snorm = 2
};
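
// Maps a SPIR-V image format to whether its texel type needs a unorm/snorm
// qualifier when declared in HLSL.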
static ImageFormatNormalizedState image_format_to_normalized_state(ImageFormat fmt)
{
	switch (fmt)
	{
	case ImageFormatR8:
	case ImageFormatR16:
	case ImageFormatRg8:
	case ImageFormatRg16:
	case ImageFormatRgba8:
	case ImageFormatRgba16:
	case ImageFormatRgb10A2:
		return ImageFormatNormalizedState::Unorm;

	case ImageFormatR8Snorm:
	case ImageFormatR16Snorm:
	case ImageFormatRg8Snorm:
	case ImageFormatRg16Snorm:
	case ImageFormatRgba8Snorm:
	case ImageFormatRgba16Snorm:
		return ImageFormatNormalizedState::Snorm;

	default:
		break;
	}

	return ImageFormatNormalizedState::None;
}
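
// Returns the number of components in the texel for a given typed image format,
// used when sizing the template argument of Buffer<>/Texture*<> declarations.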
static unsigned image_format_to_components(ImageFormat fmt)
{
	switch (fmt)
	{
	case ImageFormatR8:
	case ImageFormatR16:
	case ImageFormatR8Snorm:
	case ImageFormatR16Snorm:
	case ImageFormatR16f:
	case ImageFormatR32f:
	case ImageFormatR8i:
	case ImageFormatR16i:
	case ImageFormatR32i:
	case ImageFormatR8ui:
	case ImageFormatR16ui:
	case ImageFormatR32ui:
		return 1;

	case ImageFormatRg8:
	case ImageFormatRg16:
	case ImageFormatRg8Snorm:
	case ImageFormatRg16Snorm:
	case ImageFormatRg16f:
	case ImageFormatRg32f:
	case ImageFormatRg8i:
	case ImageFormatRg16i:
	case ImageFormatRg32i:
	case ImageFormatRg8ui:
	case ImageFormatRg16ui:
	case ImageFormatRg32ui:
		return 2;

	case ImageFormatR11fG11fB10f:
		return 3;

	case ImageFormatRgba8:
	case ImageFormatRgba16:
	case ImageFormatRgb10A2:
	case ImageFormatRgba8Snorm:
	case ImageFormatRgba16Snorm:
	case ImageFormatRgba16f:
	case ImageFormatRgba32f:
	case ImageFormatRgba8i:
	case ImageFormatRgba16i:
	case ImageFormatRgba32i:
	case ImageFormatRgba8ui:
	case ImageFormatRgba16ui:
	case ImageFormatRgba32ui:
	case ImageFormatRgb10a2ui:
		return 4;

	case ImageFormatUnknown:
		return 4; // Assume 4.

	default:
		SPIRV_CROSS_THROW("Unrecognized typed image format.");
	}
}
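
// Translates a typed image format into the HLSL texel type used inside
// RWTexture*<T>/RWBuffer<T>, e.g. Rgba8 becomes "unorm float4".
// Throws if the declared format disagrees with the sampled base type.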
static string image_format_to_type(ImageFormat fmt, SPIRType::BaseType basetype)
{
	switch (fmt)
	{
	case ImageFormatR8:
	case ImageFormatR16:
		if (basetype != SPIRType::Float)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "unorm float";
	case ImageFormatRg8:
	case ImageFormatRg16:
		if (basetype != SPIRType::Float)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "unorm float2";
	case ImageFormatRgba8:
	case ImageFormatRgba16:
		if (basetype != SPIRType::Float)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "unorm float4";
	case ImageFormatRgb10A2:
		if (basetype != SPIRType::Float)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "unorm float4";
	case ImageFormatR8Snorm:
	case ImageFormatR16Snorm:
		if (basetype != SPIRType::Float)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "snorm float";
	case ImageFormatRg8Snorm:
	case ImageFormatRg16Snorm:
		if (basetype != SPIRType::Float)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "snorm float2";
	case ImageFormatRgba8Snorm:
	case ImageFormatRgba16Snorm:
		if (basetype != SPIRType::Float)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "snorm float4";
	case ImageFormatR16f:
	case ImageFormatR32f:
		if (basetype != SPIRType::Float)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "float";
	case ImageFormatRg16f:
	case ImageFormatRg32f:
		if (basetype != SPIRType::Float)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "float2";
	case ImageFormatRgba16f:
	case ImageFormatRgba32f:
		if (basetype != SPIRType::Float)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "float4";
	case ImageFormatR11fG11fB10f:
		if (basetype != SPIRType::Float)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "float3";
	case ImageFormatR8i:
	case ImageFormatR16i:
	case ImageFormatR32i:
		if (basetype != SPIRType::Int)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "int";
	case ImageFormatRg8i:
	case ImageFormatRg16i:
	case ImageFormatRg32i:
		if (basetype != SPIRType::Int)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "int2";
	case ImageFormatRgba8i:
	case ImageFormatRgba16i:
	case ImageFormatRgba32i:
		if (basetype != SPIRType::Int)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "int4";
	case ImageFormatR8ui:
	case ImageFormatR16ui:
	case ImageFormatR32ui:
		if (basetype != SPIRType::UInt)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "uint";
	case ImageFormatRg8ui:
	case ImageFormatRg16ui:
	case ImageFormatRg32ui:
		if (basetype != SPIRType::UInt)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "uint2";
	case ImageFormatRgba8ui:
	case ImageFormatRgba16ui:
	case ImageFormatRgba32ui:
		if (basetype != SPIRType::UInt)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "uint4";
	case ImageFormatRgb10a2ui:
		if (basetype != SPIRType::UInt)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "uint4";
	case ImageFormatUnknown:
		switch (basetype)
		{
		case SPIRType::Float:
			return "float4";
		case SPIRType::Int:
			return "int4";
		case SPIRType::UInt:
			return "uint4";
		default:
			SPIRV_CROSS_THROW("Unsupported base type for image.");
		}
	default:
		SPIRV_CROSS_THROW("Unrecognized typed image format.");
	}
}
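
// Emits the SM 4.0+ resource type for an image, e.g. Texture2DArray<float4>,
// RWTexture2D<unorm float4>, or RasterizerOrdered* variants for interlocked resources.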
string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t id)
{
	auto &imagetype = get<SPIRType>(type.image.type);
	const char *dim = nullptr;
	bool typed_load = false;
	uint32_t components = 4;

	bool force_image_srv = hlsl_options.nonwritable_uav_texture_as_srv && has_decoration(id, DecorationNonWritable);

	switch (type.image.dim)
	{
	case Dim1D:
		typed_load = type.image.sampled == 2;
		dim = "1D";
		break;
	case Dim2D:
		typed_load = type.image.sampled == 2;
		dim = "2D";
		break;
	case Dim3D:
		typed_load = type.image.sampled == 2;
		dim = "3D";
		break;
	case DimCube:
		if (type.image.sampled == 2)
			SPIRV_CROSS_THROW("RWTextureCube does not exist in HLSL.");
		dim = "Cube";
		break;
	case DimRect:
		SPIRV_CROSS_THROW("Rectangle texture support is not yet implemented for HLSL."); // TODO
	case DimBuffer:
		if (type.image.sampled == 1)
			return join("Buffer<", type_to_glsl(imagetype), components, ">");
		else if (type.image.sampled == 2)
		{
			if (interlocked_resources.count(id))
				return join("RasterizerOrderedBuffer<", image_format_to_type(type.image.format, imagetype.basetype),
				            ">");

			typed_load = !force_image_srv && type.image.sampled == 2;

			const char *rw = force_image_srv ? "" : "RW";
			return join(rw, "Buffer<",
			            typed_load ? image_format_to_type(type.image.format, imagetype.basetype) :
			                         join(type_to_glsl(imagetype), components),
			            ">");
		}
		else
  270. SPIRV_CROSS_THROW("Sampler buffers must be either sampled or unsampled. Cannot deduce in runtime.");
	case DimSubpassData:
		dim = "2D";
		typed_load = false;
		break;
	default:
		SPIRV_CROSS_THROW("Invalid dimension.");
	}

	const char *arrayed = type.image.arrayed ? "Array" : "";
	const char *ms = type.image.ms ? "MS" : "";
	const char *rw = typed_load && !force_image_srv ? "RW" : "";

	if (force_image_srv)
		typed_load = false;

	if (typed_load && interlocked_resources.count(id))
		rw = "RasterizerOrdered";

	return join(rw, "Texture", dim, ms, arrayed, "<",
	            typed_load ? image_format_to_type(type.image.format, imagetype.basetype) :
	                         join(type_to_glsl(imagetype), components),
	            ">");
}
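
// Emits the SM 3.0-style (GLSL-like) sampler type, e.g. sampler2D or samplerCUBE.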
string CompilerHLSL::image_type_hlsl_legacy(const SPIRType &type, uint32_t /*id*/)
{
	auto &imagetype = get<SPIRType>(type.image.type);
	string res;

	switch (imagetype.basetype)
	{
	case SPIRType::Int:
		res = "i";
		break;
	case SPIRType::UInt:
		res = "u";
		break;
	default:
		break;
	}

	if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData)
		return res + "subpassInput" + (type.image.ms ? "MS" : "");

	// If we're emulating subpassInput with samplers, force sampler2D
	// so we don't have to specify format.
	if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData)
	{
		// Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V.
		if (type.image.dim == DimBuffer && type.image.sampled == 1)
			res += "sampler";
		else
			res += type.image.sampled == 2 ? "image" : "texture";
	}
	else
		res += "sampler";

	switch (type.image.dim)
	{
	case Dim1D:
		res += "1D";
		break;
	case Dim2D:
		res += "2D";
		break;
	case Dim3D:
		res += "3D";
		break;
	case DimCube:
		res += "CUBE";
		break;
	case DimBuffer:
		res += "Buffer";
		break;
	case DimSubpassData:
		res += "2D";
		break;
	default:
		SPIRV_CROSS_THROW("Only 1D, 2D, 3D, Buffer, InputTarget and Cube textures supported.");
	}

	if (type.image.ms)
		res += "MS";
	if (type.image.arrayed)
		res += "Array";

	return res;
}
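
// Dispatches between the legacy (SM <= 3.0) and modern image type emitters.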
string CompilerHLSL::image_type_hlsl(const SPIRType &type, uint32_t id)
{
	if (hlsl_options.shader_model <= 30)
		return image_type_hlsl_legacy(type, id);
	else
		return image_type_hlsl_modern(type, id);
}

// The optional id parameter indicates the object whose type we are trying
// to find the description for. Most type descriptions do not depend on a
// specific object's use of that type.
string CompilerHLSL::type_to_glsl(const SPIRType &type, uint32_t id)
{
	// Ignore the pointer type since GLSL doesn't have pointers.

	switch (type.basetype)
	{
	case SPIRType::Struct:
		// Need OpName lookup here to get a "sensible" name for a struct.
		if (backend.explicit_struct_type)
			return join("struct ", to_name(type.self));
		else
			return to_name(type.self);

	case SPIRType::Image:
	case SPIRType::SampledImage:
		return image_type_hlsl(type, id);

	case SPIRType::Sampler:
		return comparison_ids.count(id) ? "SamplerComparisonState" : "SamplerState";

	case SPIRType::Void:
		return "void";

	default:
		break;
	}

	if (type.vecsize == 1 && type.columns == 1) // Scalar builtin
	{
		switch (type.basetype)
		{
		case SPIRType::Boolean:
			return "bool";
		case SPIRType::Int:
			return backend.basic_int_type;
		case SPIRType::UInt:
			return backend.basic_uint_type;
		case SPIRType::AtomicCounter:
			return "atomic_uint";
		case SPIRType::Half:
			if (hlsl_options.enable_16bit_types)
				return "half";
			else
				return "min16float";
		case SPIRType::Short:
			if (hlsl_options.enable_16bit_types)
				return "int16_t";
			else
				return "min16int";
		case SPIRType::UShort:
			if (hlsl_options.enable_16bit_types)
				return "uint16_t";
			else
				return "min16uint";
		case SPIRType::Float:
			return "float";
		case SPIRType::Double:
			return "double";
		case SPIRType::Int64:
			if (hlsl_options.shader_model < 60)
				SPIRV_CROSS_THROW("64-bit integers only supported in SM 6.0.");
			return "int64_t";
		case SPIRType::UInt64:
			if (hlsl_options.shader_model < 60)
				SPIRV_CROSS_THROW("64-bit integers only supported in SM 6.0.");
			return "uint64_t";
		case SPIRType::AccelerationStructure:
			return "RaytracingAccelerationStructure";
		case SPIRType::RayQuery:
			return "RayQuery<RAY_FLAG_NONE>";
		default:
			return "???";
		}
	}
	else if (type.vecsize > 1 && type.columns == 1) // Vector builtin
	{
		switch (type.basetype)
		{
		case SPIRType::Boolean:
			return join("bool", type.vecsize);
		case SPIRType::Int:
			return join("int", type.vecsize);
		case SPIRType::UInt:
			return join("uint", type.vecsize);
		case SPIRType::Half:
			return join(hlsl_options.enable_16bit_types ? "half" : "min16float", type.vecsize);
		case SPIRType::Short:
			return join(hlsl_options.enable_16bit_types ? "int16_t" : "min16int", type.vecsize);
		case SPIRType::UShort:
			return join(hlsl_options.enable_16bit_types ? "uint16_t" : "min16uint", type.vecsize);
		case SPIRType::Float:
			return join("float", type.vecsize);
		case SPIRType::Double:
			return join("double", type.vecsize);
		case SPIRType::Int64:
			return join("i64vec", type.vecsize);
		case SPIRType::UInt64:
			return join("u64vec", type.vecsize);
		default:
			return "???";
		}
	}
	else
	{
		switch (type.basetype)
		{
		case SPIRType::Boolean:
			return join("bool", type.columns, "x", type.vecsize);
		case SPIRType::Int:
			return join("int", type.columns, "x", type.vecsize);
		case SPIRType::UInt:
			return join("uint", type.columns, "x", type.vecsize);
		case SPIRType::Half:
			return join(hlsl_options.enable_16bit_types ? "half" : "min16float", type.columns, "x", type.vecsize);
		case SPIRType::Short:
			return join(hlsl_options.enable_16bit_types ? "int16_t" : "min16int", type.columns, "x", type.vecsize);
		case SPIRType::UShort:
			return join(hlsl_options.enable_16bit_types ? "uint16_t" : "min16uint", type.columns, "x", type.vecsize);
		case SPIRType::Float:
			return join("float", type.columns, "x", type.vecsize);
		case SPIRType::Double:
			return join("double", type.columns, "x", type.vecsize);
		// Matrix types not supported for int64/uint64.
		default:
			return "???";
		}
	}
}
void CompilerHLSL::emit_header()
{
	for (auto &header : header_lines)
		statement(header);

	if (header_lines.size() > 0)
	{
		statement("");
	}
}

void CompilerHLSL::emit_interface_block_globally(const SPIRVariable &var)
{
	add_resource_name(var.self);

	// The global copies of I/O variables should not contain interpolation qualifiers.
	// These are emitted inside the interface structs.
	auto &flags = ir.meta[var.self].decoration.decoration_flags;
	auto old_flags = flags;
	flags.reset();
	statement("static ", variable_decl(var), ";");
	flags = old_flags;
}

const char *CompilerHLSL::to_storage_qualifiers_glsl(const SPIRVariable &var)
{
	// Input and output variables are handled specially in HLSL backend.
	// The variables are declared as global, private variables, and do not need any qualifiers.
	if (var.storage == StorageClassUniformConstant || var.storage == StorageClassUniform ||
	    var.storage == StorageClassPushConstant)
	{
		return "uniform ";
	}

	return "";
}
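
// Declares the active output builtins as members of the stage output struct,
// mapping each SPIR-V builtin to its HLSL system-value semantic.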
void CompilerHLSL::emit_builtin_outputs_in_struct()
{
	auto &execution = get_entry_point();

	bool legacy = hlsl_options.shader_model <= 30;
	active_output_builtins.for_each_bit([&](uint32_t i) {
		const char *type = nullptr;
		const char *semantic = nullptr;
		auto builtin = static_cast<BuiltIn>(i);
		switch (builtin)
		{
		case BuiltInPosition:
			type = is_position_invariant() && backend.support_precise_qualifier ? "precise float4" : "float4";
			semantic = legacy ? "POSITION" : "SV_Position";
			break;

		case BuiltInSampleMask:
			if (hlsl_options.shader_model < 41 || execution.model != ExecutionModelFragment)
				SPIRV_CROSS_THROW("Sample Mask output is only supported in PS 4.1 or higher.");
			type = "uint";
			semantic = "SV_Coverage";
			break;

		case BuiltInFragDepth:
			type = "float";
			if (legacy)
			{
				semantic = "DEPTH";
			}
			else
			{
				if (hlsl_options.shader_model >= 50 && execution.flags.get(ExecutionModeDepthGreater))
					semantic = "SV_DepthGreaterEqual";
				else if (hlsl_options.shader_model >= 50 && execution.flags.get(ExecutionModeDepthLess))
					semantic = "SV_DepthLessEqual";
				else
					semantic = "SV_Depth";
			}
			break;

		case BuiltInClipDistance:
		{
			static const char *types[] = { "float", "float2", "float3", "float4" };

			// HLSL is a bit weird here, use SV_ClipDistance0, SV_ClipDistance1 and so on with vectors.
			if (execution.model == ExecutionModelMeshEXT)
			{
				if (clip_distance_count > 4)
					SPIRV_CROSS_THROW("Clip distance count > 4 not supported for mesh shaders.");

				if (clip_distance_count == 1)
				{
					// Avoids having to hack up access_chain code. Makes it trivially indexable.
					statement("float gl_ClipDistance[1] : SV_ClipDistance;");
				}
				else
				{
					// Replace array with vector directly, avoids any weird fixup path.
					statement(types[clip_distance_count - 1], " gl_ClipDistance : SV_ClipDistance;");
				}
			}
			else
			{
				for (uint32_t clip = 0; clip < clip_distance_count; clip += 4)
				{
					uint32_t to_declare = clip_distance_count - clip;
					if (to_declare > 4)
						to_declare = 4;

					uint32_t semantic_index = clip / 4;

					statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index,
					          " : SV_ClipDistance", semantic_index, ";");
				}
			}
			break;
		}

		case BuiltInCullDistance:
		{
			static const char *types[] = { "float", "float2", "float3", "float4" };

			// HLSL is a bit weird here, use SV_CullDistance0, SV_CullDistance1 and so on with vectors.
			if (execution.model == ExecutionModelMeshEXT)
			{
				if (cull_distance_count > 4)
					SPIRV_CROSS_THROW("Cull distance count > 4 not supported for mesh shaders.");

				if (cull_distance_count == 1)
				{
					// Avoids having to hack up access_chain code. Makes it trivially indexable.
					statement("float gl_CullDistance[1] : SV_CullDistance;");
				}
				else
				{
					// Replace array with vector directly, avoids any weird fixup path.
					statement(types[cull_distance_count - 1], " gl_CullDistance : SV_CullDistance;");
				}
			}
			else
			{
				for (uint32_t cull = 0; cull < cull_distance_count; cull += 4)
				{
					uint32_t to_declare = cull_distance_count - cull;
					if (to_declare > 4)
						to_declare = 4;

					uint32_t semantic_index = cull / 4;

					statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index,
					          " : SV_CullDistance", semantic_index, ";");
				}
			}
			break;
		}

		case BuiltInPointSize:
			// If point_size_compat is enabled, just ignore PointSize.
			// PointSize does not exist in HLSL, but some code bases might want to be able to use these shaders,
			// even if it means working around the missing feature.
			if (legacy)
			{
				type = "float";
				semantic = "PSIZE";
			}
			else if (!hlsl_options.point_size_compat)
				SPIRV_CROSS_THROW("Unsupported builtin in HLSL.");
			break;

		case BuiltInLayer:
		case BuiltInPrimitiveId:
		case BuiltInViewportIndex:
		case BuiltInPrimitiveShadingRateKHR:
		case BuiltInCullPrimitiveEXT:
			// Per-primitive attributes are handled separately.
			break;

		case BuiltInPrimitivePointIndicesEXT:
		case BuiltInPrimitiveLineIndicesEXT:
		case BuiltInPrimitiveTriangleIndicesEXT:
			// The meshlet local-index buffer is handled separately.
			break;
		default:
			SPIRV_CROSS_THROW("Unsupported builtin in HLSL.");
		}

		if (type && semantic)
			statement(type, " ", builtin_to_glsl(builtin, StorageClassOutput), " : ", semantic, ";");
	});
}
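
// Declares the per-primitive mesh-shader output builtins (layer, primitive ID,
// viewport index, shading rate, cull flag) with their HLSL semantics.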
void CompilerHLSL::emit_builtin_primitive_outputs_in_struct()
{
	active_output_builtins.for_each_bit([&](uint32_t i) {
		const char *type = nullptr;
		const char *semantic = nullptr;
		auto builtin = static_cast<BuiltIn>(i);
		switch (builtin)
		{
		case BuiltInLayer:
		{
			if (hlsl_options.shader_model < 50)
				SPIRV_CROSS_THROW("Render target array index output is only supported in SM 5.0 or higher.");
			type = "uint";
			semantic = "SV_RenderTargetArrayIndex";
			break;
		}

		case BuiltInPrimitiveId:
			type = "uint";
			semantic = "SV_PrimitiveID";
			break;

		case BuiltInViewportIndex:
			type = "uint";
			semantic = "SV_ViewportArrayIndex";
			break;

		case BuiltInPrimitiveShadingRateKHR:
			type = "uint";
			semantic = "SV_ShadingRate";
			break;

		case BuiltInCullPrimitiveEXT:
			type = "bool";
			semantic = "SV_CullPrimitive";
			break;

		default:
			break;
		}

		if (type && semantic)
			statement(type, " ", builtin_to_glsl(builtin, StorageClassOutput), " : ", semantic, ";");
	});
}
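
// Declares the active input builtins as members of the stage input struct,
// mapping each SPIR-V builtin to its HLSL system-value semantic.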
void CompilerHLSL::emit_builtin_inputs_in_struct()
{
	bool legacy = hlsl_options.shader_model <= 30;
	active_input_builtins.for_each_bit([&](uint32_t i) {
		const char *type = nullptr;
		const char *semantic = nullptr;
		auto builtin = static_cast<BuiltIn>(i);
		switch (builtin)
		{
		case BuiltInFragCoord:
			type = "float4";
			semantic = legacy ? "VPOS" : "SV_Position";
			break;

		case BuiltInVertexId:
		case BuiltInVertexIndex:
			if (legacy)
				SPIRV_CROSS_THROW("Vertex index not supported in SM 3.0 or lower.");
			type = "uint";
			semantic = "SV_VertexID";
			break;

		case BuiltInPrimitiveId:
			type = "uint";
			semantic = "SV_PrimitiveID";
			break;

		case BuiltInInstanceId:
		case BuiltInInstanceIndex:
			if (legacy)
				SPIRV_CROSS_THROW("Instance index not supported in SM 3.0 or lower.");
			type = "uint";
			semantic = "SV_InstanceID";
			break;

		case BuiltInSampleId:
			if (legacy)
				SPIRV_CROSS_THROW("Sample ID not supported in SM 3.0 or lower.");
			type = "uint";
			semantic = "SV_SampleIndex";
			break;

		case BuiltInSampleMask:
			if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelFragment)
				SPIRV_CROSS_THROW("Sample Mask input is only supported in PS 5.0 or higher.");
			type = "uint";
			semantic = "SV_Coverage";
			break;

		case BuiltInGlobalInvocationId:
			type = "uint3";
			semantic = "SV_DispatchThreadID";
			break;

		case BuiltInLocalInvocationId:
			type = "uint3";
			semantic = "SV_GroupThreadID";
			break;

		case BuiltInLocalInvocationIndex:
			type = "uint";
			semantic = "SV_GroupIndex";
			break;

		case BuiltInWorkgroupId:
			type = "uint3";
			semantic = "SV_GroupID";
			break;

		case BuiltInFrontFacing:
			type = "bool";
			semantic = "SV_IsFrontFace";
			break;

		case BuiltInViewIndex:
			if (hlsl_options.shader_model < 61 ||
			    (get_entry_point().model != ExecutionModelVertex && get_entry_point().model != ExecutionModelFragment))
				SPIRV_CROSS_THROW("View Index input is only supported in VS and PS 6.1 or higher.");
			type = "uint";
			semantic = "SV_ViewID";
			break;

		case BuiltInNumWorkgroups:
		case BuiltInSubgroupSize:
		case BuiltInSubgroupLocalInvocationId:
		case BuiltInSubgroupEqMask:
		case BuiltInSubgroupLtMask:
		case BuiltInSubgroupLeMask:
		case BuiltInSubgroupGtMask:
		case BuiltInSubgroupGeMask:
		case BuiltInBaseVertex:
		case BuiltInBaseInstance:
			// Handled specially.
			break;

		case BuiltInHelperInvocation:
			if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelFragment)
				SPIRV_CROSS_THROW("Helper Invocation input is only supported in PS 5.0 or higher.");
			break;

		case BuiltInClipDistance:
			// HLSL is a bit weird here, use SV_ClipDistance0, SV_ClipDistance1 and so on with vectors.
			for (uint32_t clip = 0; clip < clip_distance_count; clip += 4)
			{
				uint32_t to_declare = clip_distance_count - clip;
				if (to_declare > 4)
					to_declare = 4;

				uint32_t semantic_index = clip / 4;

				static const char *types[] = { "float", "float2", "float3", "float4" };
				statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassInput), semantic_index,
				          " : SV_ClipDistance", semantic_index, ";");
			}
			break;

		case BuiltInCullDistance:
			// HLSL is a bit weird here, use SV_CullDistance0, SV_CullDistance1 and so on with vectors.
			for (uint32_t cull = 0; cull < cull_distance_count; cull += 4)
			{
				uint32_t to_declare = cull_distance_count - cull;
				if (to_declare > 4)
					to_declare = 4;

				uint32_t semantic_index = cull / 4;

				static const char *types[] = { "float", "float2", "float3", "float4" };
				statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassInput), semantic_index,
				          " : SV_CullDistance", semantic_index, ";");
			}
			break;

		case BuiltInPointCoord:
			// PointCoord is not supported, but provide a way to just ignore that, similar to PointSize.
			if (hlsl_options.point_coord_compat)
				break;
			else
				SPIRV_CROSS_THROW("Unsupported builtin in HLSL.");

		case BuiltInLayer:
			if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelFragment)
				SPIRV_CROSS_THROW("Render target array index input is only supported in PS 5.0 or higher.");
			type = "uint";
			semantic = "SV_RenderTargetArrayIndex";
			break;

		default:
			SPIRV_CROSS_THROW("Unsupported builtin in HLSL.");
		}

		if (type && semantic)
			statement(type, " ", builtin_to_glsl(builtin, StorageClassInput), " : ", semantic, ";");
	});
}
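
// Counts how many I/O locations a type consumes: structs sum their members,
// while arrays and matrices consume one location per column per array element.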
uint32_t CompilerHLSL::type_to_consumed_locations(const SPIRType &type) const
{
	// TODO: Need to verify correctness.
	uint32_t elements = 0;

	if (type.basetype == SPIRType::Struct)
	{
		for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
			elements += type_to_consumed_locations(get<SPIRType>(type.member_types[i]));
	}
	else
	{
		uint32_t array_multiplier = 1;
		for (uint32_t i = 0; i < uint32_t(type.array.size()); i++)
		{
			if (type.array_size_literal[i])
				array_multiplier *= type.array[i];
			else
				array_multiplier *= evaluate_constant_u32(type.array[i]);
		}
		elements += array_multiplier * type.columns;
	}
	return elements;
}
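
// Builds the HLSL interpolation qualifier string (nointerpolation, noperspective,
// centroid, patch, sample, precise) from a member's decoration bitset.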
string CompilerHLSL::to_interpolation_qualifiers(const Bitset &flags)
{
	string res;
	//if (flags & (1ull << DecorationSmooth))
	//	res += "linear ";
	if (flags.get(DecorationFlat))
		res += "nointerpolation ";
	if (flags.get(DecorationNoPerspective))
		res += "noperspective ";
	if (flags.get(DecorationCentroid))
		res += "centroid ";
	if (flags.get(DecorationPatch))
		res += "patch "; // Seems to be different in actual HLSL.
	if (flags.get(DecorationSample))
		res += "sample ";
	if (flags.get(DecorationInvariant) && backend.support_precise_qualifier)
		res += "precise "; // Not supported?

	return res;
}
  855. std::string CompilerHLSL::to_semantic(uint32_t location, ExecutionModel em, StorageClass sc)
  856. {
  857. if (em == ExecutionModelVertex && sc == StorageClassInput)
  858. {
  859. // We have a vertex attribute - we should look at remapping it if the user provided
  860. // vertex attribute hints.
  861. for (auto &attribute : remap_vertex_attributes)
  862. if (attribute.location == location)
  863. return attribute.semantic;
  864. }
  865. // Not a vertex attribute, or no remap_vertex_attributes entry.
  866. return join("TEXCOORD", location);
  867. }
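
// Example (illustrative): with no remap entry, a vertex input at location 3 gets the semantic
// "TEXCOORD3"; a remap entry of { location = 3, semantic = "POSITION" } would win instead.
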
std::string CompilerHLSL::to_initializer_expression(const SPIRVariable &var)
{
	// We cannot emit static const initializer for block constants for practical reasons,
	// so just inline the initializer.
	// FIXME: There is a theoretical problem here if someone tries to composite extract
	// into this initializer since we don't declare it properly, but that is somewhat non-sensical.
	auto &type = get<SPIRType>(var.basetype);
	bool is_block = has_decoration(type.self, DecorationBlock);
	auto *c = maybe_get<SPIRConstant>(var.initializer);
	if (is_block && c)
		return constant_expression(*c);
	else
		return CompilerGLSL::to_initializer_expression(var);
}

void CompilerHLSL::emit_interface_block_member_in_struct(const SPIRVariable &var, uint32_t member_index,
                                                         uint32_t location,
                                                         std::unordered_set<uint32_t> &active_locations)
{
	auto &execution = get_entry_point();
	auto type = get<SPIRType>(var.basetype);
	auto semantic = to_semantic(location, execution.model, var.storage);
	auto mbr_name = join(to_name(type.self), "_", to_member_name(type, member_index));
	auto &mbr_type = get<SPIRType>(type.member_types[member_index]);

	statement(to_interpolation_qualifiers(get_member_decoration_bitset(type.self, member_index)),
	          type_to_glsl(mbr_type),
	          " ", mbr_name, type_to_array_glsl(mbr_type),
	          " : ", semantic, ";");

	// Structs and arrays should consume more locations.
	uint32_t consumed_locations = type_to_consumed_locations(mbr_type);
	for (uint32_t i = 0; i < consumed_locations; i++)
		active_locations.insert(location + i);
}
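
// The emitted block member declaration looks roughly like (illustrative, names hypothetical):
//   nointerpolation float4 VertexOut_color : TEXCOORD1;
// i.e. "<BlockTypeName>_<memberName>" with the semantic derived from the member's location.
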
void CompilerHLSL::emit_interface_block_in_struct(const SPIRVariable &var, unordered_set<uint32_t> &active_locations)
{
	auto &execution = get_entry_point();
	auto type = get<SPIRType>(var.basetype);

	string binding;
	bool use_location_number = true;
	bool need_matrix_unroll = false;
	bool legacy = hlsl_options.shader_model <= 30;
	if (execution.model == ExecutionModelFragment && var.storage == StorageClassOutput)
	{
		// Dual-source blending is achieved in HLSL by emitting to SV_Target0 and 1.
		uint32_t index = get_decoration(var.self, DecorationIndex);
		uint32_t location = get_decoration(var.self, DecorationLocation);

		if (index != 0 && location != 0)
			SPIRV_CROSS_THROW("Dual-source blending is only supported on MRT #0 in HLSL.");

		binding = join(legacy ? "COLOR" : "SV_Target", location + index);
		use_location_number = false;
		if (legacy) // COLOR must be a four-component vector on legacy shader model targets (HLSL ERR_COLOR_4COMP)
			type.vecsize = 4;
	}
	else if (var.storage == StorageClassInput && execution.model == ExecutionModelVertex)
	{
		need_matrix_unroll = true;
		if (legacy) // Inputs must be floating-point in legacy targets.
			type.basetype = SPIRType::Float;
	}

	const auto get_vacant_location = [&]() -> uint32_t {
		for (uint32_t i = 0; i < 64; i++)
			if (!active_locations.count(i))
				return i;
		SPIRV_CROSS_THROW("All locations from 0 to 63 are exhausted.");
	};

	auto name = to_name(var.self);
	if (use_location_number)
	{
		uint32_t location_number;

		// If an explicit location exists, use it with TEXCOORD[N] semantic.
		// Otherwise, pick a vacant location.
		if (has_decoration(var.self, DecorationLocation))
			location_number = get_decoration(var.self, DecorationLocation);
		else
			location_number = get_vacant_location();

		// Allow semantic remap if specified.
		auto semantic = to_semantic(location_number, execution.model, var.storage);

		if (need_matrix_unroll && type.columns > 1)
		{
			if (!type.array.empty())
				SPIRV_CROSS_THROW("Arrays of matrices used as input/output. This is not supported.");

			// Unroll matrices.
			for (uint32_t i = 0; i < type.columns; i++)
			{
				SPIRType newtype = type;
				newtype.columns = 1;

				string effective_semantic;
				if (hlsl_options.flatten_matrix_vertex_input_semantics)
					effective_semantic = to_semantic(location_number, execution.model, var.storage);
				else
					effective_semantic = join(semantic, "_", i);

				statement(to_interpolation_qualifiers(get_decoration_bitset(var.self)),
				          variable_decl(newtype, join(name, "_", i)), " : ", effective_semantic, ";");
				active_locations.insert(location_number++);
			}
		}
		else
		{
			auto decl_type = type;
			if (execution.model == ExecutionModelMeshEXT)
			{
				decl_type.array.erase(decl_type.array.begin());
				decl_type.array_size_literal.erase(decl_type.array_size_literal.begin());
			}
			statement(to_interpolation_qualifiers(get_decoration_bitset(var.self)), variable_decl(decl_type, name), " : ",
			          semantic, ";");

			// Structs and arrays should consume more locations.
			uint32_t consumed_locations = type_to_consumed_locations(decl_type);
			for (uint32_t i = 0; i < consumed_locations; i++)
				active_locations.insert(location_number + i);
		}
	}
	else
	{
		statement(variable_decl(type, name), " : ", binding, ";");
	}
}
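
// Matrix unroll example (illustrative, variable name hypothetical): a float4x4 vertex input "m"
// at location 2 is emitted column by column as
//   float4 m_0 : TEXCOORD2_0;  ...  float4 m_3 : TEXCOORD2_3;
// or, with flatten_matrix_vertex_input_semantics enabled, as
//   float4 m_0 : TEXCOORD2;  ...  float4 m_3 : TEXCOORD5;
// consuming locations 2 through 5 either way.
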
std::string CompilerHLSL::builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage)
{
	switch (builtin)
	{
	case BuiltInVertexId:
		return "gl_VertexID";
	case BuiltInInstanceId:
		return "gl_InstanceID";
	case BuiltInNumWorkgroups:
	{
		if (!num_workgroups_builtin)
			SPIRV_CROSS_THROW("NumWorkgroups builtin is used, but remap_num_workgroups_builtin() was not called. "
			                  "Cannot emit code for this builtin.");

		auto &var = get<SPIRVariable>(num_workgroups_builtin);
		auto &type = get<SPIRType>(var.basetype);
		auto ret = join(to_name(num_workgroups_builtin), "_", get_member_name(type.self, 0));
		ParsedIR::sanitize_underscores(ret);
		return ret;
	}
	case BuiltInPointCoord:
		// Crude hack, but there is no real alternative. This path is only enabled if point_coord_compat is set.
		return "float2(0.5f, 0.5f)";
	case BuiltInSubgroupLocalInvocationId:
		return "WaveGetLaneIndex()";
	case BuiltInSubgroupSize:
		return "WaveGetLaneCount()";
	case BuiltInHelperInvocation:
		return "IsHelperLane()";
	default:
		return CompilerGLSL::builtin_to_glsl(builtin, storage);
	}
}

void CompilerHLSL::emit_builtin_variables()
{
	Bitset builtins = active_input_builtins;
	builtins.merge_or(active_output_builtins);

	std::unordered_map<uint32_t, ID> builtin_to_initializer;

	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
		if (!is_builtin_variable(var) || var.storage != StorageClassOutput || !var.initializer)
			return;

		auto *c = this->maybe_get<SPIRConstant>(var.initializer);
		if (!c)
			return;

		auto &type = this->get<SPIRType>(var.basetype);
		if (type.basetype == SPIRType::Struct)
		{
			uint32_t member_count = uint32_t(type.member_types.size());
			for (uint32_t i = 0; i < member_count; i++)
			{
				if (has_member_decoration(type.self, i, DecorationBuiltIn))
				{
					builtin_to_initializer[get_member_decoration(type.self, i, DecorationBuiltIn)] =
					    c->subconstants[i];
				}
			}
		}
		else if (has_decoration(var.self, DecorationBuiltIn))
			builtin_to_initializer[get_decoration(var.self, DecorationBuiltIn)] = var.initializer;
	});

	// Emit global variables for the interface variables which are statically used by the shader.
	builtins.for_each_bit([&](uint32_t i) {
		const char *type = nullptr;
		auto builtin = static_cast<BuiltIn>(i);
		uint32_t array_size = 0;

		string init_expr;
		auto init_itr = builtin_to_initializer.find(builtin);
		if (init_itr != builtin_to_initializer.end())
			init_expr = join(" = ", to_expression(init_itr->second));

		if (get_execution_model() == ExecutionModelMeshEXT)
		{
			if (builtin == BuiltInPosition || builtin == BuiltInPointSize || builtin == BuiltInClipDistance ||
			    builtin == BuiltInCullDistance || builtin == BuiltInLayer || builtin == BuiltInPrimitiveId ||
			    builtin == BuiltInViewportIndex || builtin == BuiltInCullPrimitiveEXT ||
			    builtin == BuiltInPrimitiveShadingRateKHR || builtin == BuiltInPrimitivePointIndicesEXT ||
			    builtin == BuiltInPrimitiveLineIndicesEXT || builtin == BuiltInPrimitiveTriangleIndicesEXT)
			{
				return;
			}
		}

		switch (builtin)
		{
		case BuiltInFragCoord:
		case BuiltInPosition:
			type = "float4";
			break;

		case BuiltInFragDepth:
			type = "float";
			break;

		case BuiltInVertexId:
		case BuiltInVertexIndex:
		case BuiltInInstanceIndex:
			type = "int";
			if (hlsl_options.support_nonzero_base_vertex_base_instance)
				base_vertex_info.used = true;
			break;

		case BuiltInBaseVertex:
		case BuiltInBaseInstance:
			type = "int";
			base_vertex_info.used = true;
			break;

		case BuiltInInstanceId:
		case BuiltInSampleId:
			type = "int";
			break;

		case BuiltInPointSize:
			if (hlsl_options.point_size_compat || hlsl_options.shader_model <= 30)
			{
				// Just emit the global variable, it will be ignored.
				type = "float";
				break;
			}
			else
				SPIRV_CROSS_THROW(join("Unsupported builtin in HLSL: ", unsigned(builtin)));

		case BuiltInGlobalInvocationId:
		case BuiltInLocalInvocationId:
		case BuiltInWorkgroupId:
			type = "uint3";
			break;

		case BuiltInLocalInvocationIndex:
			type = "uint";
			break;

		case BuiltInFrontFacing:
			type = "bool";
			break;

		case BuiltInNumWorkgroups:
		case BuiltInPointCoord:
			// Handled specially.
			break;

		case BuiltInSubgroupLocalInvocationId:
		case BuiltInSubgroupSize:
			if (hlsl_options.shader_model < 60)
				SPIRV_CROSS_THROW("Need SM 6.0 for Wave ops.");
			break;

		case BuiltInSubgroupEqMask:
		case BuiltInSubgroupLtMask:
		case BuiltInSubgroupLeMask:
		case BuiltInSubgroupGtMask:
		case BuiltInSubgroupGeMask:
			if (hlsl_options.shader_model < 60)
				SPIRV_CROSS_THROW("Need SM 6.0 for Wave ops.");
			type = "uint4";
			break;

		case BuiltInHelperInvocation:
			if (hlsl_options.shader_model < 50)
				SPIRV_CROSS_THROW("Need SM 5.0 for Helper Invocation.");
			break;

		case BuiltInClipDistance:
			array_size = clip_distance_count;
			type = "float";
			break;

		case BuiltInCullDistance:
			array_size = cull_distance_count;
			type = "float";
			break;

		case BuiltInSampleMask:
			type = "int";
			break;

		case BuiltInPrimitiveId:
		case BuiltInViewIndex:
		case BuiltInLayer:
			type = "uint";
			break;

		case BuiltInViewportIndex:
		case BuiltInPrimitiveShadingRateKHR:
		case BuiltInPrimitiveLineIndicesEXT:
		case BuiltInCullPrimitiveEXT:
			type = "uint";
			break;

		default:
			SPIRV_CROSS_THROW(join("Unsupported builtin in HLSL: ", unsigned(builtin)));
		}

		StorageClass storage = active_input_builtins.get(i) ? StorageClassInput : StorageClassOutput;

		if (type)
		{
			if (array_size)
				statement("static ", type, " ", builtin_to_glsl(builtin, storage), "[", array_size, "]", init_expr, ";");
			else
				statement("static ", type, " ", builtin_to_glsl(builtin, storage), init_expr, ";");
		}

		// SampleMask can be both in and out with sample builtin, in this case we have already
		// declared the input variable and we need to add the output one now.
		if (builtin == BuiltInSampleMask && storage == StorageClassInput && this->active_output_builtins.get(i))
		{
			statement("static ", type, " ", this->builtin_to_glsl(builtin, StorageClassOutput), init_expr, ";");
		}
	});

	if (base_vertex_info.used)
	{
		string binding_info;
		if (base_vertex_info.explicit_binding)
		{
			binding_info = join(" : register(b", base_vertex_info.register_index);
			if (base_vertex_info.register_space)
				binding_info += join(", space", base_vertex_info.register_space);
			binding_info += ")";
		}
		statement("cbuffer SPIRV_Cross_VertexInfo", binding_info);
		begin_scope();
		statement("int SPIRV_Cross_BaseVertex;");
		statement("int SPIRV_Cross_BaseInstance;");
		end_scope_decl();
		statement("");
	}
}
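
// The globals emitted above look roughly like this in the generated HLSL (illustrative):
//   static float4 gl_Position;
//   static int gl_VertexIndex;
//   static float gl_ClipDistance[2];
// They stand in for the SPIR-V interface variables and are copied to/from the real
// stage I/O structs by the generated entry point wrapper.
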
void CompilerHLSL::set_hlsl_aux_buffer_binding(HLSLAuxBinding binding, uint32_t register_index, uint32_t register_space)
{
	if (binding == HLSL_AUX_BINDING_BASE_VERTEX_INSTANCE)
	{
		base_vertex_info.explicit_binding = true;
		base_vertex_info.register_space = register_space;
		base_vertex_info.register_index = register_index;
	}
}

void CompilerHLSL::unset_hlsl_aux_buffer_binding(HLSLAuxBinding binding)
{
	if (binding == HLSL_AUX_BINDING_BASE_VERTEX_INSTANCE)
		base_vertex_info.explicit_binding = false;
}

bool CompilerHLSL::is_hlsl_aux_buffer_binding_used(HLSLAuxBinding binding) const
{
	if (binding == HLSL_AUX_BINDING_BASE_VERTEX_INSTANCE)
		return base_vertex_info.used;
	else
		return false;
}

void CompilerHLSL::emit_composite_constants()
{
	// HLSL cannot declare structs or arrays inline, so we must move them out to
	// global constants directly.
	bool emitted = false;

	ir.for_each_typed_id<SPIRConstant>([&](uint32_t, SPIRConstant &c) {
		if (c.specialization)
			return;

		auto &type = this->get<SPIRType>(c.constant_type);

		if (type.basetype == SPIRType::Struct && is_builtin_type(type))
			return;

		if (type.basetype == SPIRType::Struct || !type.array.empty())
		{
			add_resource_name(c.self);
			auto name = to_name(c.self);
			statement("static const ", variable_decl(type, name), " = ", constant_expression(c), ";");
			emitted = true;
		}
	});

	if (emitted)
		statement("");
}
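
// Example output (illustrative, name hypothetical): a constant float[3] aggregate becomes a
// global such as
//   static const float _21[3] = { 1.0f, 2.0f, 3.0f };
// which later expressions reference by name instead of an inline aggregate literal.
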
void CompilerHLSL::emit_specialization_constants_and_structs()
{
	bool emitted = false;
	SpecializationConstant wg_x, wg_y, wg_z;
	ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);

	std::unordered_set<TypeID> io_block_types;
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
		auto &type = this->get<SPIRType>(var.basetype);
		if ((var.storage == StorageClassInput || var.storage == StorageClassOutput) &&
		    !var.remapped_variable && type.pointer && !is_builtin_variable(var) &&
		    interface_variable_exists_in_entry_point(var.self) &&
		    has_decoration(type.self, DecorationBlock))
		{
			io_block_types.insert(type.self);
		}
	});

	auto loop_lock = ir.create_loop_hard_lock();
	for (auto &id_ : ir.ids_for_constant_undef_or_type)
	{
		auto &id = ir.ids[id_];

		if (id.get_type() == TypeConstant)
		{
			auto &c = id.get<SPIRConstant>();

			if (c.self == workgroup_size_id)
			{
				statement("static const uint3 gl_WorkGroupSize = ",
				          constant_expression(get<SPIRConstant>(workgroup_size_id)), ";");
				emitted = true;
			}
			else if (c.specialization)
			{
				auto &type = get<SPIRType>(c.constant_type);
				add_resource_name(c.self);
				auto name = to_name(c.self);

				if (has_decoration(c.self, DecorationSpecId))
				{
					// HLSL does not support specialization constants, so fallback to macros.
					c.specialization_constant_macro_name =
					    constant_value_macro_name(get_decoration(c.self, DecorationSpecId));

					statement("#ifndef ", c.specialization_constant_macro_name);
					statement("#define ", c.specialization_constant_macro_name, " ", constant_expression(c));
					statement("#endif");
					statement("static const ", variable_decl(type, name), " = ", c.specialization_constant_macro_name, ";");
				}
				else
					statement("static const ", variable_decl(type, name), " = ", constant_expression(c), ";");

				emitted = true;
			}
		}
		else if (id.get_type() == TypeConstantOp)
		{
			auto &c = id.get<SPIRConstantOp>();
			auto &type = get<SPIRType>(c.basetype);
			add_resource_name(c.self);
			auto name = to_name(c.self);
			statement("static const ", variable_decl(type, name), " = ", constant_op_expression(c), ";");
			emitted = true;
		}
		else if (id.get_type() == TypeType)
		{
			auto &type = id.get<SPIRType>();
			bool is_non_io_block = has_decoration(type.self, DecorationBlock) &&
			                       io_block_types.count(type.self) == 0;
			bool is_buffer_block = has_decoration(type.self, DecorationBufferBlock);
			if (type.basetype == SPIRType::Struct && type.array.empty() &&
			    !type.pointer && !is_non_io_block && !is_buffer_block)
			{
				if (emitted)
					statement("");
				emitted = false;

				emit_struct(type);
			}
		}
		else if (id.get_type() == TypeUndef)
		{
			auto &undef = id.get<SPIRUndef>();
			auto &type = this->get<SPIRType>(undef.basetype);
			// OpUndef can be void for some reason ...
			if (type.basetype == SPIRType::Void)
				return;

			string initializer;
			if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
				initializer = join(" = ", to_zero_initialized_expression(undef.basetype));

			statement("static ", variable_decl(type, to_name(undef.self), undef.self), initializer, ";");
			emitted = true;
		}
	}

	if (emitted)
		statement("");
}
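
// The macro fallback above produces HLSL along these lines (illustrative; exact macro name is
// whatever constant_value_macro_name() returns for the SpecId, variable name hypothetical):
//   #ifndef SPIRV_CROSS_CONSTANT_ID_1
//   #define SPIRV_CROSS_CONSTANT_ID_1 4
//   #endif
//   static const int foo = SPIRV_CROSS_CONSTANT_ID_1;
// so callers can override the "specialization constant" with -D at HLSL compile time.
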
void CompilerHLSL::replace_illegal_names()
{
	static const unordered_set<string> keywords = {
		// Additional HLSL specific keywords.
		// From https://docs.microsoft.com/en-US/windows/win32/direct3dhlsl/dx-graphics-hlsl-appendix-keywords
		"AppendStructuredBuffer", "asm", "asm_fragment",
		"BlendState", "bool", "break", "Buffer", "ByteAddressBuffer",
		"case", "cbuffer", "centroid", "class", "column_major", "compile",
		"compile_fragment", "CompileShader", "const", "continue", "ComputeShader",
		"ConsumeStructuredBuffer",
		"default", "DepthStencilState", "DepthStencilView", "discard", "do",
		"double", "DomainShader", "dword",
		"else", "export", "false", "float", "for", "fxgroup",
		"GeometryShader", "groupshared", "half", "HullShader",
		"indices", "if", "in", "inline", "inout", "InputPatch", "int", "interface",
		"line", "lineadj", "linear", "LineStream",
		"matrix", "min16float", "min10float", "min16int", "min16uint",
		"namespace", "nointerpolation", "noperspective", "NULL",
		"out", "OutputPatch",
		"payload", "packoffset", "pass", "pixelfragment", "PixelShader", "point",
		"PointStream", "precise", "RasterizerState", "RenderTargetView",
		"return", "register", "row_major", "RWBuffer", "RWByteAddressBuffer",
		"RWStructuredBuffer", "RWTexture1D", "RWTexture1DArray", "RWTexture2D",
		"RWTexture2DArray", "RWTexture3D", "sample", "sampler", "SamplerState",
		"SamplerComparisonState", "shared", "snorm", "stateblock", "stateblock_state",
		"static", "string", "struct", "switch", "StructuredBuffer", "tbuffer",
		"technique", "technique10", "technique11", "texture", "Texture1D",
		"Texture1DArray", "Texture2D", "Texture2DArray", "Texture2DMS", "Texture2DMSArray",
		"Texture3D", "TextureCube", "TextureCubeArray", "true", "typedef", "triangle",
		"triangleadj", "TriangleStream", "uint", "uniform", "unorm", "unsigned",
		"vector", "vertexfragment", "VertexShader", "vertices", "void", "volatile", "while",
	};

	CompilerGLSL::replace_illegal_names(keywords);
	CompilerGLSL::replace_illegal_names();
}

void CompilerHLSL::emit_resources()
{
	auto &execution = get_entry_point();

	replace_illegal_names();

	switch (execution.model)
	{
	case ExecutionModelGeometry:
	case ExecutionModelTessellationControl:
	case ExecutionModelTessellationEvaluation:
	case ExecutionModelMeshEXT:
		fixup_implicit_builtin_block_names(execution.model);
		break;

	default:
		break;
	}

	emit_specialization_constants_and_structs();
	emit_composite_constants();

	bool emitted = false;

	// Output UBOs and SSBOs
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
		auto &type = this->get<SPIRType>(var.basetype);

		bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform;
		bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
		                       ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);

		if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) &&
		    has_block_flags)
		{
			emit_buffer_block(var);
			emitted = true;
		}
	});

	// Output push constant blocks
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
		auto &type = this->get<SPIRType>(var.basetype);
		if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant &&
		    !is_hidden_variable(var))
		{
			emit_push_constant_block(var);
			emitted = true;
		}
	});

	if (execution.model == ExecutionModelVertex && hlsl_options.shader_model <= 30 &&
	    active_output_builtins.get(BuiltInPosition))
	{
		statement("uniform float4 gl_HalfPixel;");
		emitted = true;
	}

	bool skip_separate_image_sampler = !combined_image_samplers.empty() || hlsl_options.shader_model <= 30;

	// Output Uniform Constants (values, samplers, images, etc).
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
		auto &type = this->get<SPIRType>(var.basetype);

		// If we're remapping separate samplers and images, only emit the combined samplers.
		if (skip_separate_image_sampler)
		{
			// Sampler buffers are always used without a sampler, and they will also work in regular D3D.
			bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer;
			bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
			bool separate_sampler = type.basetype == SPIRType::Sampler;
			if (!sampler_buffer && (separate_image || separate_sampler))
				return;
		}

		if (var.storage != StorageClassFunction && !is_builtin_variable(var) && !var.remapped_variable &&
		    type.pointer && (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter) &&
		    !is_hidden_variable(var))
		{
			emit_uniform(var);
			emitted = true;
		}
	});

	if (emitted)
		statement("");
	emitted = false;

	// Emit builtin input and output variables here.
	emit_builtin_variables();

	if (execution.model != ExecutionModelMeshEXT)
	{
		ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
			auto &type = this->get<SPIRType>(var.basetype);

			if (var.storage != StorageClassFunction && !var.remapped_variable && type.pointer &&
			    (var.storage == StorageClassInput || var.storage == StorageClassOutput) && !is_builtin_variable(var) &&
			    interface_variable_exists_in_entry_point(var.self))
			{
				// Builtin variables are handled separately.
				emit_interface_block_globally(var);
				emitted = true;
			}
		});
	}

	if (emitted)
		statement("");
	emitted = false;

	require_input = false;
	require_output = false;
	unordered_set<uint32_t> active_inputs;
	unordered_set<uint32_t> active_outputs;

	struct IOVariable
	{
		const SPIRVariable *var;
		uint32_t location;
		uint32_t block_member_index;
		bool block;
	};

	SmallVector<IOVariable> input_variables;
	SmallVector<IOVariable> output_variables;
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
		auto &type = this->get<SPIRType>(var.basetype);
		bool block = has_decoration(type.self, DecorationBlock);

		if (var.storage != StorageClassInput && var.storage != StorageClassOutput)
			return;

		if (!var.remapped_variable && type.pointer && !is_builtin_variable(var) &&
		    interface_variable_exists_in_entry_point(var.self))
		{
			if (block)
			{
				for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
				{
					uint32_t location = get_declared_member_location(var, i, false);
					if (var.storage == StorageClassInput)
						input_variables.push_back({ &var, location, i, true });
					else
						output_variables.push_back({ &var, location, i, true });
				}
			}
			else
			{
				uint32_t location = get_decoration(var.self, DecorationLocation);
				if (var.storage == StorageClassInput)
					input_variables.push_back({ &var, location, 0, false });
				else
					output_variables.push_back({ &var, location, 0, false });
			}
		}
	});

	const auto variable_compare = [&](const IOVariable &a, const IOVariable &b) -> bool {
		// Sort input and output variables based on, from more robust to less robust:
		// - Location
		// - Variable has a location
		// - Name comparison
		// - Variable has a name
		// - Fallback: ID
		bool has_location_a = a.block || has_decoration(a.var->self, DecorationLocation);
		bool has_location_b = b.block || has_decoration(b.var->self, DecorationLocation);

		if (has_location_a && has_location_b)
			return a.location < b.location;
		else if (has_location_a && !has_location_b)
			return true;
		else if (!has_location_a && has_location_b)
			return false;

		const auto &name1 = to_name(a.var->self);
		const auto &name2 = to_name(b.var->self);

		if (name1.empty() && name2.empty())
			return a.var->self < b.var->self;
		else if (name1.empty())
			return true;
		else if (name2.empty())
			return false;

		return name1.compare(name2) < 0;
	};

	auto input_builtins = active_input_builtins;
	input_builtins.clear(BuiltInNumWorkgroups);
	input_builtins.clear(BuiltInPointCoord);
	input_builtins.clear(BuiltInSubgroupSize);
	input_builtins.clear(BuiltInSubgroupLocalInvocationId);
	input_builtins.clear(BuiltInSubgroupEqMask);
	input_builtins.clear(BuiltInSubgroupLtMask);
	input_builtins.clear(BuiltInSubgroupLeMask);
	input_builtins.clear(BuiltInSubgroupGtMask);
	input_builtins.clear(BuiltInSubgroupGeMask);

	if (!input_variables.empty() || !input_builtins.empty())
	{
		require_input = true;
		statement("struct SPIRV_Cross_Input");

		begin_scope();
		sort(input_variables.begin(), input_variables.end(), variable_compare);
		for (auto &var : input_variables)
		{
			if (var.block)
				emit_interface_block_member_in_struct(*var.var, var.block_member_index, var.location, active_inputs);
			else
				emit_interface_block_in_struct(*var.var, active_inputs);
		}
		emit_builtin_inputs_in_struct();
		end_scope_decl();
		statement("");
	}
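
	// For a simple fragment shader, the struct emitted above might look like
	// (illustrative, member name hypothetical):
	//   struct SPIRV_Cross_Input
	//   {
	//       float4 vColor : TEXCOORD0;
	//       float4 gl_FragCoord : SV_Position;
	//   };
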
	const bool is_mesh_shader = execution.model == ExecutionModelMeshEXT;
	if (!output_variables.empty() || !active_output_builtins.empty())
	{
		sort(output_variables.begin(), output_variables.end(), variable_compare);
		require_output = !is_mesh_shader;

		statement(is_mesh_shader ? "struct gl_MeshPerVertexEXT" : "struct SPIRV_Cross_Output");
		begin_scope();
		for (auto &var : output_variables)
		{
			if (is_per_primitive_variable(*var.var))
				continue;
			if (var.block && is_mesh_shader && var.block_member_index != 0)
				continue;
			if (var.block && !is_mesh_shader)
				emit_interface_block_member_in_struct(*var.var, var.block_member_index, var.location, active_outputs);
			else
				emit_interface_block_in_struct(*var.var, active_outputs);
		}
		emit_builtin_outputs_in_struct();
		if (!is_mesh_shader)
			emit_builtin_primitive_outputs_in_struct();
		end_scope_decl();
		statement("");

		if (is_mesh_shader)
		{
			statement("struct gl_MeshPerPrimitiveEXT");
			begin_scope();
			for (auto &var : output_variables)
			{
				if (!is_per_primitive_variable(*var.var))
					continue;
				if (var.block && var.block_member_index != 0)
					continue;
				emit_interface_block_in_struct(*var.var, active_outputs);
			}
			emit_builtin_primitive_outputs_in_struct();
			end_scope_decl();
			statement("");
		}
	}

	// Global variables.
	for (auto global : global_variables)
	{
		auto &var = get<SPIRVariable>(global);
		if (is_hidden_variable(var, true))
			continue;
		if (var.storage == StorageClassTaskPayloadWorkgroupEXT && is_mesh_shader)
			continue;

		if (var.storage != StorageClassOutput)
		{
			if (!variable_is_lut(var))
			{
				add_resource_name(var.self);

				const char *storage = nullptr;
				switch (var.storage)
				{
				case StorageClassWorkgroup:
				case StorageClassTaskPayloadWorkgroupEXT:
					storage = "groupshared";
					break;

				default:
					storage = "static";
					break;
				}

				string initializer;
				if (options.force_zero_initialized_variables && var.storage == StorageClassPrivate &&
				    !var.initializer && !var.static_expression && type_can_zero_initialize(get_variable_data_type(var)))
				{
					initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(var)));
				}
				statement(storage, " ", variable_decl(var), initializer, ";");
				emitted = true;
			}
		}
	}

	if (emitted)
		statement("");

	if (requires_op_fmod)
	{
		static const char *types[] = {
			"float",
			"float2",
			"float3",
			"float4",
		};

		for (auto &type : types)
		{
			statement(type, " mod(", type, " x, ", type, " y)");
			begin_scope();
			statement("return x - y * floor(x / y);");
			end_scope();
			statement("");
		}
	}

	emit_texture_size_variants(required_texture_size_variants.srv, "4", false, "");
	for (uint32_t norm = 0; norm < 3; norm++)
	{
		for (uint32_t comp = 0; comp < 4; comp++)
		{
			static const char *qualifiers[] = { "", "unorm ", "snorm " };
			static const char *vecsizes[] = { "", "2", "3", "4" };
			emit_texture_size_variants(required_texture_size_variants.uav[norm][comp], vecsizes[comp], true,
			                           qualifiers[norm]);
		}
	}

	if (requires_fp16_packing)
	{
		// HLSL does not pack into a single word sadly :(
		statement("uint spvPackHalf2x16(float2 value)");
		begin_scope();
		statement("uint2 Packed = f32tof16(value);");
		statement("return Packed.x | (Packed.y << 16);");
		end_scope();
		statement("");

		statement("float2 spvUnpackHalf2x16(uint value)");
		begin_scope();
		statement("return f16tof32(uint2(value & 0xffff, value >> 16));");
		end_scope();
		statement("");
	}

	if (requires_uint2_packing)
	{
		statement("uint64_t spvPackUint2x32(uint2 value)");
		begin_scope();
		statement("return (uint64_t(value.y) << 32) | uint64_t(value.x);");
		end_scope();
		statement("");

		statement("uint2 spvUnpackUint2x32(uint64_t value)");
		begin_scope();
		statement("uint2 Unpacked;");
		statement("Unpacked.x = uint(value & 0xffffffff);");
		statement("Unpacked.y = uint(value >> 32);");
		statement("return Unpacked;");
		end_scope();
		statement("");
	}

	if (requires_explicit_fp16_packing)
	{
		// HLSL does not pack into a single word sadly :(
		statement("uint spvPackFloat2x16(min16float2 value)");
		begin_scope();
		statement("uint2 Packed = f32tof16(value);");
		statement("return Packed.x | (Packed.y << 16);");
		end_scope();
		statement("");

		statement("min16float2 spvUnpackFloat2x16(uint value)");
		begin_scope();
		statement("return min16float2(f16tof32(uint2(value & 0xffff, value >> 16)));");
		end_scope();
		statement("");
	}

	// HLSL does not seem to have builtins for these operations, so roll them by hand ...
	if (requires_unorm8_packing)
	{
		statement("uint spvPackUnorm4x8(float4 value)");
		begin_scope();
		statement("uint4 Packed = uint4(round(saturate(value) * 255.0));");
		statement("return Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24);");
		end_scope();
		statement("");

		statement("float4 spvUnpackUnorm4x8(uint value)");
		begin_scope();
		statement("uint4 Packed = uint4(value & 0xff, (value >> 8) & 0xff, (value >> 16) & 0xff, value >> 24);");
		statement("return float4(Packed) / 255.0;");
		end_scope();
		statement("");
	}

	if (requires_snorm8_packing)
	{
		statement("uint spvPackSnorm4x8(float4 value)");
		begin_scope();
		statement("int4 Packed = int4(round(clamp(value, -1.0, 1.0) * 127.0)) & 0xff;");
		statement("return uint(Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24));");
		end_scope();
		statement("");

		statement("float4 spvUnpackSnorm4x8(uint value)");
		begin_scope();
		statement("int SignedValue = int(value);");
		statement("int4 Packed = int4(SignedValue << 24, SignedValue << 16, SignedValue << 8, SignedValue) >> 24;");
		statement("return clamp(float4(Packed) / 127.0, -1.0, 1.0);");
		end_scope();
		statement("");
	}

	if (requires_unorm16_packing)
	{
		statement("uint spvPackUnorm2x16(float2 value)");
		begin_scope();
		statement("uint2 Packed = uint2(round(saturate(value) * 65535.0));");
		statement("return Packed.x | (Packed.y << 16);");
		end_scope();
		statement("");

		statement("float2 spvUnpackUnorm2x16(uint value)");
		begin_scope();
		statement("uint2 Packed = uint2(value & 0xffff, value >> 16);");
		statement("return float2(Packed) / 65535.0;");
		end_scope();
		statement("");
	}

	if (requires_snorm16_packing)
	{
		statement("uint spvPackSnorm2x16(float2 value)");
		begin_scope();
		statement("int2 Packed = int2(round(clamp(value, -1.0, 1.0) * 32767.0)) & 0xffff;");
		statement("return uint(Packed.x | (Packed.y << 16));");
		end_scope();
		statement("");

		statement("float2 spvUnpackSnorm2x16(uint value)");
		begin_scope();
		statement("int SignedValue = int(value);");
		statement("int2 Packed = int2(SignedValue << 16, SignedValue) >> 16;");
		statement("return clamp(float2(Packed) / 32767.0, -1.0, 1.0);");
		end_scope();
		statement("");
	}

	if (requires_bitfield_insert)
	{
		static const char *types[] = { "uint", "uint2", "uint3", "uint4" };
		for (auto &type : types)
		{
			statement(type, " spvBitfieldInsert(", type, " Base, ", type, " Insert, uint Offset, uint Count)");
			begin_scope();
			statement("uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31));");
			statement("return (Base & ~Mask) | ((Insert << Offset) & Mask);");
			end_scope();
			statement("");
		}
	}
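
	// Worked example for the emitted helper (illustrative): spvBitfieldInsert(0u, 0xFFu, 8, 4)
	// computes Mask = 0xF00 and returns (0 & ~0xF00) | ((0xFF << 8) & 0xF00) == 0xF00.
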
	if (requires_bitfield_extract)
	{
		static const char *unsigned_types[] = { "uint", "uint2", "uint3", "uint4" };
		for (auto &type : unsigned_types)
		{
			statement(type, " spvBitfieldUExtract(", type, " Base, uint Offset, uint Count)");
			begin_scope();
			statement("uint Mask = Count == 32 ? 0xffffffff : ((1 << Count) - 1);");
			statement("return (Base >> Offset) & Mask;");
			end_scope();
			statement("");
		}

		// In this overload, we will have to do sign-extension, which we will emulate by shifting up and down.
		static const char *signed_types[] = { "int", "int2", "int3", "int4" };
		for (auto &type : signed_types)
		{
			statement(type, " spvBitfieldSExtract(", type, " Base, int Offset, int Count)");
			begin_scope();
			statement("int Mask = Count == 32 ? -1 : ((1 << Count) - 1);");
			statement(type, " Masked = (Base >> Offset) & Mask;");
			statement("int ExtendShift = (32 - Count) & 31;");
			statement("return (Masked << ExtendShift) >> ExtendShift;");
			end_scope();
			statement("");
		}
	}
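
	// Worked example for the signed helper (illustrative): spvBitfieldSExtract(0xF0, 4, 4)
	// extracts Masked = 0xF, then (0xF << 28) >> 28 arithmetic-shifts the sign bit back
	// down, yielding -1.
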
	if (requires_inverse_2x2)
	{
		statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical");
		statement("// adjoint and dividing by the determinant. The contents of the matrix are changed.");
		statement("float2x2 spvInverse(float2x2 m)");
		begin_scope();
		statement("float2x2 adj; // The adjoint matrix (inverse after dividing by determinant)");
		statement_no_indent("");
		statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix.");
		statement("adj[0][0] = m[1][1];");
		statement("adj[0][1] = -m[0][1];");
		statement_no_indent("");
		statement("adj[1][0] = -m[1][0];");
		statement("adj[1][1] = m[0][0];");
		statement_no_indent("");
		statement("// Calculate the determinant as a combination of the cofactors of the first row.");
		statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]);");
		statement_no_indent("");
		statement("// Divide the classical adjoint matrix by the determinant.");
		statement("// If determinant is zero, matrix is not invertible, so leave it unchanged.");
		statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;");
		end_scope();
		statement("");
	}

	if (requires_inverse_3x3)
	{
		statement("// Returns the determinant of a 2x2 matrix.");
		statement("float spvDet2x2(float a1, float a2, float b1, float b2)");
		begin_scope();
		statement("return a1 * b2 - b1 * a2;");
		end_scope();
		statement_no_indent("");
		statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical");
		statement("// adjoint and dividing by the determinant. The contents of the matrix are changed.");
		statement("float3x3 spvInverse(float3x3 m)");
		begin_scope();
		statement("float3x3 adj; // The adjoint matrix (inverse after dividing by determinant)");
		statement_no_indent("");
		statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix.");
		statement("adj[0][0] = spvDet2x2(m[1][1], m[1][2], m[2][1], m[2][2]);");
		statement("adj[0][1] = -spvDet2x2(m[0][1], m[0][2], m[2][1], m[2][2]);");
		statement("adj[0][2] = spvDet2x2(m[0][1], m[0][2], m[1][1], m[1][2]);");
		statement_no_indent("");
		statement("adj[1][0] = -spvDet2x2(m[1][0], m[1][2], m[2][0], m[2][2]);");
		statement("adj[1][1] = spvDet2x2(m[0][0], m[0][2], m[2][0], m[2][2]);");
		statement("adj[1][2] = -spvDet2x2(m[0][0], m[0][2], m[1][0], m[1][2]);");
		statement_no_indent("");
		statement("adj[2][0] = spvDet2x2(m[1][0], m[1][1], m[2][0], m[2][1]);");
		statement("adj[2][1] = -spvDet2x2(m[0][0], m[0][1], m[2][0], m[2][1]);");
		statement("adj[2][2] = spvDet2x2(m[0][0], m[0][1], m[1][0], m[1][1]);");
		statement_no_indent("");
		statement("// Calculate the determinant as a combination of the cofactors of the first row.");
		statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]);");
		statement_no_indent("");
		statement("// Divide the classical adjoint matrix by the determinant.");
		statement("// If determinant is zero, matrix is not invertible, so leave it unchanged.");
		statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;");
		end_scope();
		statement("");
	}

	if (requires_inverse_4x4)
	{
		if (!requires_inverse_3x3)
		{
			statement("// Returns the determinant of a 2x2 matrix.");
			statement("float spvDet2x2(float a1, float a2, float b1, float b2)");
			begin_scope();
			statement("return a1 * b2 - b1 * a2;");
			end_scope();
			statement("");
		}

		statement("// Returns the determinant of a 3x3 matrix.");
		statement("float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, "
		          "float c2, float c3)");
		begin_scope();
		statement("return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * "
		          "spvDet2x2(a2, a3, b2, b3);");
		end_scope();
		statement_no_indent("");
		statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical");
		statement("// adjoint and dividing by the determinant. The contents of the matrix are changed.");
		statement("float4x4 spvInverse(float4x4 m)");
		begin_scope();
		statement("float4x4 adj; // The adjoint matrix (inverse after dividing by determinant)");
		statement_no_indent("");
		statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix.");
		statement("adj[0][0] = spvDet3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], "
		          "m[3][3]);");
		statement("adj[0][1] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], "
		          "m[3][3]);");
		statement("adj[0][2] = spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], "
		          "m[3][3]);");
		statement("adj[0][3] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], "
		          "m[2][3]);");
		statement_no_indent("");
		statement("adj[1][0] = -spvDet3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], "
		          "m[3][3]);");
		statement("adj[1][1] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], "
		          "m[3][3]);");
		statement("adj[1][2] = -spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], "
		          "m[3][3]);");
		statement("adj[1][3] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], "
		          "m[2][3]);");
		statement_no_indent("");
		statement("adj[2][0] = spvDet3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], "
		          "m[3][3]);");
		statement("adj[2][1] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], "
		          "m[3][3]);");
		statement("adj[2][2] = spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], "
		          "m[3][3]);");
		statement("adj[2][3] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], "
		          "m[2][3]);");
		statement_no_indent("");
		statement("adj[3][0] = -spvDet3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], "
		          "m[3][2]);");
		statement("adj[3][1] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], "
		          "m[3][2]);");
		statement("adj[3][2] = -spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], "
		          "m[3][2]);");
		statement("adj[3][3] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], "
		          "m[2][2]);");
		statement_no_indent("");
		statement("// Calculate the determinant as a combination of the cofactors of the first row.");
		statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]) + (adj[0][3] "
		          "* m[3][0]);");
		statement_no_indent("");
		statement("// Divide the classical adjoint matrix by the determinant.");
		statement("// If determinant is zero, matrix is not invertible, so leave it unchanged.");
		statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;");
		end_scope();
		statement("");
	}

	if (requires_scalar_reflect)
	{
		// FP16/FP64? No templates in HLSL.
		statement("float spvReflect(float i, float n)");
		begin_scope();
		statement("return i - 2.0 * dot(n, i) * n;");
		end_scope();
		statement("");
	}

	if (requires_scalar_refract)
	{
		// FP16/FP64? No templates in HLSL.
		statement("float spvRefract(float i, float n, float eta)");
		begin_scope();
		statement("float NoI = n * i;");
		statement("float NoI2 = NoI * NoI;");
		statement("float k = 1.0 - eta * eta * (1.0 - NoI2);");
		statement("if (k < 0.0)");
		begin_scope();
		statement("return 0.0;");
		end_scope();
		statement("else");
		begin_scope();
		statement("return eta * i - (eta * NoI + sqrt(k)) * n;");
		end_scope();
		end_scope();
		statement("");
	}

	if (requires_scalar_faceforward)
	{
		// FP16/FP64? No templates in HLSL.
		statement("float spvFaceForward(float n, float i, float nref)");
		begin_scope();
		statement("return i * nref < 0.0 ? n : -n;");
		end_scope();
		statement("");
	}

	for (TypeID type_id : composite_selection_workaround_types)
	{
		// Need out variable since HLSL does not support returning arrays.
		auto &type = get<SPIRType>(type_id);
		auto type_str = type_to_glsl(type);
		auto type_arr_str = type_to_array_glsl(type);
		statement("void spvSelectComposite(out ", type_str, " out_value", type_arr_str, ", bool cond, ",
		          type_str, " true_val", type_arr_str, ", ",
		          type_str, " false_val", type_arr_str, ")");
		begin_scope();
		statement("if (cond)");
		begin_scope();
		statement("out_value = true_val;");
		end_scope();
		statement("else");
		begin_scope();
		statement("out_value = false_val;");
		end_scope();
		end_scope();
		statement("");
	}

	if (is_mesh_shader && options.vertex.flip_vert_y)
	{
		statement("float4 spvFlipVertY(float4 v)");
		begin_scope();
		statement("return float4(v.x, -v.y, v.z, v.w);");
		end_scope();
		statement("");
		statement("float spvFlipVertY(float v)");
		begin_scope();
		statement("return -v;");
		end_scope();
		statement("");
	}
}

void CompilerHLSL::emit_texture_size_variants(uint64_t variant_mask, const char *vecsize_qualifier, bool uav,
                                              const char *type_qualifier)
{
	if (variant_mask == 0)
		return;

	static const char *types[QueryTypeCount] = { "float", "int", "uint" };
	static const char *dims[QueryDimCount] = { "Texture1D", "Texture1DArray", "Texture2D", "Texture2DArray",
	                                           "Texture3D", "Buffer", "TextureCube", "TextureCubeArray",
	                                           "Texture2DMS", "Texture2DMSArray" };

	static const bool has_lod[QueryDimCount] = { true, true, true, true, true, false, true, true, false, false };

	static const char *ret_types[QueryDimCount] = {
		"uint", "uint2", "uint2", "uint3", "uint3", "uint", "uint2", "uint3", "uint2", "uint3",
	};

	static const uint32_t return_arguments[QueryDimCount] = {
		1, 2, 2, 3, 3, 1, 2, 3, 2, 3,
	};

	for (uint32_t index = 0; index < QueryDimCount; index++)
	{
		for (uint32_t type_index = 0; type_index < QueryTypeCount; type_index++)
		{
			uint32_t bit = 16 * type_index + index;
			uint64_t mask = 1ull << bit;

			if ((variant_mask & mask) == 0)
				continue;

			statement(ret_types[index], " spv", (uav ? "Image" : "Texture"), "Size(", (uav ? "RW" : ""),
			          dims[index], "<", type_qualifier, types[type_index], vecsize_qualifier, "> Tex, ",
			          (uav ? "" : "uint Level, "), "out uint Param)");
			begin_scope();
			statement(ret_types[index], " ret;");
			switch (return_arguments[index])
			{
			case 1:
				if (has_lod[index] && !uav)
					statement("Tex.GetDimensions(Level, ret.x, Param);");
				else
				{
					statement("Tex.GetDimensions(ret.x);");
					statement("Param = 0u;");
				}
				break;
			case 2:
				if (has_lod[index] && !uav)
					statement("Tex.GetDimensions(Level, ret.x, ret.y, Param);");
				else if (!uav)
					statement("Tex.GetDimensions(ret.x, ret.y, Param);");
				else
				{
					statement("Tex.GetDimensions(ret.x, ret.y);");
					statement("Param = 0u;");
				}
				break;
			case 3:
				if (has_lod[index] && !uav)
					statement("Tex.GetDimensions(Level, ret.x, ret.y, ret.z, Param);");
				else if (!uav)
					statement("Tex.GetDimensions(ret.x, ret.y, ret.z, Param);");
				else
				{
					statement("Tex.GetDimensions(ret.x, ret.y, ret.z);");
					statement("Param = 0u;");
				}
				break;
			}

			statement("return ret;");
			end_scope();
			statement("");
		}
	}
}
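
// Example emitted variant (illustrative; the SRV float Texture2D case, vecsize_qualifier "4"):
//   uint2 spvTextureSize(Texture2D<float4> Tex, uint Level, out uint Param)
//   {
//       uint2 ret;
//       Tex.GetDimensions(Level, ret.x, ret.y, Param);
//       return ret;
//   }
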
  2084. void CompilerHLSL::analyze_meshlet_writes()
  2085. {
  2086. uint32_t id_per_vertex = 0;
  2087. uint32_t id_per_primitive = 0;
  2088. bool need_per_primitive = false;
  2089. bool need_per_vertex = false;
  2090. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  2091. auto &type = this->get<SPIRType>(var.basetype);
  2092. bool block = has_decoration(type.self, DecorationBlock);
  2093. if (var.storage == StorageClassOutput && block && is_builtin_variable(var))
  2094. {
  2095. auto flags = get_buffer_block_flags(var.self);
  2096. if (flags.get(DecorationPerPrimitiveEXT))
  2097. id_per_primitive = var.self;
  2098. else
  2099. id_per_vertex = var.self;
  2100. }
  2101. else if (var.storage == StorageClassOutput)
  2102. {
  2103. Bitset flags;
  2104. if (block)
  2105. flags = get_buffer_block_flags(var.self);
  2106. else
  2107. flags = get_decoration_bitset(var.self);
  2108. if (flags.get(DecorationPerPrimitiveEXT))
  2109. need_per_primitive = true;
  2110. else
  2111. need_per_vertex = true;
  2112. }
  2113. });
  2114. // If we have per-primitive outputs, and no per-primitive builtins,
  2115. // empty version of gl_MeshPerPrimitiveEXT will be emitted.
  2116. // If we don't use block IO for vertex output, we'll also need to synthesize the PerVertex block.
  2117. const auto generate_block = [&](const char *block_name, const char *instance_name, bool per_primitive) -> uint32_t {
  2118. auto &execution = get_entry_point();
  2119. uint32_t op_type = ir.increase_bound_by(4);
  2120. uint32_t op_arr = op_type + 1;
  2121. uint32_t op_ptr = op_type + 2;
  2122. uint32_t op_var = op_type + 3;
  2123. auto &type = set<SPIRType>(op_type);
  2124. type.basetype = SPIRType::Struct;
  2125. set_name(op_type, block_name);
  2126. set_decoration(op_type, DecorationBlock);
  2127. if (per_primitive)
  2128. set_decoration(op_type, DecorationPerPrimitiveEXT);
  2129. auto &arr = set<SPIRType>(op_arr, type);
  2130. arr.parent_type = type.self;
  2131. arr.array.push_back(per_primitive ? execution.output_primitives : execution.output_vertices);
  2132. arr.array_size_literal.push_back(true);
  2133. auto &ptr = set<SPIRType>(op_ptr, arr);
  2134. ptr.parent_type = arr.self;
  2135. ptr.pointer = true;
  2136. ptr.pointer_depth++;
  2137. ptr.storage = StorageClassOutput;
  2138. set_decoration(op_ptr, DecorationBlock);
  2139. set_name(op_ptr, block_name);
  2140. auto &var = set<SPIRVariable>(op_var, op_ptr, StorageClassOutput);
  2141. if (per_primitive)
  2142. set_decoration(op_var, DecorationPerPrimitiveEXT);
  2143. set_name(op_var, instance_name);
  2144. execution.interface_variables.push_back(var.self);
  2145. return op_var;
  2146. };
  2147. if (id_per_vertex == 0 && need_per_vertex)
  2148. id_per_vertex = generate_block("gl_MeshPerVertexEXT", "gl_MeshVerticesEXT", false);
  2149. if (id_per_primitive == 0 && need_per_primitive)
  2150. id_per_primitive = generate_block("gl_MeshPerPrimitiveEXT", "gl_MeshPrimitivesEXT", true);
  2151. unordered_set<uint32_t> processed_func_ids;
  2152. analyze_meshlet_writes(ir.default_entry_point, id_per_vertex, id_per_primitive, processed_func_ids);
  2153. }
void CompilerHLSL::analyze_meshlet_writes(uint32_t func_id, uint32_t id_per_vertex, uint32_t id_per_primitive,
                                          std::unordered_set<uint32_t> &processed_func_ids)
{
    // Avoid processing a function more than once.
    if (processed_func_ids.find(func_id) != processed_func_ids.end())
        return;
    processed_func_ids.insert(func_id);

    auto &func = get<SPIRFunction>(func_id);

    // Recursively establish global args added to functions on which we depend.
    for (auto &block : func.blocks)
    {
        auto &b = get<SPIRBlock>(block);
        for (auto &i : b.ops)
        {
            auto ops = stream(i);
            auto op = static_cast<Op>(i.op);

            switch (op)
            {
            case OpFunctionCall:
            {
                // Then recurse into the function itself to extract globals used internally in the function.
                uint32_t inner_func_id = ops[2];
                analyze_meshlet_writes(inner_func_id, id_per_vertex, id_per_primitive, processed_func_ids);
                auto &inner_func = get<SPIRFunction>(inner_func_id);
                for (auto &iarg : inner_func.arguments)
                {
                    if (!iarg.alias_global_variable)
                        continue;

                    bool already_declared = false;
                    for (auto &arg : func.arguments)
                    {
                        if (arg.id == iarg.id)
                        {
                            already_declared = true;
                            break;
                        }
                    }

                    if (!already_declared)
                    {
                        // basetype is effectively ignored here since we declare the argument
                        // with explicit types. Just pass down a valid type.
                        func.arguments.push_back({ expression_type_id(iarg.id), iarg.id,
                                                   iarg.read_count, iarg.write_count, true });
                    }
                }
                break;
            }

            case OpStore:
            case OpLoad:
            case OpInBoundsAccessChain:
            case OpAccessChain:
            case OpPtrAccessChain:
            case OpInBoundsPtrAccessChain:
            case OpArrayLength:
            {
                auto *var = maybe_get<SPIRVariable>(ops[op == OpStore ? 0 : 2]);
                if (var && (var->storage == StorageClassOutput || var->storage == StorageClassTaskPayloadWorkgroupEXT))
                {
                    bool already_declared = false;
                    auto builtin_type = BuiltIn(get_decoration(var->self, DecorationBuiltIn));

                    uint32_t var_id = var->self;
                    if (var->storage != StorageClassTaskPayloadWorkgroupEXT &&
                        builtin_type != BuiltInPrimitivePointIndicesEXT &&
                        builtin_type != BuiltInPrimitiveLineIndicesEXT &&
                        builtin_type != BuiltInPrimitiveTriangleIndicesEXT)
                    {
                        var_id = is_per_primitive_variable(*var) ? id_per_primitive : id_per_vertex;
                    }

                    for (auto &arg : func.arguments)
                    {
                        if (arg.id == var_id)
                        {
                            already_declared = true;
                            break;
                        }
                    }

                    if (!already_declared)
                    {
                        // basetype is effectively ignored here since we declare the argument
                        // with explicit types. Just pass down a valid type.
                        uint32_t type_id = expression_type_id(var_id);
                        if (var->storage == StorageClassTaskPayloadWorkgroupEXT)
                            func.arguments.push_back({ type_id, var_id, 1u, 0u, true });
                        else
                            func.arguments.push_back({ type_id, var_id, 1u, 1u, true });
                    }
                }
                break;
            }

            default:
                break;
            }
        }
    }
}

string CompilerHLSL::layout_for_member(const SPIRType &type, uint32_t index)
{
    auto &flags = get_member_decoration_bitset(type.self, index);

    // HLSL can emit row_major or column_major decoration in any struct.
    // Do not try to merge combined decorations for children like in GLSL.
    // Flip the convention. HLSL is a bit odd in that the memory layout is column major ... but the language API is "row-major".
    // The way to deal with this is to multiply everything in inverse order, and reverse the memory layout.
    if (flags.get(DecorationColMajor))
        return "row_major ";
    else if (flags.get(DecorationRowMajor))
        return "column_major ";

    return "";
}

void CompilerHLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
                                      const string &qualifier, uint32_t base_offset)
{
    auto &membertype = get<SPIRType>(member_type_id);

    Bitset memberflags;
    auto &memb = ir.meta[type.self].members;
    if (index < memb.size())
        memberflags = memb[index].decoration_flags;

    string packing_offset;
    bool is_push_constant = type.storage == StorageClassPushConstant;

    if ((has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) || is_push_constant) &&
        has_member_decoration(type.self, index, DecorationOffset))
    {
        uint32_t offset = memb[index].offset - base_offset;
        if (offset & 3)
            SPIRV_CROSS_THROW("Cannot pack on tighter bounds than 4 bytes in HLSL.");

        static const char *packing_swizzle[] = { "", ".y", ".z", ".w" };
        packing_offset = join(" : packoffset(c", offset / 16, packing_swizzle[(offset & 15) >> 2], ")");
    }

    statement(layout_for_member(type, index), qualifier,
              variable_decl(membertype, to_member_name(type, index)), packing_offset, ";");
}
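
// Emits a rayquery getter expression, appending either the committed or the candidate
// method name to the query object depending on the constant intersection-type operand (ops[3]).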
void CompilerHLSL::emit_rayquery_function(const char *committed, const char *candidate, const uint32_t *ops)
{
    flush_variable_declaration(ops[0]);
    uint32_t is_committed = evaluate_constant_u32(ops[3]);
    emit_op(ops[0], ops[1], join(to_expression(ops[2]), is_committed ? committed : candidate), false);
}
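
// OpEmitMeshTasksEXT maps to DispatchMesh() in HLSL amplification shaders, e.g.:
//   DispatchMesh(32u, 1u, 1u, payload);
// Unlike SPIR-V, HLSL requires the payload argument, so its absence is an error.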
void CompilerHLSL::emit_mesh_tasks(SPIRBlock &block)
{
    if (block.mesh.payload != 0)
    {
        statement("DispatchMesh(", to_unpacked_expression(block.mesh.groups[0]), ", ", to_unpacked_expression(block.mesh.groups[1]), ", ",
                  to_unpacked_expression(block.mesh.groups[2]), ", ", to_unpacked_expression(block.mesh.payload), ");");
    }
    else
    {
        SPIRV_CROSS_THROW("Amplification shader in HLSL must have a payload.");
    }
}
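
// SSBOs and BufferBlock-decorated UBOs become (RW/RasterizerOrdered)ByteAddressBuffer
// or (RW)StructuredBuffer declarations; plain UBOs become cbuffer blocks with packoffset,
// or ConstantBuffer<T> when arrays of UBOs are required (SM 5.1+).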
void CompilerHLSL::emit_buffer_block(const SPIRVariable &var)
{
    auto &type = get<SPIRType>(var.basetype);

    bool is_uav = var.storage == StorageClassStorageBuffer || has_decoration(type.self, DecorationBufferBlock);

    if (flattened_buffer_blocks.count(var.self))
    {
        emit_buffer_block_flattened(var);
    }
    else if (is_uav)
    {
        Bitset flags = ir.get_buffer_block_flags(var);
        bool is_readonly = flags.get(DecorationNonWritable) && !is_hlsl_force_storage_buffer_as_uav(var.self);
        bool is_coherent = flags.get(DecorationCoherent) && !is_readonly;
        bool is_interlocked = interlocked_resources.count(var.self) > 0;

        auto to_structuredbuffer_subtype_name = [this](const SPIRType &parent_type) -> std::string
        {
            if (parent_type.basetype == SPIRType::Struct && parent_type.member_types.size() == 1)
            {
                // Use the type of the first struct member, as a StructuredBuffer will have only one '._m0' field in SPIR-V.
                const auto &member0_type = this->get<SPIRType>(parent_type.member_types.front());
                return this->type_to_glsl(member0_type);
            }
            else
            {
                // Otherwise, this StructuredBuffer only has a basic subtype, e.g. StructuredBuffer<int>.
                return this->type_to_glsl(parent_type);
            }
        };

        std::string type_name;
        if (is_user_type_structured(var.self))
            type_name = join(is_readonly ? "" : is_interlocked ? "RasterizerOrdered" : "RW", "StructuredBuffer<", to_structuredbuffer_subtype_name(type), ">");
        else
            type_name = is_readonly ? "ByteAddressBuffer" : is_interlocked ? "RasterizerOrderedByteAddressBuffer" : "RWByteAddressBuffer";

        add_resource_name(var.self);
        statement(is_coherent ? "globallycoherent " : "", type_name, " ", to_name(var.self), type_to_array_glsl(type),
                  to_resource_binding(var), ";");
    }
    else
    {
        if (type.array.empty())
        {
            // Flatten the top-level struct so we can use packoffset;
            // this restriction is similar to GLSL where layout(offset) is not possible on sub-structs.
            flattened_structs[var.self] = false;

            // Prefer the block name if possible.
            auto buffer_name = to_name(type.self, false);
            if (ir.meta[type.self].decoration.alias.empty() ||
                resource_names.find(buffer_name) != end(resource_names) ||
                block_names.find(buffer_name) != end(block_names))
            {
                buffer_name = get_block_fallback_name(var.self);
            }

            add_variable(block_names, resource_names, buffer_name);

            // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
            // This cannot conflict with anything else, so we're safe now.
            if (buffer_name.empty())
                buffer_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self);

            uint32_t failed_index = 0;
            if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset, &failed_index))
                set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
            else
            {
                SPIRV_CROSS_THROW(join("cbuffer ID ", var.self, " (name: ", buffer_name, "), member index ",
                                       failed_index, " (name: ", to_member_name(type, failed_index),
                                       ") cannot be expressed with either HLSL packing layout or packoffset."));
            }

            block_names.insert(buffer_name);

            // Save for post-reflection later.
            declared_block_names[var.self] = buffer_name;

            type.member_name_cache.clear();
            // var.self can be used as a backup name for the block name,
            // so we need to make sure we don't disturb the name here on a recompile.
            // It will need to be reset if we have to recompile.
            preserve_alias_on_reset(var.self);
            add_resource_name(var.self);
            statement("cbuffer ", buffer_name, to_resource_binding(var));
            begin_scope();

            uint32_t i = 0;
            for (auto &member : type.member_types)
            {
                add_member_name(type, i);
                auto backup_name = get_member_name(type.self, i);
                auto member_name = to_member_name(type, i);
                member_name = join(to_name(var.self), "_", member_name);
                ParsedIR::sanitize_underscores(member_name);
                set_member_name(type.self, i, member_name);
                emit_struct_member(type, member, i, "");
                set_member_name(type.self, i, backup_name);
                i++;
            }

            end_scope_decl();
            statement("");
        }
        else
        {
            if (hlsl_options.shader_model < 51)
                SPIRV_CROSS_THROW(
                    "Need ConstantBuffer<T> to use arrays of UBOs, but this is only supported in SM 5.1.");

            add_resource_name(type.self);
            add_resource_name(var.self);

            // ConstantBuffer<T> does not support packoffset, so it is unusable unless everything aligns as we expect.
            uint32_t failed_index = 0;
            if (!buffer_is_packing_standard(type, BufferPackingHLSLCbuffer, &failed_index))
            {
                SPIRV_CROSS_THROW(join("HLSL ConstantBuffer<T> ID ", var.self, " (name: ", to_name(type.self),
                                       "), member index ", failed_index, " (name: ", to_member_name(type, failed_index),
                                       ") cannot be expressed with normal HLSL packing rules."));
            }

            emit_struct(get<SPIRType>(type.self));
            statement("ConstantBuffer<", to_name(type.self), "> ", to_name(var.self), type_to_array_glsl(type),
                      to_resource_binding(var), ";");
        }
    }
}
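
// Push constants become either a flattened block, a regular cbuffer, or, when
// root_constants_layout is populated, one cbuffer per requested root-constant range.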
void CompilerHLSL::emit_push_constant_block(const SPIRVariable &var)
{
    if (flattened_buffer_blocks.count(var.self))
    {
        emit_buffer_block_flattened(var);
    }
    else if (root_constants_layout.empty())
    {
        emit_buffer_block(var);
    }
    else
    {
        for (const auto &layout : root_constants_layout)
        {
            auto &type = get<SPIRType>(var.basetype);

            uint32_t failed_index = 0;
            if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset, &failed_index, layout.start,
                                           layout.end))
                set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
            else
            {
                SPIRV_CROSS_THROW(join("Root constant cbuffer ID ", var.self, " (name: ", to_name(type.self), ")",
                                       ", member index ", failed_index, " (name: ", to_member_name(type, failed_index),
                                       ") cannot be expressed with either HLSL packing layout or packoffset."));
            }

            flattened_structs[var.self] = false;
            type.member_name_cache.clear();
            add_resource_name(var.self);
            auto &memb = ir.meta[type.self].members;

            statement("cbuffer SPIRV_CROSS_RootConstant_", to_name(var.self),
                      to_resource_register(HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT, 'b', layout.binding, layout.space));
            begin_scope();

            // Index of the next field in the generated root-constant cbuffer.
            auto constant_index = 0u;

            // Iterate over all members of the push constant block and check which of the fields
            // fit into the given root constant layout.
            for (auto i = 0u; i < memb.size(); i++)
            {
                const auto offset = memb[i].offset;
                if (layout.start <= offset && offset < layout.end)
                {
                    const auto &member = type.member_types[i];

                    add_member_name(type, constant_index);
                    auto backup_name = get_member_name(type.self, i);
                    auto member_name = to_member_name(type, i);
                    member_name = join(to_name(var.self), "_", member_name);
                    ParsedIR::sanitize_underscores(member_name);
                    set_member_name(type.self, constant_index, member_name);
                    emit_struct_member(type, member, i, "", layout.start);
                    set_member_name(type.self, constant_index, backup_name);

                    constant_index++;
                }
            }

            end_scope_decl();
        }
    }
}
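
// Derives the name of the implicitly generated sampler for a separate texture,
// e.g. "MyTexture" becomes "_MyTexture_sampler" and "Textures[i]" becomes "_Textures_sampler[i]".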
string CompilerHLSL::to_sampler_expression(uint32_t id)
{
    auto expr = join("_", to_non_uniform_aware_expression(id));
    auto index = expr.find_first_of('[');
    if (index == string::npos)
    {
        return expr + "_sampler";
    }
    else
    {
        // We have an expression like _ident[array], so we cannot tack on _sampler; insert it inside the string instead.
        return expr.insert(index, "_sampler");
    }
}

void CompilerHLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id)
{
    if (hlsl_options.shader_model >= 40 && combined_image_samplers.empty())
    {
        set<SPIRCombinedImageSampler>(result_id, result_type, image_id, samp_id);
    }
    else
    {
        // Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types.
        emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true, true);
    }
}

string CompilerHLSL::to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id)
{
    string arg_str = CompilerGLSL::to_func_call_arg(arg, id);

    if (hlsl_options.shader_model <= 30)
        return arg_str;

    // Manufacture automatic sampler arg if the arg is a SampledImage texture and we're in modern HLSL.
    auto &type = expression_type(id);

    // We don't have to consider combined image samplers here via OpSampledImage because
    // those variables cannot be passed as arguments to functions.
    // Only global SampledImage variables may be used as arguments.
    if (type.basetype == SPIRType::SampledImage && type.image.dim != DimBuffer)
        arg_str += ", " + to_sampler_expression(id);

    return arg_str;
}

string CompilerHLSL::get_inner_entry_point_name() const
{
    auto &execution = get_entry_point();

    if (hlsl_options.use_entry_point_name)
    {
        auto name = join(execution.name, "_inner");
        ParsedIR::sanitize_underscores(name);
        return name;
    }

    if (execution.model == ExecutionModelVertex)
        return "vert_main";
    else if (execution.model == ExecutionModelFragment)
        return "frag_main";
    else if (execution.model == ExecutionModelGLCompute)
        return "comp_main";
    else if (execution.model == ExecutionModelMeshEXT)
        return "mesh_main";
    else if (execution.model == ExecutionModelTaskEXT)
        return "task_main";
    else
        SPIRV_CROSS_THROW("Unsupported execution model.");
}

void CompilerHLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags)
{
    if (func.self != ir.default_entry_point)
        add_function_overload(func);

    // Avoid shadow declarations.
    local_variable_names = resource_names;

    string decl;

    auto &type = get<SPIRType>(func.return_type);
    if (type.array.empty())
    {
        decl += flags_to_qualifiers_glsl(type, return_flags);
        decl += type_to_glsl(type);
        decl += " ";
    }
    else
    {
        // We cannot return arrays in HLSL, so "return" through an out variable.
        decl = "void ";
    }

    if (func.self == ir.default_entry_point)
    {
        decl += get_inner_entry_point_name();
        processing_entry_point = true;
    }
    else
        decl += to_name(func.self);

    decl += "(";
    SmallVector<string> arglist;

    if (!type.array.empty())
    {
        // Fake array returns by writing to an out array instead.
        string out_argument;
        out_argument += "out ";
        out_argument += type_to_glsl(type);
        out_argument += " ";
        out_argument += "spvReturnValue";
        out_argument += type_to_array_glsl(type);
        arglist.push_back(std::move(out_argument));
    }

    for (auto &arg : func.arguments)
    {
        // Do not pass in separate images or samplers if we're remapping
        // to combined image samplers.
        if (skip_argument(arg.id))
            continue;

        // Might change the variable name if it already exists in this function.
        // SPIR-V OpName doesn't have any semantic effect, so it's valid for an implementation
        // to use the same name for multiple variables.
        // Since we want to make the output debuggable and somewhat sane, use fallback names for variables which are duplicates.
        add_local_variable_name(arg.id);

        arglist.push_back(argument_decl(arg));

        // Flatten a combined sampler to two separate arguments in modern HLSL.
        auto &arg_type = get<SPIRType>(arg.type);
        if (hlsl_options.shader_model > 30 && arg_type.basetype == SPIRType::SampledImage &&
            arg_type.image.dim != DimBuffer)
        {
            // Manufacture automatic sampler arg for SampledImage texture.
            arglist.push_back(join(is_depth_image(arg_type, arg.id) ? "SamplerComparisonState " : "SamplerState ",
                                   to_sampler_expression(arg.id), type_to_array_glsl(arg_type)));
        }

        // Hold a pointer to the parameter so we can invalidate the readonly field if needed.
        auto *var = maybe_get<SPIRVariable>(arg.id);
        if (var)
            var->parameter = &arg;
    }

    for (auto &arg : func.shadow_arguments)
    {
        // Might change the variable name if it already exists in this function.
        // SPIR-V OpName doesn't have any semantic effect, so it's valid for an implementation
        // to use the same name for multiple variables.
        // Since we want to make the output debuggable and somewhat sane, use fallback names for variables which are duplicates.
        add_local_variable_name(arg.id);

        arglist.push_back(argument_decl(arg));

        // Hold a pointer to the parameter so we can invalidate the readonly field if needed.
        auto *var = maybe_get<SPIRVariable>(arg.id);
        if (var)
            var->parameter = &arg;
    }

    decl += merge(arglist);
    decl += ")";
    statement(decl);
}
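
// Emits the outer "main" wrapper: stage attributes and the entry-point signature,
// copies stage_input into the shadowed globals, calls the inner entry point, and
// writes the globals back into the SPIRV_Cross_Output return value.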
void CompilerHLSL::emit_hlsl_entry_point()
{
    SmallVector<string> arguments;

    if (require_input)
        arguments.push_back("SPIRV_Cross_Input stage_input");

    auto &execution = get_entry_point();

    switch (execution.model)
    {
    case ExecutionModelTaskEXT:
    case ExecutionModelMeshEXT:
    case ExecutionModelGLCompute:
    {
        if (execution.model == ExecutionModelMeshEXT)
        {
            if (execution.flags.get(ExecutionModeOutputTrianglesEXT))
                statement("[outputtopology(\"triangle\")]");
            else if (execution.flags.get(ExecutionModeOutputLinesEXT))
                statement("[outputtopology(\"line\")]");
            else if (execution.flags.get(ExecutionModeOutputPoints))
                SPIRV_CROSS_THROW("Topology mode \"points\" is not supported in DirectX");

            auto &func = get<SPIRFunction>(ir.default_entry_point);
            for (auto &arg : func.arguments)
            {
                auto &var = get<SPIRVariable>(arg.id);
                auto &base_type = get<SPIRType>(var.basetype);
                bool block = has_decoration(base_type.self, DecorationBlock);
                if (var.storage == StorageClassTaskPayloadWorkgroupEXT)
                {
                    arguments.push_back("in payload " + variable_decl(var));
                }
                else if (block)
                {
                    auto flags = get_buffer_block_flags(var.self);
                    if (flags.get(DecorationPerPrimitiveEXT) || has_decoration(arg.id, DecorationPerPrimitiveEXT))
                    {
                        arguments.push_back("out primitives gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[" +
                                            std::to_string(execution.output_primitives) + "]");
                    }
                    else
                    {
                        arguments.push_back("out vertices gl_MeshPerVertexEXT gl_MeshVerticesEXT[" +
                                            std::to_string(execution.output_vertices) + "]");
                    }
                }
                else
                {
                    if (execution.flags.get(ExecutionModeOutputTrianglesEXT))
                    {
                        arguments.push_back("out indices uint3 gl_PrimitiveTriangleIndicesEXT[" +
                                            std::to_string(execution.output_primitives) + "]");
                    }
                    else
                    {
                        arguments.push_back("out indices uint2 gl_PrimitiveLineIndicesEXT[" +
                                            std::to_string(execution.output_primitives) + "]");
                    }
                }
            }
        }

        SpecializationConstant wg_x, wg_y, wg_z;
        get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);

        uint32_t x = execution.workgroup_size.x;
        uint32_t y = execution.workgroup_size.y;
        uint32_t z = execution.workgroup_size.z;

        if (!execution.workgroup_size.constant && execution.flags.get(ExecutionModeLocalSizeId))
        {
            if (execution.workgroup_size.id_x)
                x = get<SPIRConstant>(execution.workgroup_size.id_x).scalar();
            if (execution.workgroup_size.id_y)
                y = get<SPIRConstant>(execution.workgroup_size.id_y).scalar();
            if (execution.workgroup_size.id_z)
                z = get<SPIRConstant>(execution.workgroup_size.id_z).scalar();
        }

        auto x_expr = wg_x.id ? get<SPIRConstant>(wg_x.id).specialization_constant_macro_name : to_string(x);
        auto y_expr = wg_y.id ? get<SPIRConstant>(wg_y.id).specialization_constant_macro_name : to_string(y);
        auto z_expr = wg_z.id ? get<SPIRConstant>(wg_z.id).specialization_constant_macro_name : to_string(z);

        statement("[numthreads(", x_expr, ", ", y_expr, ", ", z_expr, ")]");
        break;
    }
    case ExecutionModelFragment:
        if (execution.flags.get(ExecutionModeEarlyFragmentTests))
            statement("[earlydepthstencil]");
        break;
    default:
        break;
    }

    const char *entry_point_name;
    if (hlsl_options.use_entry_point_name)
        entry_point_name = get_entry_point().name.c_str();
    else
        entry_point_name = "main";

    statement(require_output ? "SPIRV_Cross_Output " : "void ", entry_point_name, "(", merge(arguments), ")");
    begin_scope();
    bool legacy = hlsl_options.shader_model <= 30;

    // Copy builtins from entry point arguments to globals.
    active_input_builtins.for_each_bit([&](uint32_t i) {
        auto builtin = builtin_to_glsl(static_cast<BuiltIn>(i), StorageClassInput);
        switch (static_cast<BuiltIn>(i))
        {
        case BuiltInFragCoord:
            // VPOS in D3D9 is sampled at integer locations, apply half-pixel offset to be consistent.
            // TODO: Do we need an option here? Any reason why a D3D9 shader would be used
            // on a D3D10+ system with a different rasterization config?
            if (legacy)
                statement(builtin, " = stage_input.", builtin, " + float4(0.5f, 0.5f, 0.0f, 0.0f);");
            else
            {
                statement(builtin, " = stage_input.", builtin, ";");
                // ZW are undefined in D3D9, only do this fixup here.
                statement(builtin, ".w = 1.0 / ", builtin, ".w;");
            }
            break;

        case BuiltInVertexId:
        case BuiltInVertexIndex:
        case BuiltInInstanceIndex:
            // D3D semantics are uint, but shader wants int.
            if (hlsl_options.support_nonzero_base_vertex_base_instance)
            {
                if (static_cast<BuiltIn>(i) == BuiltInInstanceIndex)
                    statement(builtin, " = int(stage_input.", builtin, ") + SPIRV_Cross_BaseInstance;");
                else
                    statement(builtin, " = int(stage_input.", builtin, ") + SPIRV_Cross_BaseVertex;");
            }
            else
                statement(builtin, " = int(stage_input.", builtin, ");");
            break;

        case BuiltInBaseVertex:
            statement(builtin, " = SPIRV_Cross_BaseVertex;");
            break;

        case BuiltInBaseInstance:
            statement(builtin, " = SPIRV_Cross_BaseInstance;");
            break;

        case BuiltInInstanceId:
            // D3D semantics are uint, but shader wants int.
            statement(builtin, " = int(stage_input.", builtin, ");");
            break;

        case BuiltInNumWorkgroups:
        case BuiltInPointCoord:
        case BuiltInSubgroupSize:
        case BuiltInSubgroupLocalInvocationId:
        case BuiltInHelperInvocation:
            break;

        case BuiltInSubgroupEqMask:
            // Emulate these ...
            // No 64-bit in HLSL, so have to do it in 32-bit and unroll.
            statement("gl_SubgroupEqMask = 1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96));");
            statement("if (WaveGetLaneIndex() >= 32) gl_SubgroupEqMask.x = 0;");
            statement("if (WaveGetLaneIndex() >= 64 || WaveGetLaneIndex() < 32) gl_SubgroupEqMask.y = 0;");
            statement("if (WaveGetLaneIndex() >= 96 || WaveGetLaneIndex() < 64) gl_SubgroupEqMask.z = 0;");
            statement("if (WaveGetLaneIndex() < 96) gl_SubgroupEqMask.w = 0;");
            break;

        case BuiltInSubgroupGeMask:
            // Emulate these ...
            // No 64-bit in HLSL, so have to do it in 32-bit and unroll.
            statement("gl_SubgroupGeMask = ~((1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96))) - 1u);");
            statement("if (WaveGetLaneIndex() >= 32) gl_SubgroupGeMask.x = 0u;");
            statement("if (WaveGetLaneIndex() >= 64) gl_SubgroupGeMask.y = 0u;");
            statement("if (WaveGetLaneIndex() >= 96) gl_SubgroupGeMask.z = 0u;");
            statement("if (WaveGetLaneIndex() < 32) gl_SubgroupGeMask.y = ~0u;");
            statement("if (WaveGetLaneIndex() < 64) gl_SubgroupGeMask.z = ~0u;");
            statement("if (WaveGetLaneIndex() < 96) gl_SubgroupGeMask.w = ~0u;");
            break;

        case BuiltInSubgroupGtMask:
            // Emulate these ...
            // No 64-bit in HLSL, so have to do it in 32-bit and unroll.
            statement("uint gt_lane_index = WaveGetLaneIndex() + 1;");
            statement("gl_SubgroupGtMask = ~((1u << (gt_lane_index - uint4(0, 32, 64, 96))) - 1u);");
            statement("if (gt_lane_index >= 32) gl_SubgroupGtMask.x = 0u;");
            statement("if (gt_lane_index >= 64) gl_SubgroupGtMask.y = 0u;");
            statement("if (gt_lane_index >= 96) gl_SubgroupGtMask.z = 0u;");
            statement("if (gt_lane_index >= 128) gl_SubgroupGtMask.w = 0u;");
            statement("if (gt_lane_index < 32) gl_SubgroupGtMask.y = ~0u;");
            statement("if (gt_lane_index < 64) gl_SubgroupGtMask.z = ~0u;");
            statement("if (gt_lane_index < 96) gl_SubgroupGtMask.w = ~0u;");
            break;

        case BuiltInSubgroupLeMask:
            // Emulate these ...
            // No 64-bit in HLSL, so have to do it in 32-bit and unroll.
            statement("uint le_lane_index = WaveGetLaneIndex() + 1;");
            statement("gl_SubgroupLeMask = (1u << (le_lane_index - uint4(0, 32, 64, 96))) - 1u;");
            statement("if (le_lane_index >= 32) gl_SubgroupLeMask.x = ~0u;");
            statement("if (le_lane_index >= 64) gl_SubgroupLeMask.y = ~0u;");
            statement("if (le_lane_index >= 96) gl_SubgroupLeMask.z = ~0u;");
            statement("if (le_lane_index >= 128) gl_SubgroupLeMask.w = ~0u;");
            statement("if (le_lane_index < 32) gl_SubgroupLeMask.y = 0u;");
            statement("if (le_lane_index < 64) gl_SubgroupLeMask.z = 0u;");
            statement("if (le_lane_index < 96) gl_SubgroupLeMask.w = 0u;");
            break;

        case BuiltInSubgroupLtMask:
            // Emulate these ...
            // No 64-bit in HLSL, so have to do it in 32-bit and unroll.
            statement("gl_SubgroupLtMask = (1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96))) - 1u;");
            statement("if (WaveGetLaneIndex() >= 32) gl_SubgroupLtMask.x = ~0u;");
            statement("if (WaveGetLaneIndex() >= 64) gl_SubgroupLtMask.y = ~0u;");
            statement("if (WaveGetLaneIndex() >= 96) gl_SubgroupLtMask.z = ~0u;");
            statement("if (WaveGetLaneIndex() < 32) gl_SubgroupLtMask.y = 0u;");
            statement("if (WaveGetLaneIndex() < 64) gl_SubgroupLtMask.z = 0u;");
            statement("if (WaveGetLaneIndex() < 96) gl_SubgroupLtMask.w = 0u;");
            break;

        case BuiltInClipDistance:
            for (uint32_t clip = 0; clip < clip_distance_count; clip++)
                statement("gl_ClipDistance[", clip, "] = stage_input.gl_ClipDistance", clip / 4, ".", "xyzw"[clip & 3],
                          ";");
            break;

        case BuiltInCullDistance:
            for (uint32_t cull = 0; cull < cull_distance_count; cull++)
                statement("gl_CullDistance[", cull, "] = stage_input.gl_CullDistance", cull / 4, ".", "xyzw"[cull & 3],
                          ";");
            break;

        default:
            statement(builtin, " = stage_input.", builtin, ";");
            break;
        }
    });

    // Copy from stage input struct to globals.
    ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
        auto &type = this->get<SPIRType>(var.basetype);
        bool block = has_decoration(type.self, DecorationBlock);

        if (var.storage != StorageClassInput)
            return;

        bool need_matrix_unroll = var.storage == StorageClassInput && execution.model == ExecutionModelVertex;

        if (!var.remapped_variable && type.pointer && !is_builtin_variable(var) &&
            interface_variable_exists_in_entry_point(var.self))
        {
            if (block)
            {
                auto type_name = to_name(type.self);
                auto var_name = to_name(var.self);
                for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(type.member_types.size()); mbr_idx++)
                {
                    auto mbr_name = to_member_name(type, mbr_idx);
                    auto flat_name = join(type_name, "_", mbr_name);
                    statement(var_name, ".", mbr_name, " = stage_input.", flat_name, ";");
                }
            }
            else
            {
                auto name = to_name(var.self);
                auto &mtype = this->get<SPIRType>(var.basetype);
                if (need_matrix_unroll && mtype.columns > 1)
                {
                    // Unroll matrices.
                    for (uint32_t col = 0; col < mtype.columns; col++)
                        statement(name, "[", col, "] = stage_input.", name, "_", col, ";");
                }
                else
                {
                    statement(name, " = stage_input.", name, ";");
                }
            }
        }
    });

    // Run the shader.
    if (execution.model == ExecutionModelVertex ||
        execution.model == ExecutionModelFragment ||
        execution.model == ExecutionModelGLCompute ||
        execution.model == ExecutionModelMeshEXT ||
        execution.model == ExecutionModelTaskEXT)
    {
        // For mesh shaders, we receive special arguments that we must pass down as function arguments.
        // HLSL does not support proper reference types for passing these IO blocks,
        // but DXC post-inlining seems to magically fix it up anyways *shrug*.
        SmallVector<string> arglist;
        auto &func = get<SPIRFunction>(ir.default_entry_point);
        // The arguments are marked out, avoid detecting reads and emitting inout.
        for (auto &arg : func.arguments)
            arglist.push_back(to_expression(arg.id, false));
        statement(get_inner_entry_point_name(), "(", merge(arglist), ");");
    }
    else
        SPIRV_CROSS_THROW("Unsupported shader stage.");

    // Copy stage outputs.
    if (require_output)
    {
        statement("SPIRV_Cross_Output stage_output;");

        // Copy builtins from globals to return struct.
        active_output_builtins.for_each_bit([&](uint32_t i) {
            // PointSize doesn't exist in HLSL SM 4+.
            if (i == BuiltInPointSize && !legacy)
                return;

            switch (static_cast<BuiltIn>(i))
            {
            case BuiltInClipDistance:
                for (uint32_t clip = 0; clip < clip_distance_count; clip++)
                    statement("stage_output.gl_ClipDistance", clip / 4, ".", "xyzw"[clip & 3], " = gl_ClipDistance[",
                              clip, "];");
                break;

            case BuiltInCullDistance:
                for (uint32_t cull = 0; cull < cull_distance_count; cull++)
                    statement("stage_output.gl_CullDistance", cull / 4, ".", "xyzw"[cull & 3], " = gl_CullDistance[",
                              cull, "];");
                break;

            default:
            {
                auto builtin_expr = builtin_to_glsl(static_cast<BuiltIn>(i), StorageClassOutput);
                statement("stage_output.", builtin_expr, " = ", builtin_expr, ";");
                break;
            }
            }
        });

        ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
            auto &type = this->get<SPIRType>(var.basetype);
            bool block = has_decoration(type.self, DecorationBlock);

            if (var.storage != StorageClassOutput)
                return;

            if (!var.remapped_variable && type.pointer &&
                !is_builtin_variable(var) &&
                interface_variable_exists_in_entry_point(var.self))
            {
                if (block)
                {
                    // I/O blocks need to flatten output.
                    auto type_name = to_name(type.self);
                    auto var_name = to_name(var.self);
                    for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(type.member_types.size()); mbr_idx++)
                    {
                        auto mbr_name = to_member_name(type, mbr_idx);
                        auto flat_name = join(type_name, "_", mbr_name);
                        statement("stage_output.", flat_name, " = ", var_name, ".", mbr_name, ";");
                    }
                }
                else
                {
                    auto name = to_name(var.self);

                    if (legacy && execution.model == ExecutionModelFragment)
                    {
                        string output_filler;
                        for (uint32_t size = type.vecsize; size < 4; ++size)
                            output_filler += ", 0.0";

                        statement("stage_output.", name, " = float4(", name, output_filler, ");");
                    }
                    else
                    {
                        statement("stage_output.", name, " = ", name, ";");
                    }
                }
            }
        });

        statement("return stage_output;");
    }

    end_scope();
}
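
// Applies legacy half-pixel, Y-flip and clip-space fixups to gl_Position in vertex-like stages.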
void CompilerHLSL::emit_fixup()
{
    if (is_vertex_like_shader() && active_output_builtins.get(BuiltInPosition))
    {
        // Do various mangling on the gl_Position.
        if (hlsl_options.shader_model <= 30)
        {
            statement("gl_Position.x = gl_Position.x - gl_HalfPixel.x * "
                      "gl_Position.w;");
            statement("gl_Position.y = gl_Position.y + gl_HalfPixel.y * "
                      "gl_Position.w;");
        }

        if (options.vertex.flip_vert_y)
            statement("gl_Position.y = -gl_Position.y;");
        if (options.vertex.fixup_clipspace)
            statement("gl_Position.z = (gl_Position.z + gl_Position.w) * 0.5;");
    }
}
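
// Translates SPIR-V image sample/fetch/gather opcodes into HLSL texture methods
// (Sample, SampleLevel, SampleCmp, Gather*, Load, ...) for SM4+, or tex2D-style
// intrinsics for legacy shader models, collecting the optional image operands on the way.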
void CompilerHLSL::emit_texture_op(const Instruction &i, bool sparse)
{
    if (sparse)
        SPIRV_CROSS_THROW("Sparse feedback not yet supported in HLSL.");

    auto *ops = stream(i);
    auto op = static_cast<Op>(i.op);
    uint32_t length = i.length;

    SmallVector<uint32_t> inherited_expressions;

    uint32_t result_type = ops[0];
    uint32_t id = ops[1];
    VariableID img = ops[2];
    uint32_t coord = ops[3];
    uint32_t dref = 0;
    uint32_t comp = 0;
    bool gather = false;
    bool proj = false;
    const uint32_t *opt = nullptr;
    auto *combined_image = maybe_get<SPIRCombinedImageSampler>(img);

    if (combined_image && has_decoration(img, DecorationNonUniform))
    {
        set_decoration(combined_image->image, DecorationNonUniform);
        set_decoration(combined_image->sampler, DecorationNonUniform);
    }

    auto img_expr = to_non_uniform_aware_expression(combined_image ? combined_image->image : img);

    inherited_expressions.push_back(coord);

    switch (op)
    {
    case OpImageSampleDrefImplicitLod:
    case OpImageSampleDrefExplicitLod:
        dref = ops[4];
        opt = &ops[5];
        length -= 5;
        break;

    case OpImageSampleProjDrefImplicitLod:
    case OpImageSampleProjDrefExplicitLod:
        dref = ops[4];
        proj = true;
        opt = &ops[5];
        length -= 5;
        break;

    case OpImageDrefGather:
        dref = ops[4];
        opt = &ops[5];
        gather = true;
        length -= 5;
        break;

    case OpImageGather:
        comp = ops[4];
        opt = &ops[5];
        gather = true;
        length -= 5;
        break;

    case OpImageSampleProjImplicitLod:
    case OpImageSampleProjExplicitLod:
        opt = &ops[4];
        length -= 4;
        proj = true;
        break;

    case OpImageQueryLod:
        opt = &ops[4];
        length -= 4;
        break;

    default:
        opt = &ops[4];
        length -= 4;
        break;
    }

    auto &imgtype = expression_type(img);
    uint32_t coord_components = 0;
    switch (imgtype.image.dim)
    {
    case spv::Dim1D:
        coord_components = 1;
        break;
    case spv::Dim2D:
        coord_components = 2;
        break;
    case spv::Dim3D:
        coord_components = 3;
        break;
    case spv::DimCube:
        coord_components = 3;
        break;
    case spv::DimBuffer:
        coord_components = 1;
        break;
    default:
        coord_components = 2;
        break;
    }

    if (dref)
        inherited_expressions.push_back(dref);

    if (imgtype.image.arrayed)
        coord_components++;

    uint32_t bias = 0;
    uint32_t lod = 0;
    uint32_t grad_x = 0;
    uint32_t grad_y = 0;
    uint32_t coffset = 0;
    uint32_t offset = 0;
    uint32_t coffsets = 0;
    uint32_t sample = 0;
    uint32_t minlod = 0;
    uint32_t flags = 0;

    if (length)
    {
        flags = opt[0];
        opt++;
        length--;
    }

    auto test = [&](uint32_t &v, uint32_t flag) {
        if (length && (flags & flag))
        {
            v = *opt++;
            inherited_expressions.push_back(v);
            length--;
        }
    };

    test(bias, ImageOperandsBiasMask);
    test(lod, ImageOperandsLodMask);
    test(grad_x, ImageOperandsGradMask);
    test(grad_y, ImageOperandsGradMask);
    test(coffset, ImageOperandsConstOffsetMask);
    test(offset, ImageOperandsOffsetMask);
    test(coffsets, ImageOperandsConstOffsetsMask);
    test(sample, ImageOperandsSampleMask);
    test(minlod, ImageOperandsMinLodMask);

    string expr;
    string texop;

    if (minlod != 0)
        SPIRV_CROSS_THROW("MinLod texture operand not supported in HLSL.");

    if (op == OpImageFetch)
    {
        if (hlsl_options.shader_model < 40)
        {
            SPIRV_CROSS_THROW("texelFetch is not supported in HLSL shader model 2/3.");
        }
        texop += img_expr;
        texop += ".Load";
    }
    else if (op == OpImageQueryLod)
    {
        texop += img_expr;
        texop += ".CalculateLevelOfDetail";
    }
    else
    {
        auto &imgformat = get<SPIRType>(imgtype.image.type);
        if (hlsl_options.shader_model < 67 && imgformat.basetype != SPIRType::Float)
        {
            SPIRV_CROSS_THROW("Sampling non-float textures is not supported in HLSL SM < 6.7.");
        }

        if (hlsl_options.shader_model >= 40)
        {
            texop += img_expr;

            if (is_depth_image(imgtype, img))
            {
                if (gather)
                {
                    texop += ".GatherCmp";
                }
                else if (lod || grad_x || grad_y)
                {
                    // Assume we want a fixed level, and the only thing we can get in HLSL is SampleCmpLevelZero.
                    texop += ".SampleCmpLevelZero";
                }
                else
                    texop += ".SampleCmp";
            }
            else if (gather)
            {
                uint32_t comp_num = evaluate_constant_u32(comp);
                if (hlsl_options.shader_model >= 50)
                {
                    switch (comp_num)
                    {
                    case 0:
                        texop += ".GatherRed";
                        break;
                    case 1:
                        texop += ".GatherGreen";
                        break;
                    case 2:
                        texop += ".GatherBlue";
                        break;
                    case 3:
                        texop += ".GatherAlpha";
                        break;
                    default:
                        SPIRV_CROSS_THROW("Invalid component.");
                    }
                }
                else
                {
                    if (comp_num == 0)
                        texop += ".Gather";
                    else
                        SPIRV_CROSS_THROW("HLSL shader model 4 can only gather from the red component.");
                }
            }
            else if (bias)
                texop += ".SampleBias";
            else if (grad_x || grad_y)
                texop += ".SampleGrad";
            else if (lod)
                texop += ".SampleLevel";
            else
                texop += ".Sample";
        }
        else
        {
            switch (imgtype.image.dim)
            {
            case Dim1D:
                texop += "tex1D";
                break;
            case Dim2D:
                texop += "tex2D";
                break;
            case Dim3D:
                texop += "tex3D";
                break;
            case DimCube:
                texop += "texCUBE";
                break;
            case DimRect:
            case DimBuffer:
            case DimSubpassData:
                SPIRV_CROSS_THROW("Buffer texture support is not yet implemented for HLSL"); // TODO
            default:
                SPIRV_CROSS_THROW("Invalid dimension.");
            }

            if (gather)
                SPIRV_CROSS_THROW("textureGather is not supported in HLSL shader model 2/3.");
            if (offset || coffset)
                SPIRV_CROSS_THROW("textureOffset is not supported in HLSL shader model 2/3.");

            if (grad_x || grad_y)
                texop += "grad";
            else if (lod)
                texop += "lod";
            else if (bias)
                texop += "bias";
            else if (proj || dref)
                texop += "proj";
        }
    }

    expr += texop;
    expr += "(";

    if (hlsl_options.shader_model < 40)
    {
        if (combined_image)
            SPIRV_CROSS_THROW("Separate images/samplers are not supported in HLSL shader model 2/3.");
        expr += to_expression(img);
    }
    else if (op != OpImageFetch)
    {
        string sampler_expr;
        if (combined_image)
            sampler_expr = to_non_uniform_aware_expression(combined_image->sampler);
        else
            sampler_expr = to_sampler_expression(img);
        expr += sampler_expr;
    }

    auto swizzle = [](uint32_t comps, uint32_t in_comps) -> const char * {
        if (comps == in_comps)
            return "";

        switch (comps)
        {
        case 1:
            return ".x";
        case 2:
            return ".xy";
        case 3:
            return ".xyz";
        default:
            return "";
        }
    };

    bool forward = should_forward(coord);

    // The IR can give us more components than we need, so chop them off as needed.
    string coord_expr;
    auto &coord_type = expression_type(coord);
    if (coord_components != coord_type.vecsize)
        coord_expr = to_enclosed_expression(coord) + swizzle(coord_components, expression_type(coord).vecsize);
    else
        coord_expr = to_expression(coord);

    if (proj && hlsl_options.shader_model >= 40) // Legacy HLSL has "proj" operations which do this for us.
        coord_expr = coord_expr + " / " + to_extract_component_expression(coord, coord_components);

    if (hlsl_options.shader_model < 40)
    {
        if (dref)
        {
            if (imgtype.image.dim != spv::Dim1D && imgtype.image.dim != spv::Dim2D)
            {
                SPIRV_CROSS_THROW(
                    "Depth comparison is only supported for 1D and 2D textures in HLSL shader model 2/3.");
            }

            if (grad_x || grad_y)
                SPIRV_CROSS_THROW("Depth comparison is not supported for grad sampling in HLSL shader model 2/3.");

            for (uint32_t size = coord_components; size < 2; ++size)
                coord_expr += ", 0.0";

            forward = forward && should_forward(dref);
            coord_expr += ", " + to_expression(dref);
        }
        else if (lod || bias || proj)
        {
            for (uint32_t size = coord_components; size < 3; ++size)
                coord_expr += ", 0.0";
        }

        if (lod)
        {
            coord_expr = "float4(" + coord_expr + ", " + to_expression(lod) + ")";
        }
        else if (bias)
        {
            coord_expr = "float4(" + coord_expr + ", " + to_expression(bias) + ")";
        }
        else if (proj)
        {
            coord_expr = "float4(" + coord_expr + ", " + to_extract_component_expression(coord, coord_components) + ")";
        }
        else if (dref)
        {
            // A "normal" sample gets fed into tex2Dproj as well, because the
            // regular tex2D accepts only two coordinates.
            coord_expr = "float4(" + coord_expr + ", 1.0)";
        }

        if (!!lod + !!bias + !!proj > 1)
            SPIRV_CROSS_THROW("Legacy HLSL can only use one of lod/bias/proj modifiers.");
    }

    if (op == OpImageFetch)
    {
        if (imgtype.image.dim != DimBuffer && !imgtype.image.ms)
            coord_expr =
                join("int", coord_components + 1, "(", coord_expr, ", ", lod ? to_expression(lod) : string("0"), ")");
    }
    else
        expr += ", ";
    expr += coord_expr;

    if (dref && hlsl_options.shader_model >= 40)
    {
        forward = forward && should_forward(dref);
        expr += ", ";

        if (proj)
            expr += to_enclosed_expression(dref) + " / " + to_extract_component_expression(coord, coord_components);
        else
            expr += to_expression(dref);
    }

    if (!dref && (grad_x || grad_y))
    {
        forward = forward && should_forward(grad_x);
        forward = forward && should_forward(grad_y);
        expr += ", ";
        expr += to_expression(grad_x);
        expr += ", ";
        expr += to_expression(grad_y);
    }

    if (!dref && lod && hlsl_options.shader_model >= 40 && op != OpImageFetch)
    {
        forward = forward && should_forward(lod);
        expr += ", ";
        expr += to_expression(lod);
    }

    if (!dref && bias && hlsl_options.shader_model >= 40)
    {
        forward = forward && should_forward(bias);
        expr += ", ";
        expr += to_expression(bias);
    }

    if (coffset)
    {
        forward = forward && should_forward(coffset);
        expr += ", ";
        expr += to_expression(coffset);
    }
    else if (offset)
    {
        forward = forward && should_forward(offset);
        expr += ", ";
        expr += to_expression(offset);
    }

    if (sample)
    {
        expr += ", ";
        expr += to_expression(sample);
    }

    expr += ")";

    if (dref && hlsl_options.shader_model < 40)
        expr += ".x";

    if (op == OpImageQueryLod)
    {
        // This is rather awkward.
        // textureQueryLod returns two values, the "accessed level",
        // as well as the actual LOD lambda.
        // As far as I can tell, there is no way to get the .x component
        // according to GLSL spec, and it depends on the sampler itself.
        // Just assume X == Y, so we will need to splat the result to a float2.
        statement("float _", id, "_tmp = ", expr, ";");
        statement("float2 _", id, " = _", id, "_tmp.xx;");
        set<SPIRExpression>(id, join("_", id), result_type, true);
    }
    else
    {
        emit_op(result_type, id, expr, forward, false);
    }

    for (auto &inherit : inherited_expressions)
        inherit_expression_dependencies(id, inherit);

    switch (op)
    {
    case OpImageSampleDrefImplicitLod:
    case OpImageSampleImplicitLod:
    case OpImageSampleProjImplicitLod:
    case OpImageSampleProjDrefImplicitLod:
        register_control_dependent_expression(id);
        break;

    default:
        break;
    }
}
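
// Maps a resource variable to its HLSL register declaration, choosing the register
// class from the SPIR-V type: t (SRV), u (UAV), b (CBV) or s (sampler).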
string CompilerHLSL::to_resource_binding(const SPIRVariable &var)
{
    const auto &type = get<SPIRType>(var.basetype);

    // We can remap push constant blocks, even if they don't have any binding decoration.
    if (type.storage != StorageClassPushConstant && !has_decoration(var.self, DecorationBinding))
        return "";

    char space = '\0';

    HLSLBindingFlagBits resource_flags = HLSL_BINDING_AUTO_NONE_BIT;

    switch (type.basetype)
    {
    case SPIRType::SampledImage:
        space = 't'; // SRV
        resource_flags = HLSL_BINDING_AUTO_SRV_BIT;
        break;

    case SPIRType::Image:
        if (type.image.sampled == 2 && type.image.dim != DimSubpassData)
        {
            if (has_decoration(var.self, DecorationNonWritable) && hlsl_options.nonwritable_uav_texture_as_srv)
            {
                space = 't'; // SRV
                resource_flags = HLSL_BINDING_AUTO_SRV_BIT;
            }
            else
            {
                space = 'u'; // UAV
                resource_flags = HLSL_BINDING_AUTO_UAV_BIT;
            }
        }
        else
        {
            space = 't'; // SRV
            resource_flags = HLSL_BINDING_AUTO_SRV_BIT;
        }
        break;

    case SPIRType::Sampler:
        space = 's';
        resource_flags = HLSL_BINDING_AUTO_SAMPLER_BIT;
        break;

    case SPIRType::AccelerationStructure:
        space = 't'; // SRV
        resource_flags = HLSL_BINDING_AUTO_SRV_BIT;
        break;

    case SPIRType::Struct:
    {
        auto storage = type.storage;
        if (storage == StorageClassUniform)
        {
            if (has_decoration(type.self, DecorationBufferBlock))
            {
                Bitset flags = ir.get_buffer_block_flags(var);
                bool is_readonly = flags.get(DecorationNonWritable) && !is_hlsl_force_storage_buffer_as_uav(var.self);
                space = is_readonly ? 't' : 'u'; // SRV if readonly, UAV otherwise.
                resource_flags = is_readonly ? HLSL_BINDING_AUTO_SRV_BIT : HLSL_BINDING_AUTO_UAV_BIT;
            }
            else if (has_decoration(type.self, DecorationBlock))
            {
                space = 'b'; // Constant buffers
                resource_flags = HLSL_BINDING_AUTO_CBV_BIT;
            }
        }
        else if (storage == StorageClassPushConstant)
        {
            space = 'b'; // Constant buffers
            resource_flags = HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT;
        }
        else if (storage == StorageClassStorageBuffer)
        {
            // UAV or SRV depending on readonly flag.
            Bitset flags = ir.get_buffer_block_flags(var);
            bool is_readonly = flags.get(DecorationNonWritable) && !is_hlsl_force_storage_buffer_as_uav(var.self);
            space = is_readonly ? 't' : 'u';
            resource_flags = is_readonly ? HLSL_BINDING_AUTO_SRV_BIT : HLSL_BINDING_AUTO_UAV_BIT;
        }

        break;
    }
    default:
        break;
    }

    if (!space)
        return "";

    uint32_t desc_set =
        resource_flags == HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT ? ResourceBindingPushConstantDescriptorSet : 0u;
    uint32_t binding = resource_flags == HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT ? ResourceBindingPushConstantBinding : 0u;

    if (has_decoration(var.self, DecorationBinding))
        binding = get_decoration(var.self, DecorationBinding);
    if (has_decoration(var.self, DecorationDescriptorSet))
        desc_set = get_decoration(var.self, DecorationDescriptorSet);

    return to_resource_register(resource_flags, space, binding, desc_set);
}

string CompilerHLSL::to_resource_binding_sampler(const SPIRVariable &var)
{
    // For combined image samplers.
    if (!has_decoration(var.self, DecorationBinding))
        return "";

    return to_resource_register(HLSL_BINDING_AUTO_SAMPLER_BIT, 's', get_decoration(var.self, DecorationBinding),
                                get_decoration(var.self, DecorationDescriptorSet));
}

void CompilerHLSL::remap_hlsl_resource_binding(HLSLBindingFlagBits type, uint32_t &desc_set, uint32_t &binding)
{
    auto itr = resource_bindings.find({ get_execution_model(), desc_set, binding });
    if (itr != end(resource_bindings))
    {
        auto &remap = itr->second;
        remap.second = true;

        switch (type)
        {
        case HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT:
        case HLSL_BINDING_AUTO_CBV_BIT:
            desc_set = remap.first.cbv.register_space;
            binding = remap.first.cbv.register_binding;
            break;

        case HLSL_BINDING_AUTO_SRV_BIT:
            desc_set = remap.first.srv.register_space;
            binding = remap.first.srv.register_binding;
            break;

        case HLSL_BINDING_AUTO_SAMPLER_BIT:
            desc_set = remap.first.sampler.register_space;
            binding = remap.first.sampler.register_binding;
            break;

        case HLSL_BINDING_AUTO_UAV_BIT:
            desc_set = remap.first.uav.register_space;
            binding = remap.first.uav.register_binding;
            break;

        default:
            break;
        }
    }
}
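
// Formats the final " : register(<class><binding>[, space<set>])" suffix, applying
// any user-provided remap first. Register spaces require SM 5.1+.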
string CompilerHLSL::to_resource_register(HLSLBindingFlagBits flag, char space, uint32_t binding, uint32_t space_set)
{
    if ((flag & resource_binding_flags) == 0)
    {
        remap_hlsl_resource_binding(flag, space_set, binding);

        // The push constant block did not have a binding, and there was no remap for it,
        // so declare it without a register binding.
        if (flag == HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT && space_set == ResourceBindingPushConstantDescriptorSet)
            return "";

        if (hlsl_options.shader_model >= 51)
            return join(" : register(", space, binding, ", space", space_set, ")");
        else
            return join(" : register(", space, binding, ")");
    }
    else
        return "";
}

void CompilerHLSL::emit_modern_uniform(const SPIRVariable &var)
{
    auto &type = get<SPIRType>(var.basetype);
    switch (type.basetype)
    {
    case SPIRType::SampledImage:
    case SPIRType::Image:
    {
        bool is_coherent = false;
        if (type.basetype == SPIRType::Image && type.image.sampled == 2)
            is_coherent = has_decoration(var.self, DecorationCoherent);

        statement(is_coherent ? "globallycoherent " : "", image_type_hlsl_modern(type, var.self), " ",
                  to_name(var.self), type_to_array_glsl(type), to_resource_binding(var), ";");

        if (type.basetype == SPIRType::SampledImage && type.image.dim != DimBuffer)
        {
            // For combined image samplers, also emit a combined image sampler.
            if (is_depth_image(type, var.self))
                statement("SamplerComparisonState ", to_sampler_expression(var.self), type_to_array_glsl(type),
                          to_resource_binding_sampler(var), ";");
            else
                statement("SamplerState ", to_sampler_expression(var.self), type_to_array_glsl(type),
                          to_resource_binding_sampler(var), ";");
        }
        break;
    }

    case SPIRType::Sampler:
        if (comparison_ids.count(var.self))
            statement("SamplerComparisonState ", to_name(var.self), type_to_array_glsl(type), to_resource_binding(var),
                      ";");
        else
            statement("SamplerState ", to_name(var.self), type_to_array_glsl(type), to_resource_binding(var), ";");
        break;

    default:
        statement(variable_decl(var), to_resource_binding(var), ";");
        break;
    }
}

void CompilerHLSL::emit_legacy_uniform(const SPIRVariable &var)
{
    auto &type = get<SPIRType>(var.basetype);
    switch (type.basetype)
    {
    case SPIRType::Sampler:
    case SPIRType::Image:
        SPIRV_CROSS_THROW("Separate image and samplers not supported in legacy HLSL.");

    default:
        statement(variable_decl(var), ";");
        break;
    }
}

void CompilerHLSL::emit_uniform(const SPIRVariable &var)
{
    add_resource_name(var.self);
    if (hlsl_options.shader_model >= 40)
        emit_modern_uniform(var);
    else
        emit_legacy_uniform(var);
}

bool CompilerHLSL::emit_complex_bitcast(uint32_t, uint32_t, uint32_t)
{
    return false;
}
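
// Picks the HLSL expression for a SPIR-V OpBitcast: int<->uint casts are plain C-style
// casts, float<->int bitcasts map to asuint/asint/asfloat/asdouble, and half/ushort
// conversions go through f32tof16/f16tof32 or the emitted spv(Un)PackFloat2x16 helpers.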
string CompilerHLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
{
    if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Int)
        return type_to_glsl(out_type);
    else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Int64)
        return type_to_glsl(out_type);
    else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float)
        return "asuint";
    else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::UInt)
        return type_to_glsl(out_type);
    else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::UInt64)
        return type_to_glsl(out_type);
    else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float)
        return "asint";
    else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt)
        return "asfloat";
    else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int)
        return "asfloat";
    else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double)
        SPIRV_CROSS_THROW("Double to Int64 is not supported in HLSL.");
    else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double)
        SPIRV_CROSS_THROW("Double to UInt64 is not supported in HLSL.");
    else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64)
        return "asdouble";
    else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64)
        return "asdouble";
    else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
    {
        if (!requires_explicit_fp16_packing)
        {
            requires_explicit_fp16_packing = true;
            force_recompile();
        }
        return "spvUnpackFloat2x16";
    }
    else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2)
    {
        if (!requires_explicit_fp16_packing)
        {
            requires_explicit_fp16_packing = true;
            force_recompile();
        }
        return "spvPackFloat2x16";
    }
    else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half)
    {
        if (hlsl_options.shader_model < 40)
            SPIRV_CROSS_THROW("Half to UShort requires Shader Model 4.");
        return "(" + type_to_glsl(out_type) + ")f32tof16";
    }
    else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort)
    {
        if (hlsl_options.shader_model < 40)
            SPIRV_CROSS_THROW("UShort to Half requires Shader Model 4.");
        return "(" + type_to_glsl(out_type) + ")f16tof32";
    }
    else
        return "";
}

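// Lower GLSLstd450 extended instructions to their HLSL intrinsic equivalents
// (rsqrt, frac, lerp, mad, ...). Operations with no direct HLSL counterpart
// either request an emitted helper function and force a recompile pass, or
// throw if they cannot be expressed at all.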
void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t count)
{
    auto op = static_cast<GLSLstd450>(eop);

    // If we need to do implicit bitcasts, make sure we do it with the correct type.
    uint32_t integer_width = get_integer_width_for_glsl_instruction(op, args, count);
    auto int_type = to_signed_basetype(integer_width);
    auto uint_type = to_unsigned_basetype(integer_width);

    op = get_remapped_glsl_op(op);

    switch (op)
    {
    case GLSLstd450InverseSqrt:
        emit_unary_func_op(result_type, id, args[0], "rsqrt");
        break;

    case GLSLstd450Fract:
        emit_unary_func_op(result_type, id, args[0], "frac");
        break;

    case GLSLstd450RoundEven:
        if (hlsl_options.shader_model < 40)
            SPIRV_CROSS_THROW("roundEven is not supported in HLSL shader model 2/3.");
        emit_unary_func_op(result_type, id, args[0], "round");
        break;

    case GLSLstd450Trunc:
        emit_unary_func_op(result_type, id, args[0], "trunc");
        break;

    case GLSLstd450Acosh:
    case GLSLstd450Asinh:
    case GLSLstd450Atanh:
        // These are not supported in HLSL; always emulate them.
        emit_emulated_ahyper_op(result_type, id, args[0], op);
        break;

    case GLSLstd450FMix:
    case GLSLstd450IMix:
        emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "lerp");
        break;

    case GLSLstd450Atan2:
        emit_binary_func_op(result_type, id, args[0], args[1], "atan2");
        break;

    case GLSLstd450Fma:
        emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "mad");
        break;

    case GLSLstd450InterpolateAtCentroid:
        emit_unary_func_op(result_type, id, args[0], "EvaluateAttributeAtCentroid");
        break;
    case GLSLstd450InterpolateAtSample:
        emit_binary_func_op(result_type, id, args[0], args[1], "EvaluateAttributeAtSample");
        break;
    case GLSLstd450InterpolateAtOffset:
        emit_binary_func_op(result_type, id, args[0], args[1], "EvaluateAttributeSnapped");
        break;

    case GLSLstd450PackHalf2x16:
        if (!requires_fp16_packing)
        {
            requires_fp16_packing = true;
            force_recompile();
        }
        emit_unary_func_op(result_type, id, args[0], "spvPackHalf2x16");
        break;

    case GLSLstd450UnpackHalf2x16:
        if (!requires_fp16_packing)
        {
            requires_fp16_packing = true;
            force_recompile();
        }
        emit_unary_func_op(result_type, id, args[0], "spvUnpackHalf2x16");
        break;

    case GLSLstd450PackSnorm4x8:
        if (!requires_snorm8_packing)
        {
            requires_snorm8_packing = true;
            force_recompile();
        }
        emit_unary_func_op(result_type, id, args[0], "spvPackSnorm4x8");
        break;

    case GLSLstd450UnpackSnorm4x8:
        if (!requires_snorm8_packing)
        {
            requires_snorm8_packing = true;
            force_recompile();
        }
        emit_unary_func_op(result_type, id, args[0], "spvUnpackSnorm4x8");
        break;

    case GLSLstd450PackUnorm4x8:
        if (!requires_unorm8_packing)
        {
            requires_unorm8_packing = true;
            force_recompile();
        }
        emit_unary_func_op(result_type, id, args[0], "spvPackUnorm4x8");
        break;

    case GLSLstd450UnpackUnorm4x8:
        if (!requires_unorm8_packing)
        {
            requires_unorm8_packing = true;
            force_recompile();
        }
        emit_unary_func_op(result_type, id, args[0], "spvUnpackUnorm4x8");
        break;

    case GLSLstd450PackSnorm2x16:
        if (!requires_snorm16_packing)
        {
            requires_snorm16_packing = true;
            force_recompile();
        }
        emit_unary_func_op(result_type, id, args[0], "spvPackSnorm2x16");
        break;

    case GLSLstd450UnpackSnorm2x16:
        if (!requires_snorm16_packing)
        {
            requires_snorm16_packing = true;
            force_recompile();
        }
        emit_unary_func_op(result_type, id, args[0], "spvUnpackSnorm2x16");
        break;

    case GLSLstd450PackUnorm2x16:
        if (!requires_unorm16_packing)
        {
            requires_unorm16_packing = true;
            force_recompile();
        }
        emit_unary_func_op(result_type, id, args[0], "spvPackUnorm2x16");
        break;

    case GLSLstd450UnpackUnorm2x16:
        if (!requires_unorm16_packing)
        {
            requires_unorm16_packing = true;
            force_recompile();
        }
        emit_unary_func_op(result_type, id, args[0], "spvUnpackUnorm2x16");
        break;

    case GLSLstd450PackDouble2x32:
    case GLSLstd450UnpackDouble2x32:
        SPIRV_CROSS_THROW("packDouble2x32/unpackDouble2x32 not supported in HLSL.");

    case GLSLstd450FindILsb:
    {
        auto basetype = expression_type(args[0]).basetype;
        emit_unary_func_op_cast(result_type, id, args[0], "firstbitlow", basetype, basetype);
        break;
    }

    case GLSLstd450FindSMsb:
        emit_unary_func_op_cast(result_type, id, args[0], "firstbithigh", int_type, int_type);
        break;

    case GLSLstd450FindUMsb:
        emit_unary_func_op_cast(result_type, id, args[0], "firstbithigh", uint_type, uint_type);
        break;

    case GLSLstd450MatrixInverse:
    {
        auto &type = get<SPIRType>(result_type);
        if (type.vecsize == 2 && type.columns == 2)
        {
            if (!requires_inverse_2x2)
            {
                requires_inverse_2x2 = true;
                force_recompile();
            }
        }
        else if (type.vecsize == 3 && type.columns == 3)
        {
            if (!requires_inverse_3x3)
            {
                requires_inverse_3x3 = true;
                force_recompile();
            }
        }
        else if (type.vecsize == 4 && type.columns == 4)
        {
            if (!requires_inverse_4x4)
            {
                requires_inverse_4x4 = true;
                force_recompile();
            }
        }
        emit_unary_func_op(result_type, id, args[0], "spvInverse");
        break;
    }

    case GLSLstd450Normalize:
        // HLSL does not support scalar versions here.
        if (expression_type(args[0]).vecsize == 1)
        {
            // Returns -1 or 1 for valid input, sign() does the job.
            emit_unary_func_op(result_type, id, args[0], "sign");
        }
        else
            CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
        break;

    case GLSLstd450Reflect:
        if (get<SPIRType>(result_type).vecsize == 1)
        {
            if (!requires_scalar_reflect)
            {
                requires_scalar_reflect = true;
                force_recompile();
            }
            emit_binary_func_op(result_type, id, args[0], args[1], "spvReflect");
        }
        else
            CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
        break;

    case GLSLstd450Refract:
        if (get<SPIRType>(result_type).vecsize == 1)
        {
            if (!requires_scalar_refract)
            {
                requires_scalar_refract = true;
                force_recompile();
            }
            emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "spvRefract");
        }
        else
            CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
        break;

    case GLSLstd450FaceForward:
        if (get<SPIRType>(result_type).vecsize == 1)
        {
            if (!requires_scalar_faceforward)
            {
                requires_scalar_faceforward = true;
                force_recompile();
            }
            emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "spvFaceForward");
        }
        else
            CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
        break;

    default:
        CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
        break;
    }
}

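// Unroll a load of an array from a ByteAddressBuffer into a [unroll] loop,
// recursing into read_access_chain() for each element.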
void CompilerHLSL::read_access_chain_array(const string &lhs, const SPIRAccessChain &chain)
{
    auto &type = get<SPIRType>(chain.basetype);

    // Need to use a reserved identifier here since it might shadow an identifier
    // in the access chain input or other loops.
    auto ident = get_unique_identifier();

    statement("[unroll]");
    statement("for (int ", ident, " = 0; ", ident, " < ", to_array_size(type, uint32_t(type.array.size() - 1)), "; ",
              ident, "++)");
    begin_scope();
    auto subchain = chain;
    subchain.dynamic_index = join(ident, " * ", chain.array_stride, " + ", chain.dynamic_index);
    subchain.basetype = type.parent_type;
    if (!get<SPIRType>(subchain.basetype).array.empty())
        subchain.array_stride = get_decoration(subchain.basetype, DecorationArrayStride);
    read_access_chain(nullptr, join(lhs, "[", ident, "]"), subchain);
    end_scope();
}

void CompilerHLSL::read_access_chain_struct(const string &lhs, const SPIRAccessChain &chain)
{
    auto &type = get<SPIRType>(chain.basetype);
    auto subchain = chain;
    uint32_t member_count = uint32_t(type.member_types.size());

    for (uint32_t i = 0; i < member_count; i++)
    {
        uint32_t offset = type_struct_member_offset(type, i);
        subchain.static_index = chain.static_index + offset;
        subchain.basetype = type.member_types[i];

        subchain.matrix_stride = 0;
        subchain.array_stride = 0;
        subchain.row_major_matrix = false;

        auto &member_type = get<SPIRType>(subchain.basetype);
        if (member_type.columns > 1)
        {
            subchain.matrix_stride = type_struct_member_matrix_stride(type, i);
            subchain.row_major_matrix = has_member_decoration(type.self, i, DecorationRowMajor);
        }

        if (!member_type.array.empty())
            subchain.array_stride = type_struct_member_array_stride(type, i);

        read_access_chain(nullptr, join(lhs, ".", to_member_name(type, i)), subchain);
    }
}

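// Emit a load from a (RW)ByteAddressBuffer for the type described by the
// access chain. Arrays and structs recurse member by member; scalars, vectors
// and matrices map to Load/Load2/Load3/Load4, using templated Load<T> on
// SM 6.2+ and a bitcast from uint otherwise.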
void CompilerHLSL::read_access_chain(string *expr, const string &lhs, const SPIRAccessChain &chain)
{
    auto &type = get<SPIRType>(chain.basetype);

    SPIRType target_type;
    target_type.basetype = SPIRType::UInt;
    target_type.vecsize = type.vecsize;
    target_type.columns = type.columns;

    if (!type.array.empty())
    {
        read_access_chain_array(lhs, chain);
        return;
    }
    else if (type.basetype == SPIRType::Struct)
    {
        read_access_chain_struct(lhs, chain);
        return;
    }
    else if (type.width != 32 && !hlsl_options.enable_16bit_types)
        SPIRV_CROSS_THROW("Reading types other than 32-bit from ByteAddressBuffer not yet supported, unless SM 6.2 and "
                          "native 16-bit types are enabled.");

    string base = chain.base;
    if (has_decoration(chain.self, DecorationNonUniform))
        convert_non_uniform_expression(base, chain.self);

    bool templated_load = hlsl_options.shader_model >= 62;
    string load_expr;

    string template_expr;
    if (templated_load)
        template_expr = join("<", type_to_glsl(type), ">");

    // Load a vector or scalar.
    if (type.columns == 1 && !chain.row_major_matrix)
    {
        const char *load_op = nullptr;
        switch (type.vecsize)
        {
        case 1:
            load_op = "Load";
            break;
        case 2:
            load_op = "Load2";
            break;
        case 3:
            load_op = "Load3";
            break;
        case 4:
            load_op = "Load4";
            break;
        default:
            SPIRV_CROSS_THROW("Unknown vector size.");
        }

        if (templated_load)
            load_op = "Load";

        load_expr = join(base, ".", load_op, template_expr, "(", chain.dynamic_index, chain.static_index, ")");
    }
    else if (type.columns == 1)
    {
        // Strided load since we are loading a column from a row-major matrix.
        if (templated_load)
        {
            auto scalar_type = type;
            scalar_type.vecsize = 1;
            scalar_type.columns = 1;
            template_expr = join("<", type_to_glsl(scalar_type), ">");
            if (type.vecsize > 1)
                load_expr += type_to_glsl(type) + "(";
        }
        else if (type.vecsize > 1)
        {
            load_expr = type_to_glsl(target_type);
            load_expr += "(";
        }

        for (uint32_t r = 0; r < type.vecsize; r++)
        {
            load_expr += join(base, ".Load", template_expr, "(", chain.dynamic_index,
                              chain.static_index + r * chain.matrix_stride, ")");
            if (r + 1 < type.vecsize)
                load_expr += ", ";
        }

        if (type.vecsize > 1)
            load_expr += ")";
    }
    else if (!chain.row_major_matrix)
    {
        // Load a matrix, column-major, the easy case.
        const char *load_op = nullptr;
        switch (type.vecsize)
        {
        case 1:
            load_op = "Load";
            break;
        case 2:
            load_op = "Load2";
            break;
        case 3:
            load_op = "Load3";
            break;
        case 4:
            load_op = "Load4";
            break;
        default:
            SPIRV_CROSS_THROW("Unknown vector size.");
        }

        if (templated_load)
        {
            auto vector_type = type;
            vector_type.columns = 1;
            template_expr = join("<", type_to_glsl(vector_type), ">");
            load_expr = type_to_glsl(type);
            load_op = "Load";
        }
        else
        {
            // Note, this loading style in HLSL is *actually* row-major, but we always treat matrices as
            // transposed in this backend, so row-major is technically column-major ...
            load_expr = type_to_glsl(target_type);
        }

        load_expr += "(";
        for (uint32_t c = 0; c < type.columns; c++)
        {
            load_expr += join(base, ".", load_op, template_expr, "(", chain.dynamic_index,
                              chain.static_index + c * chain.matrix_stride, ")");
            if (c + 1 < type.columns)
                load_expr += ", ";
        }
        load_expr += ")";
    }
    else
    {
        // Pick out elements one by one ... Hopefully compilers are smart enough to recognize this pattern
        // considering HLSL is "row-major decl", but "column-major" memory layout (basically implicit transpose model, ugh) ...
        if (templated_load)
        {
            load_expr = type_to_glsl(type);
            auto scalar_type = type;
            scalar_type.vecsize = 1;
            scalar_type.columns = 1;
            template_expr = join("<", type_to_glsl(scalar_type), ">");
        }
        else
            load_expr = type_to_glsl(target_type);

        load_expr += "(";
        for (uint32_t c = 0; c < type.columns; c++)
        {
            for (uint32_t r = 0; r < type.vecsize; r++)
            {
                load_expr += join(base, ".Load", template_expr, "(", chain.dynamic_index,
                                  chain.static_index + c * (type.width / 8) + r * chain.matrix_stride, ")");

                if ((r + 1 < type.vecsize) || (c + 1 < type.columns))
                    load_expr += ", ";
            }
        }
        load_expr += ")";
    }

    if (!templated_load)
    {
        auto bitcast_op = bitcast_glsl_op(type, target_type);
        if (!bitcast_op.empty())
            load_expr = join(bitcast_op, "(", load_expr, ")");
    }

    if (lhs.empty())
    {
        assert(expr);
        *expr = std::move(load_expr);
    }
    else
        statement(lhs, " = ", load_expr, ";");
}

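// Handle OpLoad. Loads that go through a SPIRAccessChain (ByteAddressBuffer
// access) are expanded here; everything else defers to the GLSL backend.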
void CompilerHLSL::emit_load(const Instruction &instruction)
{
    auto ops = stream(instruction);

    auto *chain = maybe_get<SPIRAccessChain>(ops[2]);
    if (chain)
    {
        uint32_t result_type = ops[0];
        uint32_t id = ops[1];
        uint32_t ptr = ops[2];

        auto &type = get<SPIRType>(result_type);
        bool composite_load = !type.array.empty() || type.basetype == SPIRType::Struct;

        if (composite_load)
        {
            // We cannot make this work in one single expression as we might have nested structures and arrays,
            // so unroll the load to an uninitialized temporary.
            emit_uninitialized_temporary_expression(result_type, id);
            read_access_chain(nullptr, to_expression(id), *chain);
            track_expression_read(chain->self);
        }
        else
        {
            string load_expr;
            read_access_chain(&load_expr, "", *chain);

            bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries);

            // If we are forwarding this load,
            // don't register the read to access chain here, defer that to when we actually use the expression,
            // using the add_implied_read_expression mechanism.
            if (!forward)
                track_expression_read(chain->self);

            // Do not forward complex load sequences like matrices, structs and arrays.
            if (type.columns > 1)
                forward = false;

            auto &e = emit_op(result_type, id, load_expr, forward, true);
            e.need_transpose = false;
            register_read(id, ptr, forward);
            inherit_expression_dependencies(id, ptr);
            if (forward)
                add_implied_read_expression(e, chain->self);
        }
    }
    else
        CompilerGLSL::emit_instruction(instruction);
}

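// Counterpart to read_access_chain_array(): unroll a store of an array to a
// ByteAddressBuffer, synthesizing a loop-counter expression ID that can be
// used as an access chain index inside the loop.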
void CompilerHLSL::write_access_chain_array(const SPIRAccessChain &chain, uint32_t value,
                                            const SmallVector<uint32_t> &composite_chain)
{
    auto *ptype = &get<SPIRType>(chain.basetype);
    while (ptype->pointer)
    {
        ptype = &get<SPIRType>(ptype->basetype);
    }
    auto &type = *ptype;

    // Need to use a reserved identifier here since it might shadow an identifier
    // in the access chain input or other loops.
    auto ident = get_unique_identifier();

    uint32_t id = ir.increase_bound_by(2);
    uint32_t int_type_id = id + 1;
    SPIRType int_type;
    int_type.basetype = SPIRType::Int;
    int_type.width = 32;
    set<SPIRType>(int_type_id, int_type);
    set<SPIRExpression>(id, ident, int_type_id, true);
    set_name(id, ident);
    suppressed_usage_tracking.insert(id);

    statement("[unroll]");
    statement("for (int ", ident, " = 0; ", ident, " < ", to_array_size(type, uint32_t(type.array.size() - 1)), "; ",
              ident, "++)");
    begin_scope();
    auto subchain = chain;
    subchain.dynamic_index = join(ident, " * ", chain.array_stride, " + ", chain.dynamic_index);
    subchain.basetype = type.parent_type;

    // Forcefully allow us to use an ID here by setting MSB.
    auto subcomposite_chain = composite_chain;
    subcomposite_chain.push_back(0x80000000u | id);

    if (!get<SPIRType>(subchain.basetype).array.empty())
        subchain.array_stride = get_decoration(subchain.basetype, DecorationArrayStride);

    write_access_chain(subchain, value, subcomposite_chain);
    end_scope();
}

void CompilerHLSL::write_access_chain_struct(const SPIRAccessChain &chain, uint32_t value,
                                             const SmallVector<uint32_t> &composite_chain)
{
    auto &type = get<SPIRType>(chain.basetype);
    uint32_t member_count = uint32_t(type.member_types.size());
    auto subchain = chain;

    auto subcomposite_chain = composite_chain;
    subcomposite_chain.push_back(0);

    for (uint32_t i = 0; i < member_count; i++)
    {
        uint32_t offset = type_struct_member_offset(type, i);
        subchain.static_index = chain.static_index + offset;
        subchain.basetype = type.member_types[i];

        subchain.matrix_stride = 0;
        subchain.array_stride = 0;
        subchain.row_major_matrix = false;

        auto &member_type = get<SPIRType>(subchain.basetype);
        if (member_type.columns > 1)
        {
            subchain.matrix_stride = type_struct_member_matrix_stride(type, i);
            subchain.row_major_matrix = has_member_decoration(type.self, i, DecorationRowMajor);
        }

        if (!member_type.array.empty())
            subchain.array_stride = type_struct_member_array_stride(type, i);

        subcomposite_chain.back() = i;
        write_access_chain(subchain, value, subcomposite_chain);
    }
}

string CompilerHLSL::write_access_chain_value(uint32_t value, const SmallVector<uint32_t> &composite_chain,
                                              bool enclose)
{
    string ret;
    if (composite_chain.empty())
        ret = to_expression(value);
    else
    {
        AccessChainMeta meta;
        ret = access_chain_internal(value, composite_chain.data(), uint32_t(composite_chain.size()),
                                    ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_LITERAL_MSB_FORCE_ID, &meta);
    }

    if (enclose)
        ret = enclose_expression(ret);
    return ret;
}

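// Emit a store to a (RW)ByteAddressBuffer for the type described by the
// access chain, mirroring the strategies in read_access_chain():
// Store/Store2/Store3/Store4, templated Store<T> on SM 6.2+, and per-element
// strided stores for row-major matrices.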
void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t value,
                                      const SmallVector<uint32_t> &composite_chain)
{
    auto &type = get<SPIRType>(chain.basetype);

    // Make sure we trigger a read of the constituents in the access chain.
    track_expression_read(chain.self);

    SPIRType target_type;
    target_type.basetype = SPIRType::UInt;
    target_type.vecsize = type.vecsize;
    target_type.columns = type.columns;

    if (!type.array.empty())
    {
        write_access_chain_array(chain, value, composite_chain);
        register_write(chain.self);
        return;
    }
    else if (type.basetype == SPIRType::Struct)
    {
        write_access_chain_struct(chain, value, composite_chain);
        register_write(chain.self);
        return;
    }
    else if (type.width != 32 && !hlsl_options.enable_16bit_types)
        SPIRV_CROSS_THROW("Writing types other than 32-bit to RWByteAddressBuffer not yet supported, unless SM 6.2 and "
                          "native 16-bit types are enabled.");

    bool templated_store = hlsl_options.shader_model >= 62;

    auto base = chain.base;
    if (has_decoration(chain.self, DecorationNonUniform))
        convert_non_uniform_expression(base, chain.self);

    string template_expr;
    if (templated_store)
        template_expr = join("<", type_to_glsl(type), ">");

    if (type.columns == 1 && !chain.row_major_matrix)
    {
        const char *store_op = nullptr;
        switch (type.vecsize)
        {
        case 1:
            store_op = "Store";
            break;
        case 2:
            store_op = "Store2";
            break;
        case 3:
            store_op = "Store3";
            break;
        case 4:
            store_op = "Store4";
            break;
        default:
            SPIRV_CROSS_THROW("Unknown vector size.");
        }

        auto store_expr = write_access_chain_value(value, composite_chain, false);

        if (!templated_store)
        {
            auto bitcast_op = bitcast_glsl_op(target_type, type);
            if (!bitcast_op.empty())
                store_expr = join(bitcast_op, "(", store_expr, ")");
        }
        else
            store_op = "Store";

        statement(base, ".", store_op, template_expr, "(", chain.dynamic_index, chain.static_index, ", ",
                  store_expr, ");");
    }
    else if (type.columns == 1)
    {
        if (templated_store)
        {
            auto scalar_type = type;
            scalar_type.vecsize = 1;
            scalar_type.columns = 1;
            template_expr = join("<", type_to_glsl(scalar_type), ">");
        }

        // Strided store.
        for (uint32_t r = 0; r < type.vecsize; r++)
        {
            auto store_expr = write_access_chain_value(value, composite_chain, true);
            if (type.vecsize > 1)
            {
                store_expr += ".";
                store_expr += index_to_swizzle(r);
            }
            remove_duplicate_swizzle(store_expr);

            if (!templated_store)
            {
                auto bitcast_op = bitcast_glsl_op(target_type, type);
                if (!bitcast_op.empty())
                    store_expr = join(bitcast_op, "(", store_expr, ")");
            }

            statement(base, ".Store", template_expr, "(", chain.dynamic_index,
                      chain.static_index + chain.matrix_stride * r, ", ", store_expr, ");");
        }
    }
    else if (!chain.row_major_matrix)
    {
        const char *store_op = nullptr;
        switch (type.vecsize)
        {
        case 1:
            store_op = "Store";
            break;
        case 2:
            store_op = "Store2";
            break;
        case 3:
            store_op = "Store3";
            break;
        case 4:
            store_op = "Store4";
            break;
        default:
            SPIRV_CROSS_THROW("Unknown vector size.");
        }

        if (templated_store)
        {
            store_op = "Store";
            auto vector_type = type;
            vector_type.columns = 1;
            template_expr = join("<", type_to_glsl(vector_type), ">");
        }

        for (uint32_t c = 0; c < type.columns; c++)
        {
            auto store_expr = join(write_access_chain_value(value, composite_chain, true), "[", c, "]");

            if (!templated_store)
            {
                auto bitcast_op = bitcast_glsl_op(target_type, type);
                if (!bitcast_op.empty())
                    store_expr = join(bitcast_op, "(", store_expr, ")");
            }

            statement(base, ".", store_op, template_expr, "(", chain.dynamic_index,
                      chain.static_index + c * chain.matrix_stride, ", ", store_expr, ");");
        }
    }
    else
    {
        if (templated_store)
        {
            auto scalar_type = type;
            scalar_type.vecsize = 1;
            scalar_type.columns = 1;
            template_expr = join("<", type_to_glsl(scalar_type), ">");
        }

        for (uint32_t r = 0; r < type.vecsize; r++)
        {
            for (uint32_t c = 0; c < type.columns; c++)
            {
                auto store_expr =
                    join(write_access_chain_value(value, composite_chain, true), "[", c, "].", index_to_swizzle(r));
                remove_duplicate_swizzle(store_expr);
                auto bitcast_op = bitcast_glsl_op(target_type, type);
                if (!bitcast_op.empty())
                    store_expr = join(bitcast_op, "(", store_expr, ")");
                statement(base, ".Store", template_expr, "(", chain.dynamic_index,
                          chain.static_index + c * (type.width / 8) + r * chain.matrix_stride, ", ", store_expr, ");");
            }
        }
    }

    register_write(chain.self);
}

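// Handle OpStore. Stores through a SPIRAccessChain become ByteAddressBuffer
// stores; a store to a meshlet position with flip_vert_y goes through the
// spvFlipVertY helper; everything else defers to the GLSL backend.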
void CompilerHLSL::emit_store(const Instruction &instruction)
{
    auto ops = stream(instruction);
    if (options.vertex.flip_vert_y)
    {
        auto *expr = maybe_get<SPIRExpression>(ops[0]);
        if (expr != nullptr && expr->access_meshlet_position_y)
        {
            auto lhs = to_dereferenced_expression(ops[0]);
            auto rhs = to_unpacked_expression(ops[1]);
            statement(lhs, " = spvFlipVertY(", rhs, ");");
            register_write(ops[0]);
            return;
        }
    }

    auto *chain = maybe_get<SPIRAccessChain>(ops[0]);
    if (chain)
        write_access_chain(*chain, ops[1], {});
    else
        CompilerGLSL::emit_instruction(instruction);
}

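// Handle OpAccessChain/OpInBoundsAccessChain. When the chain pokes into an
// SSBO (a ByteAddressBuffer in HLSL), build a SPIRAccessChain that tracks
// byte offsets and matrix/array strides instead of a plain SPIRExpression.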
void CompilerHLSL::emit_access_chain(const Instruction &instruction)
{
    auto ops = stream(instruction);
    uint32_t length = instruction.length;

    bool need_byte_access_chain = false;
    auto &type = expression_type(ops[2]);
    const auto *chain = maybe_get<SPIRAccessChain>(ops[2]);

    if (chain)
    {
        // Keep tacking on an existing access chain.
        need_byte_access_chain = true;
    }
    else if (type.storage == StorageClassStorageBuffer || has_decoration(type.self, DecorationBufferBlock))
    {
        // If we are starting to poke into an SSBO, we are dealing with ByteAddressBuffers, and we need
        // to emit SPIRAccessChain rather than a plain SPIRExpression.
        uint32_t chain_arguments = length - 3;
        if (chain_arguments > type.array.size())
            need_byte_access_chain = true;
    }

    if (need_byte_access_chain)
    {
        // If we have a chain variable, we are already inside the SSBO, and any array type will refer to arrays within a block,
        // and not array of SSBO.
        uint32_t to_plain_buffer_length = chain ? 0u : static_cast<uint32_t>(type.array.size());

        auto *backing_variable = maybe_get_backing_variable(ops[2]);

        if (backing_variable != nullptr && is_user_type_structured(backing_variable->self))
        {
            CompilerGLSL::emit_instruction(instruction);
            return;
        }

        string base;
        if (to_plain_buffer_length != 0)
            base = access_chain(ops[2], &ops[3], to_plain_buffer_length, get<SPIRType>(ops[0]));
        else if (chain)
            base = chain->base;
        else
            base = to_expression(ops[2]);

        // Start traversing type hierarchy at the proper non-pointer types.
        auto *basetype = &get_pointee_type(type);

        // Traverse the type hierarchy down to the actual buffer types.
        for (uint32_t i = 0; i < to_plain_buffer_length; i++)
        {
            assert(basetype->parent_type);
            basetype = &get<SPIRType>(basetype->parent_type);
        }

        uint32_t matrix_stride = 0;
        uint32_t array_stride = 0;
        bool row_major_matrix = false;

        // Inherit matrix information.
        if (chain)
        {
            matrix_stride = chain->matrix_stride;
            row_major_matrix = chain->row_major_matrix;
            array_stride = chain->array_stride;
        }

        auto offsets = flattened_access_chain_offset(*basetype, &ops[3 + to_plain_buffer_length],
                                                     length - 3 - to_plain_buffer_length, 0, 1, &row_major_matrix,
                                                     &matrix_stride, &array_stride);

        auto &e = set<SPIRAccessChain>(ops[1], ops[0], type.storage, base, offsets.first, offsets.second);
        e.row_major_matrix = row_major_matrix;
        e.matrix_stride = matrix_stride;
        e.array_stride = array_stride;
        e.immutable = should_forward(ops[2]);
        e.loaded_from = backing_variable ? backing_variable->self : ID(0);

        if (chain)
        {
            e.dynamic_index += chain->dynamic_index;
            e.static_index += chain->static_index;
        }

        for (uint32_t i = 2; i < length; i++)
        {
            inherit_expression_dependencies(ops[1], ops[i]);
            add_implied_read_expression(e, ops[i]);
        }
    }
    else
    {
        CompilerGLSL::emit_instruction(instruction);
    }
}

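// Lower SPIR-V atomics to HLSL Interlocked* intrinsics. Loads, stores,
// increments and decrements have no direct HLSL equivalents, so they are
// expressed through InterlockedAdd/InterlockedExchange.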
void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op)
{
    const char *atomic_op = nullptr;

    string value_expr;
    if (op != OpAtomicIDecrement && op != OpAtomicIIncrement && op != OpAtomicLoad && op != OpAtomicStore)
        value_expr = to_expression(ops[op == OpAtomicCompareExchange ? 6 : 5]);

    bool is_atomic_store = false;

    switch (op)
    {
    case OpAtomicIIncrement:
        atomic_op = "InterlockedAdd";
        value_expr = "1";
        break;

    case OpAtomicIDecrement:
        atomic_op = "InterlockedAdd";
        value_expr = "-1";
        break;

    case OpAtomicLoad:
        atomic_op = "InterlockedAdd";
        value_expr = "0";
        break;

    case OpAtomicISub:
        atomic_op = "InterlockedAdd";
        value_expr = join("-", enclose_expression(value_expr));
        break;

    case OpAtomicSMin:
    case OpAtomicUMin:
        atomic_op = "InterlockedMin";
        break;

    case OpAtomicSMax:
    case OpAtomicUMax:
        atomic_op = "InterlockedMax";
        break;

    case OpAtomicAnd:
        atomic_op = "InterlockedAnd";
        break;

    case OpAtomicOr:
        atomic_op = "InterlockedOr";
        break;

    case OpAtomicXor:
        atomic_op = "InterlockedXor";
        break;

    case OpAtomicIAdd:
        atomic_op = "InterlockedAdd";
        break;

    case OpAtomicExchange:
        atomic_op = "InterlockedExchange";
        break;

    case OpAtomicStore:
        atomic_op = "InterlockedExchange";
        is_atomic_store = true;
        break;

    case OpAtomicCompareExchange:
        if (length < 8)
            SPIRV_CROSS_THROW("Not enough data for opcode.");
        atomic_op = "InterlockedCompareExchange";
        value_expr = join(to_expression(ops[7]), ", ", value_expr);
        break;

    default:
        SPIRV_CROSS_THROW("Unknown atomic opcode.");
    }

    if (is_atomic_store)
    {
        auto &data_type = expression_type(ops[0]);
        auto *chain = maybe_get<SPIRAccessChain>(ops[0]);

        auto &tmp_id = extra_sub_expressions[ops[0]];
        if (!tmp_id)
        {
            tmp_id = ir.increase_bound_by(1);
            emit_uninitialized_temporary_expression(get_pointee_type(data_type).self, tmp_id);
        }

        if (data_type.storage == StorageClassImage || !chain)
        {
            statement(atomic_op, "(", to_non_uniform_aware_expression(ops[0]), ", ",
                      to_expression(ops[3]), ", ", to_expression(tmp_id), ");");
        }
        else
        {
            string base = chain->base;
            if (has_decoration(chain->self, DecorationNonUniform))
                convert_non_uniform_expression(base, chain->self);
            // RWByteAddressBuffer is always uint in its underlying type.
            statement(base, ".", atomic_op, "(", chain->dynamic_index, chain->static_index, ", ",
                      to_expression(ops[3]), ", ", to_expression(tmp_id), ");");
        }
    }
    else
    {
        uint32_t result_type = ops[0];
        uint32_t id = ops[1];
        forced_temporaries.insert(ops[1]);

        auto &type = get<SPIRType>(result_type);
        statement(variable_decl(type, to_name(id)), ";");

        auto &data_type = expression_type(ops[2]);
        auto *chain = maybe_get<SPIRAccessChain>(ops[2]);
        SPIRType::BaseType expr_type;
        if (data_type.storage == StorageClassImage || !chain)
        {
            statement(atomic_op, "(", to_non_uniform_aware_expression(ops[2]), ", ", value_expr, ", ", to_name(id), ");");
            expr_type = data_type.basetype;
        }
        else
        {
            // RWByteAddressBuffer is always uint in its underlying type.
            string base = chain->base;
            if (has_decoration(chain->self, DecorationNonUniform))
                convert_non_uniform_expression(base, chain->self);
            expr_type = SPIRType::UInt;
            statement(base, ".", atomic_op, "(", chain->dynamic_index, chain->static_index, ", ", value_expr,
                      ", ", to_name(id), ");");
        }

        auto expr = bitcast_expression(type, expr_type, to_name(id));
        set<SPIRExpression>(id, expr, result_type, true);
    }

    flush_all_atomic_capable_variables();
}

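// Lower SPIR-V subgroup (group non-uniform) operations to SM 6.0+ wave
// intrinsics. Operations with no wave equivalent throw.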
void CompilerHLSL::emit_subgroup_op(const Instruction &i)
{
    if (hlsl_options.shader_model < 60)
        SPIRV_CROSS_THROW("Wave ops require SM 6.0 or higher.");

    const uint32_t *ops = stream(i);
    auto op = static_cast<Op>(i.op);

    uint32_t result_type = ops[0];
    uint32_t id = ops[1];

    auto scope = static_cast<Scope>(evaluate_constant_u32(ops[2]));
    if (scope != ScopeSubgroup)
        SPIRV_CROSS_THROW("Only subgroup scope is supported.");

    const auto make_inclusive_Sum = [&](const string &expr) -> string {
        return join(expr, " + ", to_expression(ops[4]));
    };

    const auto make_inclusive_Product = [&](const string &expr) -> string {
        return join(expr, " * ", to_expression(ops[4]));
    };

    // If we need to do implicit bitcasts, make sure we do it with the correct type.
    uint32_t integer_width = get_integer_width_for_instruction(i);
    auto int_type = to_signed_basetype(integer_width);
    auto uint_type = to_unsigned_basetype(integer_width);

    // These expansions are never reached (supports_scan is false for these ops),
    // but make_inclusive_##hlsl_op in HLSL_GROUP_OP below must expand to
    // something compilable for every instantiated operation.
#define make_inclusive_BitAnd(expr) ""
#define make_inclusive_BitOr(expr) ""
#define make_inclusive_BitXor(expr) ""
#define make_inclusive_Min(expr) ""
#define make_inclusive_Max(expr) ""

    switch (op)
    {
    case OpGroupNonUniformElect:
        emit_op(result_type, id, "WaveIsFirstLane()", true);
        break;

    case OpGroupNonUniformBroadcast:
        emit_binary_func_op(result_type, id, ops[3], ops[4], "WaveReadLaneAt");
        break;

    case OpGroupNonUniformBroadcastFirst:
        emit_unary_func_op(result_type, id, ops[3], "WaveReadLaneFirst");
        break;

    case OpGroupNonUniformBallot:
        emit_unary_func_op(result_type, id, ops[3], "WaveActiveBallot");
        break;

    case OpGroupNonUniformInverseBallot:
        SPIRV_CROSS_THROW("Cannot trivially implement InverseBallot in HLSL.");

    case OpGroupNonUniformBallotBitExtract:
        SPIRV_CROSS_THROW("Cannot trivially implement BallotBitExtract in HLSL.");

    case OpGroupNonUniformBallotFindLSB:
        SPIRV_CROSS_THROW("Cannot trivially implement BallotFindLSB in HLSL.");

    case OpGroupNonUniformBallotFindMSB:
        SPIRV_CROSS_THROW("Cannot trivially implement BallotFindMSB in HLSL.");

    case OpGroupNonUniformBallotBitCount:
    {
        auto operation = static_cast<GroupOperation>(ops[3]);
        bool forward = should_forward(ops[4]);
        if (operation == GroupOperationReduce)
        {
            auto left = join("countbits(", to_enclosed_expression(ops[4]), ".x) + countbits(",
                             to_enclosed_expression(ops[4]), ".y)");
            auto right = join("countbits(", to_enclosed_expression(ops[4]), ".z) + countbits(",
                              to_enclosed_expression(ops[4]), ".w)");
            emit_op(result_type, id, join(left, " + ", right), forward);
            inherit_expression_dependencies(id, ops[4]);
        }
        else if (operation == GroupOperationInclusiveScan)
        {
            auto left = join("countbits(", to_enclosed_expression(ops[4]), ".x & gl_SubgroupLeMask.x) + countbits(",
                             to_enclosed_expression(ops[4]), ".y & gl_SubgroupLeMask.y)");
            auto right = join("countbits(", to_enclosed_expression(ops[4]), ".z & gl_SubgroupLeMask.z) + countbits(",
                              to_enclosed_expression(ops[4]), ".w & gl_SubgroupLeMask.w)");
            emit_op(result_type, id, join(left, " + ", right), forward);

            if (!active_input_builtins.get(BuiltInSubgroupLeMask))
            {
                active_input_builtins.set(BuiltInSubgroupLeMask);
                force_recompile_guarantee_forward_progress();
            }
        }
        else if (operation == GroupOperationExclusiveScan)
        {
            auto left = join("countbits(", to_enclosed_expression(ops[4]), ".x & gl_SubgroupLtMask.x) + countbits(",
                             to_enclosed_expression(ops[4]), ".y & gl_SubgroupLtMask.y)");
            auto right = join("countbits(", to_enclosed_expression(ops[4]), ".z & gl_SubgroupLtMask.z) + countbits(",
                              to_enclosed_expression(ops[4]), ".w & gl_SubgroupLtMask.w)");
            emit_op(result_type, id, join(left, " + ", right), forward);

            if (!active_input_builtins.get(BuiltInSubgroupLtMask))
            {
                active_input_builtins.set(BuiltInSubgroupLtMask);
                force_recompile_guarantee_forward_progress();
            }
        }
        else
            SPIRV_CROSS_THROW("Invalid BitCount operation.");
        break;
    }

    case OpGroupNonUniformShuffle:
        emit_binary_func_op(result_type, id, ops[3], ops[4], "WaveReadLaneAt");
        break;

    case OpGroupNonUniformShuffleXor:
    {
        bool forward = should_forward(ops[3]);
        emit_op(ops[0], ops[1],
                join("WaveReadLaneAt(", to_unpacked_expression(ops[3]), ", ",
                     "WaveGetLaneIndex() ^ ", to_enclosed_expression(ops[4]), ")"), forward);
        inherit_expression_dependencies(ops[1], ops[3]);
        break;
    }

    case OpGroupNonUniformShuffleUp:
    {
        bool forward = should_forward(ops[3]);
        emit_op(ops[0], ops[1],
                join("WaveReadLaneAt(", to_unpacked_expression(ops[3]), ", ",
                     "WaveGetLaneIndex() - ", to_enclosed_expression(ops[4]), ")"), forward);
        inherit_expression_dependencies(ops[1], ops[3]);
        break;
    }

    case OpGroupNonUniformShuffleDown:
    {
        bool forward = should_forward(ops[3]);
        emit_op(ops[0], ops[1],
                join("WaveReadLaneAt(", to_unpacked_expression(ops[3]), ", ",
                     "WaveGetLaneIndex() + ", to_enclosed_expression(ops[4]), ")"), forward);
        inherit_expression_dependencies(ops[1], ops[3]);
        break;
    }

    case OpGroupNonUniformAll:
        emit_unary_func_op(result_type, id, ops[3], "WaveActiveAllTrue");
        break;

    case OpGroupNonUniformAny:
        emit_unary_func_op(result_type, id, ops[3], "WaveActiveAnyTrue");
        break;

    case OpGroupNonUniformAllEqual:
        emit_unary_func_op(result_type, id, ops[3], "WaveActiveAllEqual");
        break;

    // clang-format off
#define HLSL_GROUP_OP(op, hlsl_op, supports_scan) \
case OpGroupNonUniform##op: \
    { \
        auto operation = static_cast<GroupOperation>(ops[3]); \
        if (operation == GroupOperationReduce) \
            emit_unary_func_op(result_type, id, ops[4], "WaveActive" #hlsl_op); \
        else if (operation == GroupOperationInclusiveScan && supports_scan) \
        { \
            bool forward = should_forward(ops[4]); \
            emit_op(result_type, id, make_inclusive_##hlsl_op (join("WavePrefix" #hlsl_op, "(", to_expression(ops[4]), ")")), forward); \
            inherit_expression_dependencies(id, ops[4]); \
        } \
        else if (operation == GroupOperationExclusiveScan && supports_scan) \
            emit_unary_func_op(result_type, id, ops[4], "WavePrefix" #hlsl_op); \
        else if (operation == GroupOperationClusteredReduce) \
            SPIRV_CROSS_THROW("Cannot trivially implement ClusteredReduce in HLSL."); \
        else \
            SPIRV_CROSS_THROW("Invalid group operation."); \
        break; \
    }

#define HLSL_GROUP_OP_CAST(op, hlsl_op, type) \
case OpGroupNonUniform##op: \
    { \
        auto operation = static_cast<GroupOperation>(ops[3]); \
        if (operation == GroupOperationReduce) \
            emit_unary_func_op_cast(result_type, id, ops[4], "WaveActive" #hlsl_op, type, type); \
        else \
            SPIRV_CROSS_THROW("Invalid group operation."); \
        break; \
    }

    HLSL_GROUP_OP(FAdd, Sum, true)
    HLSL_GROUP_OP(FMul, Product, true)
    HLSL_GROUP_OP(FMin, Min, false)
    HLSL_GROUP_OP(FMax, Max, false)
    HLSL_GROUP_OP(IAdd, Sum, true)
    HLSL_GROUP_OP(IMul, Product, true)
    HLSL_GROUP_OP_CAST(SMin, Min, int_type)
    HLSL_GROUP_OP_CAST(SMax, Max, int_type)
    HLSL_GROUP_OP_CAST(UMin, Min, uint_type)
    HLSL_GROUP_OP_CAST(UMax, Max, uint_type)
    HLSL_GROUP_OP(BitwiseAnd, BitAnd, false)
    HLSL_GROUP_OP(BitwiseOr, BitOr, false)
    HLSL_GROUP_OP(BitwiseXor, BitXor, false)
    HLSL_GROUP_OP_CAST(LogicalAnd, BitAnd, uint_type)
    HLSL_GROUP_OP_CAST(LogicalOr, BitOr, uint_type)
    HLSL_GROUP_OP_CAST(LogicalXor, BitXor, uint_type)
#undef HLSL_GROUP_OP
#undef HLSL_GROUP_OP_CAST
    // clang-format on

    case OpGroupNonUniformQuadSwap:
    {
        uint32_t direction = evaluate_constant_u32(ops[4]);
        if (direction == 0)
            emit_unary_func_op(result_type, id, ops[3], "QuadReadAcrossX");
        else if (direction == 1)
            emit_unary_func_op(result_type, id, ops[3], "QuadReadAcrossY");
        else if (direction == 2)
            emit_unary_func_op(result_type, id, ops[3], "QuadReadAcrossDiagonal");
        else
            SPIRV_CROSS_THROW("Invalid quad swap direction.");
        break;
    }

    case OpGroupNonUniformQuadBroadcast:
    {
        emit_binary_func_op(result_type, id, ops[3], ops[4], "QuadReadLaneAt");
        break;
    }

    default:
        SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
    }

    register_control_dependent_expression(id);
}

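// Main per-instruction dispatch for the HLSL backend. Only opcodes that need
// HLSL-specific handling are intercepted here; the rest fall through to
// CompilerGLSL::emit_instruction().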
  4799. void CompilerHLSL::emit_instruction(const Instruction &instruction)
  4800. {
  4801. auto ops = stream(instruction);
  4802. auto opcode = static_cast<Op>(instruction.op);
  4803. #define HLSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op)
  4804. #define HLSL_BOP_CAST(op, type) \
  4805. emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode), false)
  4806. #define HLSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op)
  4807. #define HLSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op)
  4808. #define HLSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op)
  4809. #define HLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
  4810. #define HLSL_BFOP_CAST(op, type) \
  4811. emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
  4812. #define HLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
  4813. #define HLSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op)
  4814. // If we need to do implicit bitcasts, make sure we do it with the correct type.
  4815. uint32_t integer_width = get_integer_width_for_instruction(instruction);
  4816. auto int_type = to_signed_basetype(integer_width);
  4817. auto uint_type = to_unsigned_basetype(integer_width);
  4818. opcode = get_remapped_spirv_op(opcode);
  4819. switch (opcode)
  4820. {
  4821. case OpAccessChain:
  4822. case OpInBoundsAccessChain:
  4823. {
  4824. emit_access_chain(instruction);
  4825. break;
  4826. }
  4827. case OpBitcast:
  4828. {
  4829. auto bitcast_type = get_bitcast_type(ops[0], ops[2]);
  4830. if (bitcast_type == CompilerHLSL::TypeNormal)
  4831. CompilerGLSL::emit_instruction(instruction);
  4832. else
  4833. {
  4834. if (!requires_uint2_packing)
  4835. {
  4836. requires_uint2_packing = true;
  4837. force_recompile();
  4838. }
  4839. if (bitcast_type == CompilerHLSL::TypePackUint2x32)
  4840. emit_unary_func_op(ops[0], ops[1], ops[2], "spvPackUint2x32");
  4841. else
  4842. emit_unary_func_op(ops[0], ops[1], ops[2], "spvUnpackUint2x32");
  4843. }
  4844. break;
  4845. }
  4846. case OpSelect:
  4847. {
  4848. auto &value_type = expression_type(ops[3]);
  4849. if (value_type.basetype == SPIRType::Struct || is_array(value_type))
  4850. {
  4851. // HLSL does not support ternary expressions on composites.
  4852. // Cannot use branches, since we might be in a continue block
  4853. // where explicit control flow is prohibited.
  4854. // Emit a helper function where we can use control flow.
  4855. TypeID value_type_id = expression_type_id(ops[3]);
  4856. auto itr = std::find(composite_selection_workaround_types.begin(),
  4857. composite_selection_workaround_types.end(),
  4858. value_type_id);
  4859. if (itr == composite_selection_workaround_types.end())
  4860. {
  4861. composite_selection_workaround_types.push_back(value_type_id);
  4862. force_recompile();
  4863. }
  4864. emit_uninitialized_temporary_expression(ops[0], ops[1]);
  4865. statement("spvSelectComposite(",
  4866. to_expression(ops[1]), ", ", to_expression(ops[2]), ", ",
  4867. to_expression(ops[3]), ", ", to_expression(ops[4]), ");");
  4868. }
  4869. else
  4870. CompilerGLSL::emit_instruction(instruction);
  4871. break;
  4872. }
  4873. case OpStore:
  4874. {
  4875. emit_store(instruction);
  4876. break;
  4877. }
  4878. case OpLoad:
  4879. {
  4880. emit_load(instruction);
  4881. break;
  4882. }
  4883. case OpMatrixTimesVector:
  4884. {
  4885. // Matrices are kept in a transposed state all the time, flip multiplication order always.
  4886. emit_binary_func_op(ops[0], ops[1], ops[3], ops[2], "mul");
  4887. break;
  4888. }
  4889. case OpVectorTimesMatrix:
  4890. {
  4891. // Matrices are kept in a transposed state all the time, flip multiplication order always.
  4892. emit_binary_func_op(ops[0], ops[1], ops[3], ops[2], "mul");
  4893. break;
  4894. }
  4895. case OpMatrixTimesMatrix:
  4896. {
  4897. // Matrices are kept in a transposed state all the time, flip multiplication order always.
  4898. emit_binary_func_op(ops[0], ops[1], ops[3], ops[2], "mul");
  4899. break;
  4900. }
  4901. case OpOuterProduct:
  4902. {
  4903. uint32_t result_type = ops[0];
  4904. uint32_t id = ops[1];
  4905. uint32_t a = ops[2];
  4906. uint32_t b = ops[3];
  4907. auto &type = get<SPIRType>(result_type);
  4908. string expr = type_to_glsl_constructor(type);
  4909. expr += "(";
  4910. for (uint32_t col = 0; col < type.columns; col++)
  4911. {
  4912. expr += to_enclosed_expression(a);
  4913. expr += " * ";
  4914. expr += to_extract_component_expression(b, col);
  4915. if (col + 1 < type.columns)
  4916. expr += ", ";
  4917. }
  4918. expr += ")";
  4919. emit_op(result_type, id, expr, should_forward(a) && should_forward(b));
  4920. inherit_expression_dependencies(id, a);
  4921. inherit_expression_dependencies(id, b);
  4922. break;
  4923. }
  4924. case OpFMod:
  4925. {
  4926. if (!requires_op_fmod)
  4927. {
  4928. requires_op_fmod = true;
  4929. force_recompile();
  4930. }
  4931. CompilerGLSL::emit_instruction(instruction);
  4932. break;
  4933. }
  4934. case OpFRem:
  4935. emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], "fmod");
  4936. break;
  4937. case OpImage:
  4938. {
  4939. uint32_t result_type = ops[0];
  4940. uint32_t id = ops[1];
  4941. auto *combined = maybe_get<SPIRCombinedImageSampler>(ops[2]);
  4942. if (combined)
  4943. {
  4944. auto &e = emit_op(result_type, id, to_expression(combined->image), true, true);
  4945. auto *var = maybe_get_backing_variable(combined->image);
  4946. if (var)
  4947. e.loaded_from = var->self;
  4948. }
  4949. else
  4950. {
  4951. auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true);
  4952. auto *var = maybe_get_backing_variable(ops[2]);
  4953. if (var)
  4954. e.loaded_from = var->self;
  4955. }
  4956. break;
  4957. }
  4958. case OpDPdx:
  4959. HLSL_UFOP(ddx);
  4960. register_control_dependent_expression(ops[1]);
  4961. break;
  4962. case OpDPdy:
  4963. HLSL_UFOP(ddy);
  4964. register_control_dependent_expression(ops[1]);
  4965. break;
  4966. case OpDPdxFine:
  4967. HLSL_UFOP(ddx_fine);
  4968. register_control_dependent_expression(ops[1]);
  4969. break;
  4970. case OpDPdyFine:
  4971. HLSL_UFOP(ddy_fine);
  4972. register_control_dependent_expression(ops[1]);
  4973. break;
  4974. case OpDPdxCoarse:
  4975. HLSL_UFOP(ddx_coarse);
  4976. register_control_dependent_expression(ops[1]);
  4977. break;
  4978. case OpDPdyCoarse:
  4979. HLSL_UFOP(ddy_coarse);
  4980. register_control_dependent_expression(ops[1]);
  4981. break;
  4982. case OpFwidth:
  4983. case OpFwidthCoarse:
  4984. case OpFwidthFine:
  4985. HLSL_UFOP(fwidth);
  4986. register_control_dependent_expression(ops[1]);
  4987. break;
  4988. case OpLogicalNot:
  4989. {
  4990. auto result_type = ops[0];
  4991. auto id = ops[1];
  4992. auto &type = get<SPIRType>(result_type);
  4993. if (type.vecsize > 1)
  4994. emit_unrolled_unary_op(result_type, id, ops[2], "!");
  4995. else
  4996. HLSL_UOP(!);
  4997. break;
  4998. }
  4999. case OpIEqual:
  5000. {
  5001. auto result_type = ops[0];
  5002. auto id = ops[1];
  5003. if (expression_type(ops[2]).vecsize > 1)
  5004. emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==", false, SPIRType::Unknown);
  5005. else
  5006. HLSL_BOP_CAST(==, int_type);
  5007. break;
  5008. }
  5009. case OpLogicalEqual:
  5010. case OpFOrdEqual:
  5011. case OpFUnordEqual:
  5012. {
  5013. // HLSL != operator is unordered.
  5014. // https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-float-rules.
  5015. // isnan() is apparently implemented as x != x as well.
  5016. // We cannot implement UnordEqual as !(OrdNotEqual), as HLSL cannot express OrdNotEqual.
  5017. // HACK: FUnordEqual will be implemented as FOrdEqual.
  5018. auto result_type = ops[0];
  5019. auto id = ops[1];
  5020. if (expression_type(ops[2]).vecsize > 1)
  5021. emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==", false, SPIRType::Unknown);
  5022. else
  5023. HLSL_BOP(==);
  5024. break;
  5025. }
  5026. case OpINotEqual:
  5027. {
  5028. auto result_type = ops[0];
  5029. auto id = ops[1];
  5030. if (expression_type(ops[2]).vecsize > 1)
  5031. emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=", false, SPIRType::Unknown);
  5032. else
  5033. HLSL_BOP_CAST(!=, int_type);
  5034. break;
  5035. }
  5036. case OpLogicalNotEqual:
  5037. case OpFOrdNotEqual:
  5038. case OpFUnordNotEqual:
  5039. {
  5040. // HLSL != operator is unordered.
  5041. // https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-float-rules.
  5042. // isnan() is apparently implemented as x != x as well.
  5043. // FIXME: FOrdNotEqual cannot be implemented in a crisp and simple way here.
  5044. // We would need to do something like not(UnordEqual), but that cannot be expressed either.
  5045. // Adding a lot of NaN checks would be a breaking change from perspective of performance.
  5046. // SPIR-V will generally use isnan() checks when this even matters.
  5047. // HACK: FOrdNotEqual will be implemented as FUnordEqual.
  5048. auto result_type = ops[0];
  5049. auto id = ops[1];
  5050. if (expression_type(ops[2]).vecsize > 1)
  5051. emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=", false, SPIRType::Unknown);
  5052. else
  5053. HLSL_BOP(!=);
  5054. break;
  5055. }
  5056. case OpUGreaterThan:
  5057. case OpSGreaterThan:
  5058. {
  5059. auto result_type = ops[0];
  5060. auto id = ops[1];
  5061. auto type = opcode == OpUGreaterThan ? uint_type : int_type;
  5062. if (expression_type(ops[2]).vecsize > 1)
  5063. emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", false, type);
  5064. else
  5065. HLSL_BOP_CAST(>, type);
  5066. break;
  5067. }
  5068. case OpFOrdGreaterThan:
  5069. {
  5070. auto result_type = ops[0];
  5071. auto id = ops[1];
  5072. if (expression_type(ops[2]).vecsize > 1)
  5073. emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", false, SPIRType::Unknown);
  5074. else
  5075. HLSL_BOP(>);
  5076. break;
  5077. }
  5078. case OpFUnordGreaterThan:
  5079. {
  5080. auto result_type = ops[0];
  5081. auto id = ops[1];
  5082. if (expression_type(ops[2]).vecsize > 1)
  5083. emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", true, SPIRType::Unknown);
  5084. else
  5085. CompilerGLSL::emit_instruction(instruction);
  5086. break;
  5087. }

	case OpUGreaterThanEqual:
	case OpSGreaterThanEqual:
	{
		auto result_type = ops[0];
		auto id = ops[1];
		auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type;

		if (expression_type(ops[2]).vecsize > 1)
			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", false, type);
		else
			HLSL_BOP_CAST(>=, type);
		break;
	}

	case OpFOrdGreaterThanEqual:
	{
		auto result_type = ops[0];
		auto id = ops[1];

		if (expression_type(ops[2]).vecsize > 1)
			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", false, SPIRType::Unknown);
		else
			HLSL_BOP(>=);
		break;
	}

	case OpFUnordGreaterThanEqual:
	{
		auto result_type = ops[0];
		auto id = ops[1];

		if (expression_type(ops[2]).vecsize > 1)
			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", true, SPIRType::Unknown);
		else
			CompilerGLSL::emit_instruction(instruction);
		break;
	}

	case OpULessThan:
	case OpSLessThan:
	{
		auto result_type = ops[0];
		auto id = ops[1];
		auto type = opcode == OpULessThan ? uint_type : int_type;

		if (expression_type(ops[2]).vecsize > 1)
			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", false, type);
		else
			HLSL_BOP_CAST(<, type);
		break;
	}

	case OpFOrdLessThan:
	{
		auto result_type = ops[0];
		auto id = ops[1];

		if (expression_type(ops[2]).vecsize > 1)
			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", false, SPIRType::Unknown);
		else
			HLSL_BOP(<);
		break;
	}

	case OpFUnordLessThan:
	{
		auto result_type = ops[0];
		auto id = ops[1];

		if (expression_type(ops[2]).vecsize > 1)
			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", true, SPIRType::Unknown);
		else
			CompilerGLSL::emit_instruction(instruction);
		break;
	}

	case OpULessThanEqual:
	case OpSLessThanEqual:
	{
		auto result_type = ops[0];
		auto id = ops[1];
		auto type = opcode == OpULessThanEqual ? uint_type : int_type;

		if (expression_type(ops[2]).vecsize > 1)
			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", false, type);
		else
			HLSL_BOP_CAST(<=, type);
		break;
	}

	case OpFOrdLessThanEqual:
	{
		auto result_type = ops[0];
		auto id = ops[1];

		if (expression_type(ops[2]).vecsize > 1)
			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", false, SPIRType::Unknown);
		else
			HLSL_BOP(<=);
		break;
	}

	case OpFUnordLessThanEqual:
	{
		auto result_type = ops[0];
		auto id = ops[1];

		if (expression_type(ops[2]).vecsize > 1)
			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", true, SPIRType::Unknown);
		else
			CompilerGLSL::emit_instruction(instruction);
		break;
	}

	case OpImageQueryLod:
		emit_texture_op(instruction, false);
		break;

	case OpImageQuerySizeLod:
	{
		auto result_type = ops[0];
		auto id = ops[1];

		require_texture_query_variant(ops[2]);
		auto dummy_samples_levels = join(get_fallback_name(id), "_dummy_parameter");
		statement("uint ", dummy_samples_levels, ";");

		auto expr = join("spvTextureSize(", to_non_uniform_aware_expression(ops[2]), ", ",
		                 bitcast_expression(SPIRType::UInt, ops[3]), ", ", dummy_samples_levels, ")");

		auto &restype = get<SPIRType>(ops[0]);
		expr = bitcast_expression(restype, SPIRType::UInt, expr);
		emit_op(result_type, id, expr, true);
		break;
	}
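
	// For reference, for a Texture2D SRV this emits HLSL roughly like
	// (identifiers illustrative):
	//
	//   uint _42_dummy_parameter;
	//   int2 _42 = int2(spvTextureSize(tex, uint(lod), _42_dummy_parameter));
	//
	// where spvTextureSize is one of the generated helpers wrapping GetDimensions(),
	// and the dummy out-parameter receives the mip/sample count this opcode ignores.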

	case OpImageQuerySize:
	{
		auto result_type = ops[0];
		auto id = ops[1];

		require_texture_query_variant(ops[2]);
		bool uav = expression_type(ops[2]).image.sampled == 2;
		if (const auto *var = maybe_get_backing_variable(ops[2]))
			if (hlsl_options.nonwritable_uav_texture_as_srv && has_decoration(var->self, DecorationNonWritable))
				uav = false;

		auto dummy_samples_levels = join(get_fallback_name(id), "_dummy_parameter");
		statement("uint ", dummy_samples_levels, ";");

		string expr;
		if (uav)
			expr = join("spvImageSize(", to_non_uniform_aware_expression(ops[2]), ", ", dummy_samples_levels, ")");
		else
			expr = join("spvTextureSize(", to_non_uniform_aware_expression(ops[2]), ", 0u, ", dummy_samples_levels, ")");

		auto &restype = get<SPIRType>(ops[0]);
		expr = bitcast_expression(restype, SPIRType::UInt, expr);
		emit_op(result_type, id, expr, true);
		break;
	}

	case OpImageQuerySamples:
	case OpImageQueryLevels:
	{
		auto result_type = ops[0];
		auto id = ops[1];

		require_texture_query_variant(ops[2]);
		bool uav = expression_type(ops[2]).image.sampled == 2;
		if (opcode == OpImageQueryLevels && uav)
			SPIRV_CROSS_THROW("Cannot query levels for UAV images.");

		if (const auto *var = maybe_get_backing_variable(ops[2]))
			if (hlsl_options.nonwritable_uav_texture_as_srv && has_decoration(var->self, DecorationNonWritable))
				uav = false;

		// Keep it simple and do not emit special variants to make this look nicer ...
		// This stuff is barely, if ever, used.
		forced_temporaries.insert(id);
		auto &type = get<SPIRType>(result_type);
		statement(variable_decl(type, to_name(id)), ";");

		if (uav)
			statement("spvImageSize(", to_non_uniform_aware_expression(ops[2]), ", ", to_name(id), ");");
		else
			statement("spvTextureSize(", to_non_uniform_aware_expression(ops[2]), ", 0u, ", to_name(id), ");");

		auto &restype = get<SPIRType>(ops[0]);
		auto expr = bitcast_expression(restype, SPIRType::UInt, to_name(id));
		set<SPIRExpression>(id, expr, result_type, true);
		break;
	}

	case OpImageRead:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		auto *var = maybe_get_backing_variable(ops[2]);
		auto &type = expression_type(ops[2]);
		bool subpass_data = type.image.dim == DimSubpassData;
		bool pure = false;

		string imgexpr;

		if (subpass_data)
		{
			if (hlsl_options.shader_model < 40)
				SPIRV_CROSS_THROW("Subpass loads are not supported in HLSL shader model 2/3.");

			// Similar to GLSL, implement subpass loads using texelFetch.
			if (type.image.ms)
			{
				uint32_t operands = ops[4];
				if (operands != ImageOperandsSampleMask || instruction.length != 6)
					SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected operand mask was used.");
				uint32_t sample = ops[5];
				imgexpr = join(to_non_uniform_aware_expression(ops[2]), ".Load(int2(gl_FragCoord.xy), ", to_expression(sample), ")");
			}
			else
				imgexpr = join(to_non_uniform_aware_expression(ops[2]), ".Load(int3(int2(gl_FragCoord.xy), 0))");

			pure = true;
		}
		else
		{
			imgexpr = join(to_non_uniform_aware_expression(ops[2]), "[", to_expression(ops[3]), "]");
			// The underlying image type in HLSL depends on the image format, unlike GLSL, where all images are "vec4",
			// except that the underlying type changes how the data is interpreted.

			bool force_srv =
			    hlsl_options.nonwritable_uav_texture_as_srv && var && has_decoration(var->self, DecorationNonWritable);
			pure = force_srv;

			if (var && !subpass_data && !force_srv)
				imgexpr = remap_swizzle(get<SPIRType>(result_type),
				                        image_format_to_components(get<SPIRType>(var->basetype).image.format), imgexpr);
		}

		if (var)
		{
			bool forward = forced_temporaries.find(id) == end(forced_temporaries);
			auto &e = emit_op(result_type, id, imgexpr, forward);

			if (!pure)
			{
				e.loaded_from = var->self;
				if (forward)
					var->dependees.push_back(id);
			}
		}
		else
			emit_op(result_type, id, imgexpr, false);

		inherit_expression_dependencies(id, ops[2]);
		if (type.image.ms)
			inherit_expression_dependencies(id, ops[5]);
		break;
	}
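
	// Example of the remap above: an R32_FLOAT image loads as a scalar float in HLSL,
	// while the SPIR-V result type is a 4-component vector, so remap_swizzle widens
	// the expression (roughly img[coord].xxxx for a one-component format).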

	case OpImageWrite:
	{
		auto *var = maybe_get_backing_variable(ops[0]);

		// The underlying image type in HLSL depends on the image format, unlike GLSL, where all images are "vec4",
		// except that the underlying type changes how the data is interpreted.
		auto value_expr = to_expression(ops[2]);
		if (var)
		{
			auto &type = get<SPIRType>(var->basetype);
			auto narrowed_type = get<SPIRType>(type.image.type);
			narrowed_type.vecsize = image_format_to_components(type.image.format);
			value_expr = remap_swizzle(narrowed_type, expression_type(ops[2]).vecsize, value_expr);
		}

		statement(to_non_uniform_aware_expression(ops[0]), "[", to_expression(ops[1]), "] = ", value_expr, ";");
		if (var && variable_storage_is_aliased(*var))
			flush_all_aliased_variables();
		break;
	}

	case OpImageTexelPointer:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];

		auto expr = to_expression(ops[2]);
		expr += join("[", to_expression(ops[3]), "]");
		auto &e = set<SPIRExpression>(id, expr, result_type, true);

		// When using the pointer, we need to know which variable it is actually loaded from.
		auto *var = maybe_get_backing_variable(ops[2]);
		e.loaded_from = var ? var->self : ID(0);
		inherit_expression_dependencies(id, ops[3]);
		break;
	}
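
	// Note that the "pointer" built here is just the textual expression "img[coord]";
	// it is later consumed by emit_atomic(), which presumably wraps it in the matching
	// Interlocked*() intrinsic, since HLSL has no first-class pointers.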

	case OpAtomicFAddEXT:
	case OpAtomicFMinEXT:
	case OpAtomicFMaxEXT:
		SPIRV_CROSS_THROW("Floating-point atomics are not supported in HLSL.");

	case OpAtomicCompareExchange:
	case OpAtomicExchange:
	case OpAtomicISub:
	case OpAtomicSMin:
	case OpAtomicUMin:
	case OpAtomicSMax:
	case OpAtomicUMax:
	case OpAtomicAnd:
	case OpAtomicOr:
	case OpAtomicXor:
	case OpAtomicIAdd:
	case OpAtomicIIncrement:
	case OpAtomicIDecrement:
	case OpAtomicLoad:
	case OpAtomicStore:
	{
		emit_atomic(ops, instruction.length, opcode);
		break;
	}

	case OpControlBarrier:
	case OpMemoryBarrier:
	{
		uint32_t memory;
		uint32_t semantics;

		if (opcode == OpMemoryBarrier)
		{
			memory = evaluate_constant_u32(ops[0]);
			semantics = evaluate_constant_u32(ops[1]);
		}
		else
		{
			memory = evaluate_constant_u32(ops[1]);
			semantics = evaluate_constant_u32(ops[2]);
		}

		if (memory == ScopeSubgroup)
		{
			// No Wave-barriers in HLSL.
			break;
		}

		// We only care about these flags, acquire/release and friends are not relevant to HLSL.
		semantics = mask_relevant_memory_semantics(semantics);

		if (opcode == OpMemoryBarrier)
		{
			// If we are a memory barrier, and the next instruction is a control barrier,
			// check whether that control barrier already covers what we need, so we avoid redundant barriers.
			const Instruction *next = get_next_instruction_in_block(instruction);
			if (next && next->op == OpControlBarrier)
			{
				auto *next_ops = stream(*next);
				uint32_t next_memory = evaluate_constant_u32(next_ops[1]);
				uint32_t next_semantics = evaluate_constant_u32(next_ops[2]);
				next_semantics = mask_relevant_memory_semantics(next_semantics);

				// There is no "just execution barrier" in HLSL.
				// If there are no memory semantics for the next instruction, we will imply group shared memory is synced.
				if (next_semantics == 0)
					next_semantics = MemorySemanticsWorkgroupMemoryMask;

				bool memory_scope_covered = false;
				if (next_memory == memory)
					memory_scope_covered = true;
				else if (next_semantics == MemorySemanticsWorkgroupMemoryMask)
				{
					// If we only care about workgroup memory, either Device or Workgroup scope is fine,
					// scope does not have to match.
					if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) &&
					    (memory == ScopeDevice || memory == ScopeWorkgroup))
					{
						memory_scope_covered = true;
					}
				}
				else if (memory == ScopeWorkgroup && next_memory == ScopeDevice)
				{
					// The control barrier has device scope, but the memory barrier just has workgroup scope.
					memory_scope_covered = true;
				}

				// If we have the same memory scope, and all memory types are covered, we're good.
				if (memory_scope_covered && (semantics & next_semantics) == semantics)
					break;
			}
		}

		// We are synchronizing some memory or syncing execution,
		// so we cannot forward any loads beyond the memory barrier.
		if (semantics || opcode == OpControlBarrier)
		{
			assert(current_emitting_block);
			flush_control_dependent_expressions(current_emitting_block->self);
			flush_all_active_variables();
		}

		if (opcode == OpControlBarrier)
		{
			// We cannot emit an execution-only barrier; with no memory semantics, pick the cheapest option.
			if (semantics == MemorySemanticsWorkgroupMemoryMask || semantics == 0)
				statement("GroupMemoryBarrierWithGroupSync();");
			else if (semantics != 0 && (semantics & MemorySemanticsWorkgroupMemoryMask) == 0)
				statement("DeviceMemoryBarrierWithGroupSync();");
			else
				statement("AllMemoryBarrierWithGroupSync();");
		}
		else
		{
			if (semantics == MemorySemanticsWorkgroupMemoryMask)
				statement("GroupMemoryBarrier();");
			else if (semantics != 0 && (semantics & MemorySemanticsWorkgroupMemoryMask) == 0)
				statement("DeviceMemoryBarrier();");
			else
				statement("AllMemoryBarrier();");
		}
		break;
	}
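
	// Summary of the mapping above:
	//   OpControlBarrier + workgroup semantics (or none) -> GroupMemoryBarrierWithGroupSync()
	//   OpControlBarrier + device-only semantics         -> DeviceMemoryBarrierWithGroupSync()
	//   OpControlBarrier + mixed semantics               -> AllMemoryBarrierWithGroupSync()
	//   OpMemoryBarrier  + the same split, without the GroupSync suffix.
	// HLSL has no execution-only barrier, so a pure control barrier still synchronizes
	// group shared memory as the cheapest available option.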

	case OpBitFieldInsert:
	{
		if (!requires_bitfield_insert)
		{
			requires_bitfield_insert = true;
			force_recompile();
		}

		auto expr = join("spvBitfieldInsert(", to_expression(ops[2]), ", ", to_expression(ops[3]), ", ",
		                 to_expression(ops[4]), ", ", to_expression(ops[5]), ")");

		bool forward =
		    should_forward(ops[2]) && should_forward(ops[3]) && should_forward(ops[4]) && should_forward(ops[5]);

		auto &restype = get<SPIRType>(ops[0]);
		expr = bitcast_expression(restype, SPIRType::UInt, expr);
		emit_op(ops[0], ops[1], expr, forward);
		break;
	}

	case OpBitFieldSExtract:
	case OpBitFieldUExtract:
	{
		if (!requires_bitfield_extract)
		{
			requires_bitfield_extract = true;
			force_recompile();
		}

		if (opcode == OpBitFieldSExtract)
			HLSL_TFOP(spvBitfieldSExtract);
		else
			HLSL_TFOP(spvBitfieldUExtract);
		break;
	}

	case OpBitCount:
	{
		auto basetype = expression_type(ops[2]).basetype;
		emit_unary_func_op_cast(ops[0], ops[1], ops[2], "countbits", basetype, basetype);
		break;
	}

	case OpBitReverse:
		HLSL_UFOP(reversebits);
		break;

	case OpArrayLength:
	{
		auto *var = maybe_get_backing_variable(ops[2]);
		if (!var)
			SPIRV_CROSS_THROW("Array length must point directly to an SSBO block.");

		auto &type = get<SPIRType>(var->basetype);
		if (!has_decoration(type.self, DecorationBlock) && !has_decoration(type.self, DecorationBufferBlock))
			SPIRV_CROSS_THROW("Array length expression must point to a block type.");

		// This must be 32-bit uint, so we're good to go.
		emit_uninitialized_temporary_expression(ops[0], ops[1]);
		statement(to_non_uniform_aware_expression(ops[2]), ".GetDimensions(", to_expression(ops[1]), ");");
		uint32_t offset = type_struct_member_offset(type, ops[3]);
		uint32_t stride = type_struct_member_array_stride(type, ops[3]);
		statement(to_expression(ops[1]), " = (", to_expression(ops[1]), " - ", offset, ") / ", stride, ";");
		break;
	}
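
	// The emitted HLSL looks roughly like (names illustrative):
	//
	//   uint _len;
	//   buf.GetDimensions(_len);
	//   _len = (_len - offset) / stride;
	//
	// i.e. the runtime array size is recovered from the buffer's byte size.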

	case OpIsHelperInvocationEXT:
		if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelFragment)
			SPIRV_CROSS_THROW("Helper Invocation input is only supported in PS 5.0 or higher.");
		// Helper lane state with demote is volatile by nature.
		// Do not forward this.
		emit_op(ops[0], ops[1], "IsHelperLane()", false);
		break;

	case OpBeginInvocationInterlockEXT:
	case OpEndInvocationInterlockEXT:
		if (hlsl_options.shader_model < 51)
			SPIRV_CROSS_THROW("Rasterizer order views require Shader Model 5.1.");
		break; // Nothing to do in the body

	case OpRayQueryInitializeKHR:
	{
		flush_variable_declaration(ops[0]);

		std::string ray_desc_name = get_unique_identifier();
		statement("RayDesc ", ray_desc_name, " = {", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
		          to_expression(ops[6]), ", ", to_expression(ops[7]), "};");

		statement(to_expression(ops[0]), ".TraceRayInline(",
		          to_expression(ops[1]), ", ", // acc structure
		          to_expression(ops[2]), ", ", // ray flags
		          to_expression(ops[3]), ", ", // mask
		          ray_desc_name, ");"); // ray
		break;
	}
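
	// Emitted HLSL sketch (names illustrative):
	//
	//   RayDesc _0ident = { origin, tmin, direction, tmax };
	//   rq.TraceRayInline(accel_struct, ray_flags, cull_mask, _0ident);
	//
	// matching RayDesc's field order of Origin, TMin, Direction, TMax.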

	case OpRayQueryProceedKHR:
	{
		flush_variable_declaration(ops[0]);
		emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".Proceed()"), false);
		break;
	}

	case OpRayQueryTerminateKHR:
	{
		flush_variable_declaration(ops[0]);
		statement(to_expression(ops[0]), ".Abort();");
		break;
	}

	case OpRayQueryGenerateIntersectionKHR:
	{
		flush_variable_declaration(ops[0]);
		statement(to_expression(ops[0]), ".CommitProceduralPrimitiveHit(", to_expression(ops[1]), ");");
		break;
	}

	case OpRayQueryConfirmIntersectionKHR:
	{
		flush_variable_declaration(ops[0]);
		statement(to_expression(ops[0]), ".CommitNonOpaqueTriangleHit();");
		break;
	}

	case OpRayQueryGetIntersectionTypeKHR:
	{
		emit_rayquery_function(".CommittedStatus()", ".CandidateType()", ops);
		break;
	}

	case OpRayQueryGetIntersectionTKHR:
	{
		emit_rayquery_function(".CommittedRayT()", ".CandidateTriangleRayT()", ops);
		break;
	}

	case OpRayQueryGetIntersectionInstanceCustomIndexKHR:
	{
		emit_rayquery_function(".CommittedInstanceID()", ".CandidateInstanceID()", ops);
		break;
	}

	case OpRayQueryGetIntersectionInstanceIdKHR:
	{
		emit_rayquery_function(".CommittedInstanceIndex()", ".CandidateInstanceIndex()", ops);
		break;
	}

	case OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR:
	{
		emit_rayquery_function(".CommittedInstanceContributionToHitGroupIndex()",
		                       ".CandidateInstanceContributionToHitGroupIndex()", ops);
		break;
	}

	case OpRayQueryGetIntersectionGeometryIndexKHR:
	{
		emit_rayquery_function(".CommittedGeometryIndex()", ".CandidateGeometryIndex()", ops);
		break;
	}

	case OpRayQueryGetIntersectionPrimitiveIndexKHR:
	{
		emit_rayquery_function(".CommittedPrimitiveIndex()", ".CandidatePrimitiveIndex()", ops);
		break;
	}

	case OpRayQueryGetIntersectionBarycentricsKHR:
	{
		emit_rayquery_function(".CommittedTriangleBarycentrics()", ".CandidateTriangleBarycentrics()", ops);
		break;
	}

	case OpRayQueryGetIntersectionFrontFaceKHR:
	{
		emit_rayquery_function(".CommittedTriangleFrontFace()", ".CandidateTriangleFrontFace()", ops);
		break;
	}

	case OpRayQueryGetIntersectionCandidateAABBOpaqueKHR:
	{
		flush_variable_declaration(ops[0]);
		emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".CandidateProceduralPrimitiveNonOpaque()"), false);
		break;
	}

	case OpRayQueryGetIntersectionObjectRayDirectionKHR:
	{
		emit_rayquery_function(".CommittedObjectRayDirection()", ".CandidateObjectRayDirection()", ops);
		break;
	}

	case OpRayQueryGetIntersectionObjectRayOriginKHR:
	{
		flush_variable_declaration(ops[0]);
		emit_rayquery_function(".CommittedObjectRayOrigin()", ".CandidateObjectRayOrigin()", ops);
		break;
	}

	case OpRayQueryGetIntersectionObjectToWorldKHR:
	{
		emit_rayquery_function(".CommittedObjectToWorld4x3()", ".CandidateObjectToWorld4x3()", ops);
		break;
	}

	case OpRayQueryGetIntersectionWorldToObjectKHR:
	{
		emit_rayquery_function(".CommittedWorldToObject4x3()", ".CandidateWorldToObject4x3()", ops);
		break;
	}

	case OpRayQueryGetRayFlagsKHR:
	{
		flush_variable_declaration(ops[0]);
		emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".RayFlags()"), false);
		break;
	}

	case OpRayQueryGetRayTMinKHR:
	{
		flush_variable_declaration(ops[0]);
		emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".RayTMin()"), false);
		break;
	}

	case OpRayQueryGetWorldRayOriginKHR:
	{
		flush_variable_declaration(ops[0]);
		emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".WorldRayOrigin()"), false);
		break;
	}

	case OpRayQueryGetWorldRayDirectionKHR:
	{
		flush_variable_declaration(ops[0]);
		emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".WorldRayDirection()"), false);
		break;
	}

	case OpSetMeshOutputsEXT:
	{
		statement("SetMeshOutputCounts(", to_unpacked_expression(ops[0]), ", ", to_unpacked_expression(ops[1]), ");");
		break;
	}

	default:
		CompilerGLSL::emit_instruction(instruction);
		break;
	}
}

void CompilerHLSL::require_texture_query_variant(uint32_t var_id)
{
	if (const auto *var = maybe_get_backing_variable(var_id))
		var_id = var->self;

	auto &type = expression_type(var_id);
	bool uav = type.image.sampled == 2;
	if (hlsl_options.nonwritable_uav_texture_as_srv && has_decoration(var_id, DecorationNonWritable))
		uav = false;

	uint32_t bit = 0;
	switch (type.image.dim)
	{
	case Dim1D:
		bit = type.image.arrayed ? Query1DArray : Query1D;
		break;

	case Dim2D:
		if (type.image.ms)
			bit = type.image.arrayed ? Query2DMSArray : Query2DMS;
		else
			bit = type.image.arrayed ? Query2DArray : Query2D;
		break;

	case Dim3D:
		bit = Query3D;
		break;

	case DimCube:
		bit = type.image.arrayed ? QueryCubeArray : QueryCube;
		break;

	case DimBuffer:
		bit = QueryBuffer;
		break;

	default:
		SPIRV_CROSS_THROW("Unsupported query type.");
	}

	switch (get<SPIRType>(type.image.type).basetype)
	{
	case SPIRType::Float:
		bit += QueryTypeFloat;
		break;

	case SPIRType::Int:
		bit += QueryTypeInt;
		break;

	case SPIRType::UInt:
		bit += QueryTypeUInt;
		break;

	default:
		SPIRV_CROSS_THROW("Unsupported query type.");
	}

	auto norm_state = image_format_to_normalized_state(type.image.format);
	auto &variant = uav ? required_texture_size_variants
	                          .uav[uint32_t(norm_state)][image_format_to_components(type.image.format) - 1] :
	                      required_texture_size_variants.srv;

	uint64_t mask = 1ull << bit;
	if ((variant & mask) == 0)
	{
		force_recompile();
		variant |= mask;
	}
}
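
// Each required spvTextureSize/spvImageSize overload is tracked above as one bit in
// a 64-bit mask: the image dimension picks the base Query* bit and QueryType* offsets
// it per component type. SRVs share a single mask, while UAV variants are additionally
// split per normalized state and component count. Setting a previously unseen bit
// forces another compile pass so the matching helper overload can be emitted.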

void CompilerHLSL::set_root_constant_layouts(std::vector<RootConstants> layout)
{
	root_constants_layout = std::move(layout);
}

void CompilerHLSL::add_vertex_attribute_remap(const HLSLVertexAttributeRemap &vertex_attributes)
{
	remap_vertex_attributes.push_back(vertex_attributes);
}

VariableID CompilerHLSL::remap_num_workgroups_builtin()
{
	update_active_builtins();

	if (!active_input_builtins.get(BuiltInNumWorkgroups))
		return 0;

	// Create a new, fake UBO.
	uint32_t offset = ir.increase_bound_by(4);

	uint32_t uint_type_id = offset;
	uint32_t block_type_id = offset + 1;
	uint32_t block_pointer_type_id = offset + 2;
	uint32_t variable_id = offset + 3;

	SPIRType uint_type;
	uint_type.basetype = SPIRType::UInt;
	uint_type.width = 32;
	uint_type.vecsize = 3;
	uint_type.columns = 1;
	set<SPIRType>(uint_type_id, uint_type);

	SPIRType block_type;
	block_type.basetype = SPIRType::Struct;
	block_type.member_types.push_back(uint_type_id);
	set<SPIRType>(block_type_id, block_type);
	set_decoration(block_type_id, DecorationBlock);
	set_member_name(block_type_id, 0, "count");
	set_member_decoration(block_type_id, 0, DecorationOffset, 0);

	SPIRType block_pointer_type = block_type;
	block_pointer_type.pointer = true;
	block_pointer_type.storage = StorageClassUniform;
	block_pointer_type.parent_type = block_type_id;
	auto &ptr_type = set<SPIRType>(block_pointer_type_id, block_pointer_type);

	// Preserve self.
	ptr_type.self = block_type_id;

	set<SPIRVariable>(variable_id, block_pointer_type_id, StorageClassUniform);
	ir.meta[variable_id].decoration.alias = "SPIRV_Cross_NumWorkgroups";

	num_workgroups_builtin = variable_id;
	get_entry_point().interface_variables.push_back(num_workgroups_builtin);
	return variable_id;
}
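
// HLSL has no NumWorkgroups builtin, so callers must feed it manually. Typical usage
// (sketch; the descriptor set/binding values are an assumption, any scheme works):
//
//   VariableID id = compiler.remap_num_workgroups_builtin();
//   if (id != 0)
//   {
//       compiler.set_decoration(id, DecorationDescriptorSet, 0);
//       compiler.set_decoration(id, DecorationBinding, 0);
//   }
//
// The application then uploads the actual dispatch dimensions to the resulting
// "SPIRV_Cross_NumWorkgroups" constant buffer before dispatching.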

void CompilerHLSL::set_resource_binding_flags(HLSLBindingFlags flags)
{
	resource_binding_flags = flags;
}

void CompilerHLSL::validate_shader_model()
{
	// Check for the nonuniform qualifier.
	// Instead of looping over all decorations to find this, just look at capabilities.
	for (auto &cap : ir.declared_capabilities)
	{
		switch (cap)
		{
		case CapabilityShaderNonUniformEXT:
		case CapabilityRuntimeDescriptorArrayEXT:
			if (hlsl_options.shader_model < 51)
				SPIRV_CROSS_THROW(
				    "Shader model 5.1 or higher is required to use bindless resources or NonUniformResourceIndex.");
			break;

		case CapabilityVariablePointers:
		case CapabilityVariablePointersStorageBuffer:
			SPIRV_CROSS_THROW("VariablePointers capability is not supported in HLSL.");

		default:
			break;
		}
	}

	if (ir.addressing_model != AddressingModelLogical)
		SPIRV_CROSS_THROW("Only the Logical addressing model can be used with HLSL.");

	if (hlsl_options.enable_16bit_types && hlsl_options.shader_model < 62)
		SPIRV_CROSS_THROW("Need at least shader model 6.2 when enabling native 16-bit type support.");
}

string CompilerHLSL::compile()
{
	ir.fixup_reserved_names();

	// Do not deal with ES-isms like precision, older extensions and such.
	options.es = false;
	options.version = 450;
	options.vulkan_semantics = true;
	backend.float_literal_suffix = true;
	backend.double_literal_suffix = false;
	backend.long_long_literal_suffix = true;
	backend.uint32_t_literal_suffix = true;
	backend.int16_t_literal_suffix = "";
	backend.uint16_t_literal_suffix = "u";
	backend.basic_int_type = "int";
	backend.basic_uint_type = "uint";
	backend.demote_literal = "discard";
	backend.boolean_mix_function = "";
	backend.swizzle_is_function = false;
	backend.shared_is_implied = true;
	backend.unsized_array_supported = true;
	backend.explicit_struct_type = false;
	backend.use_initializer_list = true;
	backend.use_constructor_splatting = false;
	backend.can_swizzle_scalar = true;
	backend.can_declare_struct_inline = false;
	backend.can_declare_arrays_inline = false;
	backend.can_return_array = false;
	backend.nonuniform_qualifier = "NonUniformResourceIndex";
	backend.support_case_fallthrough = false;
	backend.force_merged_mesh_block = get_execution_model() == ExecutionModelMeshEXT;
	backend.force_gl_in_out_block = backend.force_merged_mesh_block;

	// SM 4.1 does not support precise for some reason.
	backend.support_precise_qualifier = hlsl_options.shader_model >= 50 || hlsl_options.shader_model == 40;

	fixup_anonymous_struct_names();
	fixup_type_alias();
	reorder_type_alias();
	build_function_control_flow_graphs_and_analyze();
	validate_shader_model();
	update_active_builtins();
	analyze_image_and_sampler_usage();
	analyze_interlocked_resource_usage();
	if (get_execution_model() == ExecutionModelMeshEXT)
		analyze_meshlet_writes();

	// Subpass input needs SV_Position.
	if (need_subpass_input)
		active_input_builtins.set(BuiltInFragCoord);

	uint32_t pass_count = 0;
	do
	{
		reset(pass_count);

		// Move constructor for this type is broken on GCC 4.9 ...
		buffer.reset();

		emit_header();
		emit_resources();

		emit_function(get<SPIRFunction>(ir.default_entry_point), Bitset());
		emit_hlsl_entry_point();

		pass_count++;
	} while (is_forcing_recompilation());

	// Entry point in HLSL is always main() for the time being.
	get_entry_point().name = "main";

	return buffer.str();
}
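
// Typical driver code (sketch; assumes the standard public SPIRV-Cross API):
//
//   CompilerHLSL compiler(std::move(spirv_words));
//   CompilerHLSL::Options opts;
//   opts.shader_model = 50; // SM 5.0
//   compiler.set_hlsl_options(opts);
//   std::string hlsl = compiler.compile();
//
// compile() may run multiple passes internally; is_forcing_recompilation() is set
// whenever a pass discovers that a helper (e.g. spvTextureSize) must be emitted.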

void CompilerHLSL::emit_block_hints(const SPIRBlock &block)
{
	switch (block.hint)
	{
	case SPIRBlock::HintFlatten:
		statement("[flatten]");
		break;
	case SPIRBlock::HintDontFlatten:
		statement("[branch]");
		break;
	case SPIRBlock::HintUnroll:
		statement("[unroll]");
		break;
	case SPIRBlock::HintDontUnroll:
		statement("[loop]");
		break;
	default:
		break;
	}
}

string CompilerHLSL::get_unique_identifier()
{
	return join("_", unique_identifier_count++, "ident");
}

void CompilerHLSL::add_hlsl_resource_binding(const HLSLResourceBinding &binding)
{
	StageSetBinding tuple = { binding.stage, binding.desc_set, binding.binding };
	resource_bindings[tuple] = { binding, false };
}

bool CompilerHLSL::is_hlsl_resource_binding_used(ExecutionModel model, uint32_t desc_set, uint32_t binding) const
{
	StageSetBinding tuple = { model, desc_set, binding };
	auto itr = resource_bindings.find(tuple);
	return itr != end(resource_bindings) && itr->second.second;
}

CompilerHLSL::BitcastType CompilerHLSL::get_bitcast_type(uint32_t result_type, uint32_t op0)
{
	auto &rslt_type = get<SPIRType>(result_type);
	auto &expr_type = expression_type(op0);

	if (rslt_type.basetype == SPIRType::BaseType::UInt64 && expr_type.basetype == SPIRType::BaseType::UInt &&
	    expr_type.vecsize == 2)
		return BitcastType::TypePackUint2x32;
	else if (rslt_type.basetype == SPIRType::BaseType::UInt && rslt_type.vecsize == 2 &&
	         expr_type.basetype == SPIRType::BaseType::UInt64)
		return BitcastType::TypeUnpackUint64;

	return BitcastType::TypeNormal;
}
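
// uint2 <-> 64-bit bitcasts cannot be expressed as a plain asuint()-style reinterpret,
// so these two shapes are routed to dedicated pack/unpack emission paths (presumably
// packUint2x32-style helpers) instead of the normal bitcast path.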

bool CompilerHLSL::is_hlsl_force_storage_buffer_as_uav(ID id) const
{
	if (hlsl_options.force_storage_buffer_as_uav)
	{
		return true;
	}

	const uint32_t desc_set = get_decoration(id, spv::DecorationDescriptorSet);
	const uint32_t binding = get_decoration(id, spv::DecorationBinding);

	return (force_uav_buffer_bindings.find({ desc_set, binding }) != force_uav_buffer_bindings.end());
}

void CompilerHLSL::set_hlsl_force_storage_buffer_as_uav(uint32_t desc_set, uint32_t binding)
{
	SetBindingPair pair = { desc_set, binding };
	force_uav_buffer_bindings.insert(pair);
}

bool CompilerHLSL::builtin_translates_to_nonarray(spv::BuiltIn builtin) const
{
	return (builtin == BuiltInSampleMask);
}

bool CompilerHLSL::is_user_type_structured(uint32_t id) const
{
	if (hlsl_options.preserve_structured_buffers)
	{
		// Compare only the left-hand side of the string, as these user types can carry
		// extra metadata such as their subtypes, e.g. "structuredbuffer:int".
		const std::string &user_type = get_decoration_string(id, DecorationUserTypeGOOGLE);
		return user_type.compare(0, 16, "structuredbuffer") == 0 ||
		       user_type.compare(0, 18, "rwstructuredbuffer") == 0 ||
		       user_type.compare(0, 33, "rasterizerorderedstructuredbuffer") == 0;
	}

	return false;
}