/*
 * Copyright 2016-2021 Robert Konrad
 * SPDX-License-Identifier: Apache-2.0 OR MIT
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

/*
 * At your option, you may choose to accept this material under either:
 *  1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or
 *  2. The MIT License, found at <http://opensource.org/licenses/MIT>.
 */

#include "spirv_hlsl.hpp"
#include "GLSL.std.450.h"
#include <algorithm>
#include <assert.h>

using namespace spv;
using namespace SPIRV_CROSS_NAMESPACE;
using namespace std;

enum class ImageFormatNormalizedState
{
	None = 0,
	Unorm = 1,
	Snorm = 2
};

static ImageFormatNormalizedState image_format_to_normalized_state(ImageFormat fmt)
{
	switch (fmt)
	{
	case ImageFormatR8:
	case ImageFormatR16:
	case ImageFormatRg8:
	case ImageFormatRg16:
	case ImageFormatRgba8:
	case ImageFormatRgba16:
	case ImageFormatRgb10A2:
		return ImageFormatNormalizedState::Unorm;

	case ImageFormatR8Snorm:
	case ImageFormatR16Snorm:
	case ImageFormatRg8Snorm:
	case ImageFormatRg16Snorm:
	case ImageFormatRgba8Snorm:
	case ImageFormatRgba16Snorm:
		return ImageFormatNormalizedState::Snorm;

	default:
		break;
	}

	return ImageFormatNormalizedState::None;
}

static unsigned image_format_to_components(ImageFormat fmt)
{
	switch (fmt)
	{
	case ImageFormatR8:
	case ImageFormatR16:
	case ImageFormatR8Snorm:
	case ImageFormatR16Snorm:
	case ImageFormatR16f:
	case ImageFormatR32f:
	case ImageFormatR8i:
	case ImageFormatR16i:
	case ImageFormatR32i:
	case ImageFormatR8ui:
	case ImageFormatR16ui:
	case ImageFormatR32ui:
		return 1;

	case ImageFormatRg8:
	case ImageFormatRg16:
	case ImageFormatRg8Snorm:
	case ImageFormatRg16Snorm:
	case ImageFormatRg16f:
	case ImageFormatRg32f:
	case ImageFormatRg8i:
	case ImageFormatRg16i:
	case ImageFormatRg32i:
	case ImageFormatRg8ui:
	case ImageFormatRg16ui:
	case ImageFormatRg32ui:
		return 2;

	case ImageFormatR11fG11fB10f:
		return 3;

	case ImageFormatRgba8:
	case ImageFormatRgba16:
	case ImageFormatRgb10A2:
	case ImageFormatRgba8Snorm:
	case ImageFormatRgba16Snorm:
	case ImageFormatRgba16f:
	case ImageFormatRgba32f:
	case ImageFormatRgba8i:
	case ImageFormatRgba16i:
	case ImageFormatRgba32i:
	case ImageFormatRgba8ui:
	case ImageFormatRgba16ui:
	case ImageFormatRgba32ui:
	case ImageFormatRgb10a2ui:
		return 4;

	case ImageFormatUnknown:
		return 4; // Assume 4.

	default:
		SPIRV_CROSS_THROW("Unrecognized typed image format.");
	}
}

static string image_format_to_type(ImageFormat fmt, SPIRType::BaseType basetype)
{
	switch (fmt)
	{
	case ImageFormatR8:
	case ImageFormatR16:
		if (basetype != SPIRType::Float)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "unorm float";
	case ImageFormatRg8:
	case ImageFormatRg16:
		if (basetype != SPIRType::Float)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "unorm float2";
	case ImageFormatRgba8:
	case ImageFormatRgba16:
		if (basetype != SPIRType::Float)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "unorm float4";
	case ImageFormatRgb10A2:
		if (basetype != SPIRType::Float)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "unorm float4";

	case ImageFormatR8Snorm:
	case ImageFormatR16Snorm:
		if (basetype != SPIRType::Float)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "snorm float";
	case ImageFormatRg8Snorm:
	case ImageFormatRg16Snorm:
		if (basetype != SPIRType::Float)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "snorm float2";
	case ImageFormatRgba8Snorm:
	case ImageFormatRgba16Snorm:
		if (basetype != SPIRType::Float)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "snorm float4";

	case ImageFormatR16f:
	case ImageFormatR32f:
		if (basetype != SPIRType::Float)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "float";
	case ImageFormatRg16f:
	case ImageFormatRg32f:
		if (basetype != SPIRType::Float)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "float2";
	case ImageFormatRgba16f:
	case ImageFormatRgba32f:
		if (basetype != SPIRType::Float)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "float4";
	case ImageFormatR11fG11fB10f:
		if (basetype != SPIRType::Float)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "float3";

	case ImageFormatR8i:
	case ImageFormatR16i:
	case ImageFormatR32i:
		if (basetype != SPIRType::Int)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "int";
	case ImageFormatRg8i:
	case ImageFormatRg16i:
	case ImageFormatRg32i:
		if (basetype != SPIRType::Int)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "int2";
	case ImageFormatRgba8i:
	case ImageFormatRgba16i:
	case ImageFormatRgba32i:
		if (basetype != SPIRType::Int)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "int4";

	case ImageFormatR8ui:
	case ImageFormatR16ui:
	case ImageFormatR32ui:
		if (basetype != SPIRType::UInt)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "uint";
	case ImageFormatRg8ui:
	case ImageFormatRg16ui:
	case ImageFormatRg32ui:
		if (basetype != SPIRType::UInt)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "uint2";
	case ImageFormatRgba8ui:
	case ImageFormatRgba16ui:
	case ImageFormatRgba32ui:
		if (basetype != SPIRType::UInt)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "uint4";
	case ImageFormatRgb10a2ui:
		if (basetype != SPIRType::UInt)
			SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
		return "uint4";

	case ImageFormatUnknown:
		switch (basetype)
		{
		case SPIRType::Float:
			return "float4";
		case SPIRType::Int:
			return "int4";
		case SPIRType::UInt:
			return "uint4";
		default:
			SPIRV_CROSS_THROW("Unsupported base type for image.");
		}

	default:
		SPIRV_CROSS_THROW("Unrecognized typed image format.");
	}
}

string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t id)
{
	auto &imagetype = get<SPIRType>(type.image.type);
	const char *dim = nullptr;
	bool typed_load = false;
	uint32_t components = 4;

	bool force_image_srv = hlsl_options.nonwritable_uav_texture_as_srv && has_decoration(id, DecorationNonWritable);

	switch (type.image.dim)
	{
	case Dim1D:
		typed_load = type.image.sampled == 2;
		dim = "1D";
		break;
	case Dim2D:
		typed_load = type.image.sampled == 2;
		dim = "2D";
		break;
	case Dim3D:
		typed_load = type.image.sampled == 2;
		dim = "3D";
		break;
	case DimCube:
		if (type.image.sampled == 2)
			SPIRV_CROSS_THROW("RWTextureCube does not exist in HLSL.");
		dim = "Cube";
		break;
	case DimRect:
		SPIRV_CROSS_THROW("Rectangle texture support is not yet implemented for HLSL."); // TODO
	case DimBuffer:
		if (type.image.sampled == 1)
			return join("Buffer<", type_to_glsl(imagetype), components, ">");
		else if (type.image.sampled == 2)
		{
			if (interlocked_resources.count(id))
				return join("RasterizerOrderedBuffer<", image_format_to_type(type.image.format, imagetype.basetype),
				            ">");

			typed_load = !force_image_srv && type.image.sampled == 2;

			const char *rw = force_image_srv ? "" : "RW";
			return join(rw, "Buffer<",
			            typed_load ? image_format_to_type(type.image.format, imagetype.basetype) :
			                         join(type_to_glsl(imagetype), components),
			            ">");
		}
		else
			SPIRV_CROSS_THROW("Sampler buffers must be either sampled or unsampled. Cannot deduce in runtime.");
	case DimSubpassData:
		dim = "2D";
		typed_load = false;
		break;
	default:
		SPIRV_CROSS_THROW("Invalid dimension.");
	}

	const char *arrayed = type.image.arrayed ? "Array" : "";
	const char *ms = type.image.ms ? "MS" : "";
	const char *rw = typed_load && !force_image_srv ? "RW" : "";

	if (force_image_srv)
		typed_load = false;

	if (typed_load && interlocked_resources.count(id))
		rw = "RasterizerOrdered";

	return join(rw, "Texture", dim, ms, arrayed, "<",
	            typed_load ? image_format_to_type(type.image.format, imagetype.basetype) :
	                         join(type_to_glsl(imagetype), components),
	            ">");
}
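
// Illustrative examples of the mapping above (inferred from the code, not from
// the original source): a sampled 2D float image maps to "Texture2D<float4>",
// a storage image with format Rgba8 to "RWTexture2D<unorm float4>", and the
// same storage image decorated NonWritable (with nonwritable_uav_texture_as_srv
// enabled) is demoted to a plain "Texture2D<float4>" SRV.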
string CompilerHLSL::image_type_hlsl_legacy(const SPIRType &type, uint32_t /*id*/)
{
	auto &imagetype = get<SPIRType>(type.image.type);
	string res;

	switch (imagetype.basetype)
	{
	case SPIRType::Int:
		res = "i";
		break;
	case SPIRType::UInt:
		res = "u";
		break;
	default:
		break;
	}

	if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData)
		return res + "subpassInput" + (type.image.ms ? "MS" : "");

	// If we're emulating subpassInput with samplers, force sampler2D
	// so we don't have to specify format.
	if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData)
	{
		// Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V.
		if (type.image.dim == DimBuffer && type.image.sampled == 1)
			res += "sampler";
		else
			res += type.image.sampled == 2 ? "image" : "texture";
	}
	else
		res += "sampler";

	switch (type.image.dim)
	{
	case Dim1D:
		res += "1D";
		break;
	case Dim2D:
		res += "2D";
		break;
	case Dim3D:
		res += "3D";
		break;
	case DimCube:
		res += "CUBE";
		break;
	case DimBuffer:
		res += "Buffer";
		break;
	case DimSubpassData:
		res += "2D";
		break;
	default:
		SPIRV_CROSS_THROW("Only 1D, 2D, 3D, Buffer, InputTarget and Cube textures supported.");
	}

	if (type.image.ms)
		res += "MS";
	if (type.image.arrayed)
		res += "Array";

	return res;
}
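
// On SM 3.0 and below this produces GLSL-style effect type names; e.g. a
// combined image-sampler maps to "sampler2D" and a separate uint 2D texture
// to "utexture2D" (informal examples inferred from the code above).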
string CompilerHLSL::image_type_hlsl(const SPIRType &type, uint32_t id)
{
	if (hlsl_options.shader_model <= 30)
		return image_type_hlsl_legacy(type, id);
	else
		return image_type_hlsl_modern(type, id);
}

// The optional id parameter indicates the object whose type we are trying
// to find the description for. It is optional. Most type descriptions do not
// depend on a specific object's use of that type.
string CompilerHLSL::type_to_glsl(const SPIRType &type, uint32_t id)
{
	// Ignore the pointer type since GLSL doesn't have pointers.

	switch (type.basetype)
	{
	case SPIRType::Struct:
		// Need OpName lookup here to get a "sensible" name for a struct.
		if (backend.explicit_struct_type)
			return join("struct ", to_name(type.self));
		else
			return to_name(type.self);

	case SPIRType::Image:
	case SPIRType::SampledImage:
		return image_type_hlsl(type, id);

	case SPIRType::Sampler:
		return comparison_ids.count(id) ? "SamplerComparisonState" : "SamplerState";

	case SPIRType::Void:
		return "void";

	default:
		break;
	}

	if (type.vecsize == 1 && type.columns == 1) // Scalar builtin
	{
		switch (type.basetype)
		{
		case SPIRType::Boolean:
			return "bool";
		case SPIRType::Int:
			return backend.basic_int_type;
		case SPIRType::UInt:
			return backend.basic_uint_type;
		case SPIRType::AtomicCounter:
			return "atomic_uint";
		case SPIRType::Half:
			if (hlsl_options.enable_16bit_types)
				return "half";
			else
				return "min16float";
		case SPIRType::Short:
			if (hlsl_options.enable_16bit_types)
				return "int16_t";
			else
				return "min16int";
		case SPIRType::UShort:
			if (hlsl_options.enable_16bit_types)
				return "uint16_t";
			else
				return "min16uint";
		case SPIRType::Float:
			return "float";
		case SPIRType::Double:
			return "double";
		case SPIRType::Int64:
			if (hlsl_options.shader_model < 60)
				SPIRV_CROSS_THROW("64-bit integers only supported in SM 6.0.");
			return "int64_t";
		case SPIRType::UInt64:
			if (hlsl_options.shader_model < 60)
				SPIRV_CROSS_THROW("64-bit integers only supported in SM 6.0.");
			return "uint64_t";
		case SPIRType::AccelerationStructure:
			return "RaytracingAccelerationStructure";
		case SPIRType::RayQuery:
			return "RayQuery<RAY_FLAG_NONE>";
		default:
			return "???";
		}
	}
	else if (type.vecsize > 1 && type.columns == 1) // Vector builtin
	{
		switch (type.basetype)
		{
		case SPIRType::Boolean:
			return join("bool", type.vecsize);
		case SPIRType::Int:
			return join("int", type.vecsize);
		case SPIRType::UInt:
			return join("uint", type.vecsize);
		case SPIRType::Half:
			return join(hlsl_options.enable_16bit_types ? "half" : "min16float", type.vecsize);
		case SPIRType::Short:
			return join(hlsl_options.enable_16bit_types ? "int16_t" : "min16int", type.vecsize);
		case SPIRType::UShort:
			return join(hlsl_options.enable_16bit_types ? "uint16_t" : "min16uint", type.vecsize);
		case SPIRType::Float:
			return join("float", type.vecsize);
		case SPIRType::Double:
			return join("double", type.vecsize);
		case SPIRType::Int64:
			return join("int64_t", type.vecsize);
		case SPIRType::UInt64:
			return join("uint64_t", type.vecsize);
		default:
			return "???";
		}
	}
	else
	{
		switch (type.basetype)
		{
		case SPIRType::Boolean:
			return join("bool", type.columns, "x", type.vecsize);
		case SPIRType::Int:
			return join("int", type.columns, "x", type.vecsize);
		case SPIRType::UInt:
			return join("uint", type.columns, "x", type.vecsize);
		case SPIRType::Half:
			return join(hlsl_options.enable_16bit_types ? "half" : "min16float", type.columns, "x", type.vecsize);
		case SPIRType::Short:
			return join(hlsl_options.enable_16bit_types ? "int16_t" : "min16int", type.columns, "x", type.vecsize);
		case SPIRType::UShort:
			return join(hlsl_options.enable_16bit_types ? "uint16_t" : "min16uint", type.columns, "x", type.vecsize);
		case SPIRType::Float:
			return join("float", type.columns, "x", type.vecsize);
		case SPIRType::Double:
			return join("double", type.columns, "x", type.vecsize);
		// Matrix types not supported for int64/uint64.
		default:
			return "???";
		}
	}
}
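
// Informal examples of the naming above (inferred from the code): a scalar
// float maps to "float", a 3-component uint vector to "uint3", and a matrix
// with 2 columns of 4-component float vectors to "float2x4" (the name is
// built as columns, "x", vecsize).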
void CompilerHLSL::emit_header()
{
	for (auto &header : header_lines)
		statement(header);

	if (header_lines.size() > 0)
	{
		statement("");
	}
}

void CompilerHLSL::emit_interface_block_globally(const SPIRVariable &var)
{
	add_resource_name(var.self);

	// The global copies of I/O variables should not contain interpolation qualifiers.
	// These are emitted inside the interface structs.
	auto &flags = ir.meta[var.self].decoration.decoration_flags;
	auto old_flags = flags;
	flags.reset();
	statement("static ", variable_decl(var), ";");
	flags = old_flags;
}
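
// For example (hypothetical variable name, inferred from the code above): a
// float4 stage input "vColor" is declared at global scope as
// "static float4 vColor;"; its interpolation qualifiers are emitted only on
// the corresponding member of the generated I/O struct.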
const char *CompilerHLSL::to_storage_qualifiers_glsl(const SPIRVariable &var)
{
	// Input and output variables are handled specially in HLSL backend.
	// The variables are declared as global, private variables, and do not need any qualifiers.
	if (var.storage == StorageClassUniformConstant || var.storage == StorageClassUniform ||
	    var.storage == StorageClassPushConstant)
	{
		return "uniform ";
	}

	return "";
}

void CompilerHLSL::emit_builtin_outputs_in_struct()
{
	auto &execution = get_entry_point();

	bool legacy = hlsl_options.shader_model <= 30;
	active_output_builtins.for_each_bit([&](uint32_t i) {
		const char *type = nullptr;
		const char *semantic = nullptr;
		auto builtin = static_cast<BuiltIn>(i);
		switch (builtin)
		{
		case BuiltInPosition:
			type = is_position_invariant() && backend.support_precise_qualifier ? "precise float4" : "float4";
			semantic = legacy ? "POSITION" : "SV_Position";
			break;

		case BuiltInSampleMask:
			if (hlsl_options.shader_model < 41 || execution.model != ExecutionModelFragment)
				SPIRV_CROSS_THROW("Sample Mask output is only supported in PS 4.1 or higher.");
			type = "uint";
			semantic = "SV_Coverage";
			break;

		case BuiltInFragDepth:
			type = "float";
			if (legacy)
			{
				semantic = "DEPTH";
			}
			else
			{
				if (hlsl_options.shader_model >= 50 && execution.flags.get(ExecutionModeDepthGreater))
					semantic = "SV_DepthGreaterEqual";
				else if (hlsl_options.shader_model >= 50 && execution.flags.get(ExecutionModeDepthLess))
					semantic = "SV_DepthLessEqual";
				else
					semantic = "SV_Depth";
			}
			break;

		case BuiltInClipDistance:
		{
			static const char *types[] = { "float", "float2", "float3", "float4" };

			// HLSL is a bit weird here, use SV_ClipDistance0, SV_ClipDistance1 and so on with vectors.
			if (execution.model == ExecutionModelMeshEXT)
			{
				if (clip_distance_count > 4)
					SPIRV_CROSS_THROW("Clip distance count > 4 not supported for mesh shaders.");

				if (clip_distance_count == 1)
				{
					// Avoids having to hack up access_chain code. Makes it trivially indexable.
					statement("float gl_ClipDistance[1] : SV_ClipDistance;");
				}
				else
				{
					// Replace array with vector directly, avoids any weird fixup path.
					statement(types[clip_distance_count - 1], " gl_ClipDistance : SV_ClipDistance;");
				}
			}
			else
			{
				for (uint32_t clip = 0; clip < clip_distance_count; clip += 4)
				{
					uint32_t to_declare = clip_distance_count - clip;
					if (to_declare > 4)
						to_declare = 4;

					uint32_t semantic_index = clip / 4;

					statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index,
					          " : SV_ClipDistance", semantic_index, ";");
				}
			}
			break;
		}

		case BuiltInCullDistance:
		{
			static const char *types[] = { "float", "float2", "float3", "float4" };

			// HLSL is a bit weird here, use SV_CullDistance0, SV_CullDistance1 and so on with vectors.
			if (execution.model == ExecutionModelMeshEXT)
			{
				if (cull_distance_count > 4)
					SPIRV_CROSS_THROW("Cull distance count > 4 not supported for mesh shaders.");

				if (cull_distance_count == 1)
				{
					// Avoids having to hack up access_chain code. Makes it trivially indexable.
					statement("float gl_CullDistance[1] : SV_CullDistance;");
				}
				else
				{
					// Replace array with vector directly, avoids any weird fixup path.
					statement(types[cull_distance_count - 1], " gl_CullDistance : SV_CullDistance;");
				}
			}
			else
			{
				for (uint32_t cull = 0; cull < cull_distance_count; cull += 4)
				{
					uint32_t to_declare = cull_distance_count - cull;
					if (to_declare > 4)
						to_declare = 4;

					uint32_t semantic_index = cull / 4;

					statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index,
					          " : SV_CullDistance", semantic_index, ";");
				}
			}
			break;
		}

		case BuiltInPointSize:
			// If point_size_compat is enabled, just ignore PointSize.
			// PointSize does not exist in HLSL, but some code bases might want to be able to use these shaders,
			// even if it means working around the missing feature.
			if (legacy)
			{
				type = "float";
				semantic = "PSIZE";
			}
			else if (!hlsl_options.point_size_compat)
				SPIRV_CROSS_THROW("Unsupported builtin in HLSL.");
			break;

		case BuiltInLayer:
		case BuiltInPrimitiveId:
		case BuiltInViewportIndex:
		case BuiltInPrimitiveShadingRateKHR:
		case BuiltInCullPrimitiveEXT:
			// per-primitive attributes handled separately
			break;

		case BuiltInPrimitivePointIndicesEXT:
		case BuiltInPrimitiveLineIndicesEXT:
		case BuiltInPrimitiveTriangleIndicesEXT:
			// meshlet local-index buffer handled separately
			break;

		default:
			SPIRV_CROSS_THROW("Unsupported builtin in HLSL.");
		}

		if (type && semantic)
			statement(type, " ", builtin_to_glsl(builtin, StorageClassOutput), " : ", semantic, ";");
	});
}
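
// Informal example of the emitted members (inferred from the code above): a
// vertex shader writing gl_Position and two clip distances yields
//   float4 gl_Position : SV_Position;
//   float2 gl_ClipDistance0 : SV_ClipDistance0;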
  644. void CompilerHLSL::emit_builtin_primitive_outputs_in_struct()
  645. {
  646. active_output_builtins.for_each_bit([&](uint32_t i) {
  647. const char *type = nullptr;
  648. const char *semantic = nullptr;
  649. auto builtin = static_cast<BuiltIn>(i);
  650. switch (builtin)
  651. {
  652. case BuiltInLayer:
  653. {
  654. if (hlsl_options.shader_model < 50)
  655. SPIRV_CROSS_THROW("Render target array index output is only supported in SM 5.0 or higher.");
  656. type = "uint";
  657. semantic = "SV_RenderTargetArrayIndex";
  658. break;
  659. }
  660. case BuiltInPrimitiveId:
  661. type = "uint";
  662. semantic = "SV_PrimitiveID";
  663. break;
  664. case BuiltInViewportIndex:
  665. type = "uint";
  666. semantic = "SV_ViewportArrayIndex";
  667. break;
  668. case BuiltInPrimitiveShadingRateKHR:
  669. type = "uint";
  670. semantic = "SV_ShadingRate";
  671. break;
  672. case BuiltInCullPrimitiveEXT:
  673. type = "bool";
  674. semantic = "SV_CullPrimitive";
  675. break;
  676. default:
  677. break;
  678. }
  679. if (type && semantic)
  680. statement(type, " ", builtin_to_glsl(builtin, StorageClassOutput), " : ", semantic, ";");
  681. });
  682. }
  683. void CompilerHLSL::emit_builtin_inputs_in_struct()
  684. {
  685. bool legacy = hlsl_options.shader_model <= 30;
  686. active_input_builtins.for_each_bit([&](uint32_t i) {
  687. const char *type = nullptr;
  688. const char *semantic = nullptr;
  689. auto builtin = static_cast<BuiltIn>(i);
  690. switch (builtin)
  691. {
  692. case BuiltInFragCoord:
  693. type = "float4";
  694. semantic = legacy ? "VPOS" : "SV_Position";
  695. break;
  696. case BuiltInVertexId:
  697. case BuiltInVertexIndex:
  698. if (legacy)
  699. SPIRV_CROSS_THROW("Vertex index not supported in SM 3.0 or lower.");
  700. type = "uint";
  701. semantic = "SV_VertexID";
  702. break;
  703. case BuiltInPrimitiveId:
  704. type = "uint";
  705. semantic = "SV_PrimitiveID";
  706. break;
  707. case BuiltInInstanceId:
  708. case BuiltInInstanceIndex:
  709. if (legacy)
  710. SPIRV_CROSS_THROW("Instance index not supported in SM 3.0 or lower.");
  711. type = "uint";
  712. semantic = "SV_InstanceID";
  713. break;
  714. case BuiltInSampleId:
  715. if (legacy)
  716. SPIRV_CROSS_THROW("Sample ID not supported in SM 3.0 or lower.");
  717. type = "uint";
  718. semantic = "SV_SampleIndex";
  719. break;
  720. case BuiltInSampleMask:
  721. if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelFragment)
			SPIRV_CROSS_THROW("Sample Mask input is only supported in PS 5.0 or higher.");
		type = "uint";
		semantic = "SV_Coverage";
		break;

	case BuiltInGlobalInvocationId:
		type = "uint3";
		semantic = "SV_DispatchThreadID";
		break;

	case BuiltInLocalInvocationId:
		type = "uint3";
		semantic = "SV_GroupThreadID";
		break;

	case BuiltInLocalInvocationIndex:
		type = "uint";
		semantic = "SV_GroupIndex";
		break;

	case BuiltInWorkgroupId:
		type = "uint3";
		semantic = "SV_GroupID";
		break;

	case BuiltInFrontFacing:
		type = "bool";
		semantic = "SV_IsFrontFace";
		break;

	case BuiltInViewIndex:
		if (hlsl_options.shader_model < 61 || (get_entry_point().model != ExecutionModelVertex &&
		                                       get_entry_point().model != ExecutionModelFragment))
			SPIRV_CROSS_THROW("View Index input is only supported in VS and PS 6.1 or higher.");
		type = "uint";
		semantic = "SV_ViewID";
		break;

	case BuiltInNumWorkgroups:
	case BuiltInSubgroupSize:
	case BuiltInSubgroupLocalInvocationId:
	case BuiltInSubgroupEqMask:
	case BuiltInSubgroupLtMask:
	case BuiltInSubgroupLeMask:
	case BuiltInSubgroupGtMask:
	case BuiltInSubgroupGeMask:
		// Handled specially.
		break;

	case BuiltInBaseVertex:
		if (hlsl_options.shader_model >= 68)
		{
			type = "uint";
			semantic = "SV_StartVertexLocation";
		}
		break;

	case BuiltInBaseInstance:
		if (hlsl_options.shader_model >= 68)
		{
			type = "uint";
			semantic = "SV_StartInstanceLocation";
		}
		break;

	case BuiltInHelperInvocation:
		if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelFragment)
			SPIRV_CROSS_THROW("Helper Invocation input is only supported in PS 5.0 or higher.");
		break;

	case BuiltInClipDistance:
		// HLSL is a bit weird here, use SV_ClipDistance0, SV_ClipDistance1 and so on with vectors.
		for (uint32_t clip = 0; clip < clip_distance_count; clip += 4)
		{
			uint32_t to_declare = clip_distance_count - clip;
			if (to_declare > 4)
				to_declare = 4;

			uint32_t semantic_index = clip / 4;

			static const char *types[] = { "float", "float2", "float3", "float4" };
			statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassInput), semantic_index,
			          " : SV_ClipDistance", semantic_index, ";");
		}
		break;

	case BuiltInCullDistance:
		// HLSL is a bit weird here, use SV_CullDistance0, SV_CullDistance1 and so on with vectors.
		for (uint32_t cull = 0; cull < cull_distance_count; cull += 4)
		{
			uint32_t to_declare = cull_distance_count - cull;
			if (to_declare > 4)
				to_declare = 4;

			uint32_t semantic_index = cull / 4;

			static const char *types[] = { "float", "float2", "float3", "float4" };
			statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassInput), semantic_index,
			          " : SV_CullDistance", semantic_index, ";");
		}
		break;

	case BuiltInPointCoord:
		// PointCoord is not supported, but provide a way to just ignore that, similar to PointSize.
		if (hlsl_options.point_coord_compat)
			break;
		else
			SPIRV_CROSS_THROW("Unsupported builtin in HLSL.");

	case BuiltInLayer:
		if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelFragment)
			SPIRV_CROSS_THROW("Render target array index input is only supported in PS 5.0 or higher.");
		type = "uint";
		semantic = "SV_RenderTargetArrayIndex";
		break;

	case BuiltInBaryCoordKHR:
	case BuiltInBaryCoordNoPerspKHR:
		if (hlsl_options.shader_model < 61)
			SPIRV_CROSS_THROW("SM 6.1 is required for barycentrics.");
		type = builtin == BuiltInBaryCoordNoPerspKHR ? "noperspective float3" : "float3";
		if (active_input_builtins.get(BuiltInBaryCoordKHR) && active_input_builtins.get(BuiltInBaryCoordNoPerspKHR))
			semantic = builtin == BuiltInBaryCoordKHR ? "SV_Barycentrics0" : "SV_Barycentrics1";
		else
			semantic = "SV_Barycentrics";
		break;

	default:
		SPIRV_CROSS_THROW("Unsupported builtin in HLSL.");
	}

		if (type && semantic)
			statement(type, " ", builtin_to_glsl(builtin, StorageClassInput), " : ", semantic, ";");
	});
}

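// Counts how many interface "locations" a type occupies when laid out in an
// input/output struct: each matrix column and each array element consumes one
// location, and a struct sums the locations of its members.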
uint32_t CompilerHLSL::type_to_consumed_locations(const SPIRType &type) const
{
	// TODO: Need to verify correctness.
	uint32_t elements = 0;

	if (type.basetype == SPIRType::Struct)
	{
		for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
			elements += type_to_consumed_locations(get<SPIRType>(type.member_types[i]));
	}
	else
	{
		uint32_t array_multiplier = 1;
		for (uint32_t i = 0; i < uint32_t(type.array.size()); i++)
		{
			if (type.array_size_literal[i])
				array_multiplier *= type.array[i];
			else
				array_multiplier *= evaluate_constant_u32(type.array[i]);
		}
		elements += array_multiplier * type.columns;
	}
	return elements;
}

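// Translates SPIR-V interpolation decorations into the equivalent HLSL
// interpolation modifiers (Flat -> nointerpolation, NoPerspective -> noperspective, etc.).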
string CompilerHLSL::to_interpolation_qualifiers(const Bitset &flags)
{
	string res;
	//if (flags & (1ull << DecorationSmooth))
	//	res += "linear ";
	if (flags.get(DecorationFlat) || flags.get(DecorationPerVertexKHR))
		res += "nointerpolation ";
	if (flags.get(DecorationNoPerspective))
		res += "noperspective ";
	if (flags.get(DecorationCentroid))
		res += "centroid ";
	if (flags.get(DecorationPatch))
		res += "patch "; // Seems to be different in actual HLSL.
	if (flags.get(DecorationSample))
		res += "sample ";
	if (flags.get(DecorationInvariant) && backend.support_precise_qualifier)
		res += "precise "; // Not supported?

	return res;
}

std::string CompilerHLSL::to_semantic(uint32_t location, ExecutionModel em, StorageClass sc)
{
	if (em == ExecutionModelVertex && sc == StorageClassInput)
	{
		// We have a vertex attribute - we should look at remapping it if the user provided
		// vertex attribute hints.
		for (auto &attribute : remap_vertex_attributes)
			if (attribute.location == location)
				return attribute.semantic;
	}

	// Not a vertex attribute, or no remap_vertex_attributes entry.
	return join("TEXCOORD", location);
}

std::string CompilerHLSL::to_initializer_expression(const SPIRVariable &var)
{
	// We cannot emit static const initializer for block constants for practical reasons,
	// so just inline the initializer.
	// FIXME: There is a theoretical problem here if someone tries to composite extract
	// into this initializer since we don't declare it properly, but that is somewhat non-sensical.
	auto &type = get<SPIRType>(var.basetype);
	bool is_block = has_decoration(type.self, DecorationBlock);
	auto *c = maybe_get<SPIRConstant>(var.initializer);
	if (is_block && c)
		return constant_expression(*c);
	else
		return CompilerGLSL::to_initializer_expression(var);
}

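// Emits one member of an I/O block as a field of the stage input/output struct,
// using either a user-provided semantic or a TEXCOORD[N] semantic derived from the
// member's location, and marks every location the member consumes as active.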
void CompilerHLSL::emit_interface_block_member_in_struct(const SPIRVariable &var, uint32_t member_index,
                                                         uint32_t location,
                                                         std::unordered_set<uint32_t> &active_locations)
{
	auto &execution = get_entry_point();
	auto type = get<SPIRType>(var.basetype);

	std::string semantic;
	if (hlsl_options.user_semantic && has_member_decoration(var.self, member_index, DecorationUserSemantic))
		semantic = get_member_decoration_string(var.self, member_index, DecorationUserSemantic);
	else
		semantic = to_semantic(location, execution.model, var.storage);
	auto mbr_name = join(to_name(type.self), "_", to_member_name(type, member_index));
	auto &mbr_type = get<SPIRType>(type.member_types[member_index]);

	Bitset member_decorations = get_member_decoration_bitset(type.self, member_index);
	if (has_decoration(var.self, DecorationPerVertexKHR))
		member_decorations.set(DecorationPerVertexKHR);

	statement(to_interpolation_qualifiers(member_decorations),
	          type_to_glsl(mbr_type),
	          " ", mbr_name, type_to_array_glsl(mbr_type, var.self),
	          " : ", semantic, ";");

	// Structs and arrays should consume more locations.
	uint32_t consumed_locations = type_to_consumed_locations(mbr_type);
	for (uint32_t i = 0; i < consumed_locations; i++)
		active_locations.insert(location + i);
}

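// Emits a loose (non-block-member) interface variable as a field of the stage
// input/output struct. Handles SV_Target bindings for fragment outputs, legacy
// SM 3.0 restrictions, vacant-location assignment and matrix unrolling for
// vertex inputs.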
void CompilerHLSL::emit_interface_block_in_struct(const SPIRVariable &var, unordered_set<uint32_t> &active_locations)
{
	auto &execution = get_entry_point();
	auto type = get<SPIRType>(var.basetype);

	string binding;
	bool use_location_number = true;
	bool need_matrix_unroll = false;
	bool legacy = hlsl_options.shader_model <= 30;
	if (execution.model == ExecutionModelFragment && var.storage == StorageClassOutput)
	{
		// Dual-source blending is achieved in HLSL by emitting to SV_Target0 and 1.
		uint32_t index = get_decoration(var.self, DecorationIndex);
		uint32_t location = get_decoration(var.self, DecorationLocation);

		if (index != 0 && location != 0)
			SPIRV_CROSS_THROW("Dual-source blending is only supported on MRT #0 in HLSL.");

		binding = join(legacy ? "COLOR" : "SV_Target", location + index);
		use_location_number = false;
		if (legacy) // COLOR must be a four-component vector on legacy shader model targets (HLSL ERR_COLOR_4COMP)
			type.vecsize = 4;
	}
	else if (var.storage == StorageClassInput && execution.model == ExecutionModelVertex)
	{
		need_matrix_unroll = true;
		if (legacy) // Inputs must be floating-point in legacy targets.
			type.basetype = SPIRType::Float;
	}

	const auto get_vacant_location = [&]() -> uint32_t {
		for (uint32_t i = 0; i < 64; i++)
			if (!active_locations.count(i))
				return i;
		SPIRV_CROSS_THROW("All locations from 0 to 63 are exhausted.");
	};

	auto name = to_name(var.self);
	if (use_location_number)
	{
		uint32_t location_number = UINT32_MAX;
		std::string semantic;
		bool has_user_semantic = false;

		if (hlsl_options.user_semantic && has_decoration(var.self, DecorationUserSemantic))
		{
			semantic = get_decoration_string(var.self, DecorationUserSemantic);
			has_user_semantic = true;
		}
		else
		{
			// If an explicit location exists, use it with TEXCOORD[N] semantic.
			// Otherwise, pick a vacant location.
			if (has_decoration(var.self, DecorationLocation))
				location_number = get_decoration(var.self, DecorationLocation);
			else
				location_number = get_vacant_location();

			// Allow semantic remap if specified.
			semantic = to_semantic(location_number, execution.model, var.storage);
		}

		if (need_matrix_unroll && type.columns > 1)
		{
			if (!type.array.empty())
				SPIRV_CROSS_THROW("Arrays of matrices used as input/output. This is not supported.");

			// Unroll matrices.
			for (uint32_t i = 0; i < type.columns; i++)
			{
				SPIRType newtype = type;
				newtype.columns = 1;

				string effective_semantic;
				if (hlsl_options.flatten_matrix_vertex_input_semantics && !has_user_semantic)
					effective_semantic = to_semantic(location_number, execution.model, var.storage);
				else
					effective_semantic = join(semantic, "_", i);

				statement(to_interpolation_qualifiers(get_decoration_bitset(var.self)),
				          variable_decl(newtype, join(name, "_", i)), " : ", effective_semantic, ";");
				if (location_number != UINT32_MAX)
					active_locations.insert(location_number++);
			}
		}
		else
		{
			auto decl_type = type;
			if (execution.model == ExecutionModelMeshEXT ||
			    (execution.model == ExecutionModelGeometry && var.storage == StorageClassInput) ||
			    has_decoration(var.self, DecorationPerVertexKHR))
			{
				decl_type.array.erase(decl_type.array.begin());
				decl_type.array_size_literal.erase(decl_type.array_size_literal.begin());
			}
			statement(to_interpolation_qualifiers(get_decoration_bitset(var.self)), variable_decl(decl_type, name), " : ",
			          semantic, ";");
			if (location_number != UINT32_MAX)
			{
				// Structs and arrays should consume more locations.
				uint32_t consumed_locations = type_to_consumed_locations(decl_type);
				for (uint32_t i = 0; i < consumed_locations; i++)
					active_locations.insert(location_number + i);
			}
		}
	}
	else
	{
		statement(variable_decl(type, name), " : ", binding, ";");
	}
}

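// Returns the expression used to reference a builtin from generated code.
// Most builtins defer to the GLSL names (gl_*), but a few are remapped to
// HLSL intrinsics (WaveGetLaneIndex, WaveGetLaneCount, IsHelperLane) or to
// the remapped NumWorkgroups cbuffer member.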
std::string CompilerHLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
{
	switch (builtin)
	{
	case BuiltInVertexId:
		return "gl_VertexID";
	case BuiltInInstanceId:
		return "gl_InstanceID";
	case BuiltInNumWorkgroups:
	{
		if (!num_workgroups_builtin)
			SPIRV_CROSS_THROW("NumWorkgroups builtin is used, but remap_num_workgroups_builtin() was not called. "
			                  "Cannot emit code for this builtin.");

		auto &var = get<SPIRVariable>(num_workgroups_builtin);
		auto &type = get<SPIRType>(var.basetype);
		auto ret = join(to_name(num_workgroups_builtin), "_", get_member_name(type.self, 0));
		ParsedIR::sanitize_underscores(ret);
		return ret;
	}
	case BuiltInPointCoord:
		// Crude hack, but there is no real alternative. This path is only enabled if point_coord_compat is set.
		return "float2(0.5f, 0.5f)";
	case BuiltInSubgroupLocalInvocationId:
		return "WaveGetLaneIndex()";
	case BuiltInSubgroupSize:
		return "WaveGetLaneCount()";
	case BuiltInHelperInvocation:
		return "IsHelperLane()";

	default:
		return CompilerGLSL::builtin_to_glsl(builtin, storage);
	}
}

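// Declares "static" global variables which back the active builtins.
// The generated entry point wrapper copies between these globals and the
// actual stage input/output structs.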
void CompilerHLSL::emit_builtin_variables()
{
	Bitset builtins = active_input_builtins;
	builtins.merge_or(active_output_builtins);

	std::unordered_map<uint32_t, ID> builtin_to_initializer;

	// We need to declare sample mask with the same type that module declares it.
	// Sample mask is somewhat special in that SPIR-V has an array, and we can copy that array, so we need to
	// match sign.
	SPIRType::BaseType sample_mask_in_basetype = SPIRType::Void;
	SPIRType::BaseType sample_mask_out_basetype = SPIRType::Void;

	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
		if (!is_builtin_variable(var))
			return;

		auto &type = this->get<SPIRType>(var.basetype);
		auto builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn));

		if (var.storage == StorageClassInput && builtin == BuiltInSampleMask)
			sample_mask_in_basetype = type.basetype;
		else if (var.storage == StorageClassOutput && builtin == BuiltInSampleMask)
			sample_mask_out_basetype = type.basetype;

		if (var.initializer && var.storage == StorageClassOutput)
		{
			auto *c = this->maybe_get<SPIRConstant>(var.initializer);
			if (!c)
				return;

			if (type.basetype == SPIRType::Struct)
			{
				uint32_t member_count = uint32_t(type.member_types.size());
				for (uint32_t i = 0; i < member_count; i++)
				{
					if (has_member_decoration(type.self, i, DecorationBuiltIn))
					{
						builtin_to_initializer[get_member_decoration(type.self, i, DecorationBuiltIn)] =
						    c->subconstants[i];
					}
				}
			}
			else if (has_decoration(var.self, DecorationBuiltIn))
			{
				builtin_to_initializer[builtin] = var.initializer;
			}
		}
	});

	// Emit global variables for the interface variables which are statically used by the shader.
	builtins.for_each_bit([&](uint32_t i) {
		const char *type = nullptr;
		auto builtin = static_cast<BuiltIn>(i);
		uint32_t array_size = 0;

		string init_expr;
		auto init_itr = builtin_to_initializer.find(builtin);
		if (init_itr != builtin_to_initializer.end())
			init_expr = join(" = ", to_expression(init_itr->second));

		if (get_execution_model() == ExecutionModelMeshEXT)
		{
			if (builtin == BuiltInPosition || builtin == BuiltInPointSize || builtin == BuiltInClipDistance ||
			    builtin == BuiltInCullDistance || builtin == BuiltInLayer || builtin == BuiltInPrimitiveId ||
			    builtin == BuiltInViewportIndex || builtin == BuiltInCullPrimitiveEXT ||
			    builtin == BuiltInPrimitiveShadingRateKHR || builtin == BuiltInPrimitivePointIndicesEXT ||
			    builtin == BuiltInPrimitiveLineIndicesEXT || builtin == BuiltInPrimitiveTriangleIndicesEXT)
			{
				return;
			}
		}

		switch (builtin)
		{
		case BuiltInFragCoord:
		case BuiltInPosition:
			type = "float4";
			break;

		case BuiltInFragDepth:
			type = "float";
			break;

		case BuiltInVertexId:
		case BuiltInVertexIndex:
		case BuiltInInstanceIndex:
			type = "int";
			if (hlsl_options.support_nonzero_base_vertex_base_instance || hlsl_options.shader_model >= 68)
				base_vertex_info.used = true;
			break;

		case BuiltInBaseVertex:
		case BuiltInBaseInstance:
			type = "int";
			base_vertex_info.used = true;
			break;

		case BuiltInInstanceId:
		case BuiltInSampleId:
			type = "int";
			break;

		case BuiltInPointSize:
			if (hlsl_options.point_size_compat || hlsl_options.shader_model <= 30)
			{
				// Just emit the global variable, it will be ignored.
				type = "float";
				break;
			}
			else
				SPIRV_CROSS_THROW(join("Unsupported builtin in HLSL: ", unsigned(builtin)));

		case BuiltInGlobalInvocationId:
		case BuiltInLocalInvocationId:
		case BuiltInWorkgroupId:
			type = "uint3";
			break;

		case BuiltInLocalInvocationIndex:
			type = "uint";
			break;

		case BuiltInFrontFacing:
			type = "bool";
			break;

		case BuiltInNumWorkgroups:
		case BuiltInPointCoord:
			// Handled specially.
			break;

		case BuiltInSubgroupLocalInvocationId:
		case BuiltInSubgroupSize:
			if (hlsl_options.shader_model < 60)
				SPIRV_CROSS_THROW("Need SM 6.0 for Wave ops.");
			break;

		case BuiltInSubgroupEqMask:
		case BuiltInSubgroupLtMask:
		case BuiltInSubgroupLeMask:
		case BuiltInSubgroupGtMask:
		case BuiltInSubgroupGeMask:
			if (hlsl_options.shader_model < 60)
				SPIRV_CROSS_THROW("Need SM 6.0 for Wave ops.");
			type = "uint4";
			break;

		case BuiltInHelperInvocation:
			if (hlsl_options.shader_model < 50)
				SPIRV_CROSS_THROW("Need SM 5.0 for Helper Invocation.");
			break;

		case BuiltInClipDistance:
			array_size = clip_distance_count;
			type = "float";
			break;

		case BuiltInCullDistance:
			array_size = cull_distance_count;
			type = "float";
			break;

		case BuiltInSampleMask:
			if (active_input_builtins.get(BuiltInSampleMask))
				type = sample_mask_in_basetype == SPIRType::UInt ? "uint" : "int";
			else
				type = sample_mask_out_basetype == SPIRType::UInt ? "uint" : "int";
			array_size = 1;
			break;

		case BuiltInPrimitiveId:
		case BuiltInViewIndex:
		case BuiltInLayer:
			type = "uint";
			break;

		case BuiltInViewportIndex:
		case BuiltInPrimitiveShadingRateKHR:
		case BuiltInPrimitiveLineIndicesEXT:
		case BuiltInCullPrimitiveEXT:
			type = "uint";
			break;

		case BuiltInBaryCoordKHR:
		case BuiltInBaryCoordNoPerspKHR:
			if (hlsl_options.shader_model < 61)
				SPIRV_CROSS_THROW("Need SM 6.1 for barycentrics.");
			type = "float3";
			break;

		default:
			SPIRV_CROSS_THROW(join("Unsupported builtin in HLSL: ", unsigned(builtin)));
		}

		StorageClass storage = active_input_builtins.get(i) ? StorageClassInput : StorageClassOutput;

		if (type)
		{
			if (array_size)
				statement("static ", type, " ", builtin_to_glsl(builtin, storage), "[", array_size, "]", init_expr, ";");
			else
				statement("static ", type, " ", builtin_to_glsl(builtin, storage), init_expr, ";");
		}

		// SampleMask can be both in and out with the sample builtin; in that case we have already
		// declared the input variable and we need to add the output one now.
		if (builtin == BuiltInSampleMask && storage == StorageClassInput && this->active_output_builtins.get(i))
		{
			type = sample_mask_out_basetype == SPIRType::UInt ? "uint" : "int";
			if (array_size)
				statement("static ", type, " ", this->builtin_to_glsl(builtin, StorageClassOutput), "[", array_size, "]", init_expr, ";");
			else
				statement("static ", type, " ", this->builtin_to_glsl(builtin, StorageClassOutput), init_expr, ";");
		}
	});

	if (base_vertex_info.used && hlsl_options.shader_model < 68)
	{
		string binding_info;
		if (base_vertex_info.explicit_binding)
		{
			binding_info = join(" : register(b", base_vertex_info.register_index);
			if (base_vertex_info.register_space)
				binding_info += join(", space", base_vertex_info.register_space);
			binding_info += ")";
		}
		statement("cbuffer SPIRV_Cross_VertexInfo", binding_info);
		begin_scope();
		statement("int SPIRV_Cross_BaseVertex;");
		statement("int SPIRV_Cross_BaseInstance;");
		end_scope_decl();
		statement("");
	}
}

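// Controls the register binding of the SPIRV_Cross_VertexInfo cbuffer emitted
// above, which supplies base vertex/instance values on shader models that lack
// SV_StartVertexLocation/SV_StartInstanceLocation.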
void CompilerHLSL::set_hlsl_aux_buffer_binding(HLSLAuxBinding binding, uint32_t register_index, uint32_t register_space)
{
	if (binding == HLSL_AUX_BINDING_BASE_VERTEX_INSTANCE)
	{
		base_vertex_info.explicit_binding = true;
		base_vertex_info.register_space = register_space;
		base_vertex_info.register_index = register_index;
	}
}

void CompilerHLSL::unset_hlsl_aux_buffer_binding(HLSLAuxBinding binding)
{
	if (binding == HLSL_AUX_BINDING_BASE_VERTEX_INSTANCE)
		base_vertex_info.explicit_binding = false;
}

bool CompilerHLSL::is_hlsl_aux_buffer_binding_used(HLSLAuxBinding binding) const
{
	if (binding == HLSL_AUX_BINDING_BASE_VERTEX_INSTANCE)
		return base_vertex_info.used;
	else
		return false;
}

void CompilerHLSL::emit_composite_constants()
{
	// HLSL cannot declare structs or arrays inline, so we must move them out to
	// global constants directly.
	bool emitted = false;

	ir.for_each_typed_id<SPIRConstant>([&](uint32_t, SPIRConstant &c) {
		if (c.specialization)
			return;

		auto &type = this->get<SPIRType>(c.constant_type);

		if (type.basetype == SPIRType::Struct && is_builtin_type(type))
			return;

		if (type.basetype == SPIRType::Struct || !type.array.empty())
		{
			add_resource_name(c.self);
			auto name = to_name(c.self);
			statement("static const ", variable_decl(type, name), " = ", constant_expression(c), ";");
			emitted = true;
		}
	});

	if (emitted)
		statement("");
}

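// HLSL has no native specialization constants, so each SPIR-V spec constant
// becomes an overridable preprocessor macro feeding a static const. For a
// hypothetical int constant MyConst with SpecId N and default 4, this emits roughly:
//
//   #ifndef SPIRV_CROSS_CONSTANT_ID_N
//   #define SPIRV_CROSS_CONSTANT_ID_N 4
//   #endif
//   static const int MyConst = SPIRV_CROSS_CONSTANT_ID_N;
//
// (Macro name comes from constant_value_macro_name(); exact output depends on the type.)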
void CompilerHLSL::emit_specialization_constants_and_structs()
{
	bool emitted = false;
	SpecializationConstant wg_x, wg_y, wg_z;
	ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);

	std::unordered_set<TypeID> io_block_types;
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
		auto &type = this->get<SPIRType>(var.basetype);
		if ((var.storage == StorageClassInput || var.storage == StorageClassOutput) &&
		    !var.remapped_variable && type.pointer && !is_builtin_variable(var) &&
		    interface_variable_exists_in_entry_point(var.self) &&
		    has_decoration(type.self, DecorationBlock))
		{
			io_block_types.insert(type.self);
		}
	});

	auto loop_lock = ir.create_loop_hard_lock();
	for (auto &id_ : ir.ids_for_constant_undef_or_type)
	{
		auto &id = ir.ids[id_];

		if (id.get_type() == TypeConstant)
		{
			auto &c = id.get<SPIRConstant>();

			if (c.self == workgroup_size_id)
			{
				statement("static const uint3 gl_WorkGroupSize = ",
				          constant_expression(get<SPIRConstant>(workgroup_size_id)), ";");
				emitted = true;
			}
			else if (c.specialization)
			{
				auto &type = get<SPIRType>(c.constant_type);
				add_resource_name(c.self);
				auto name = to_name(c.self);

				if (has_decoration(c.self, DecorationSpecId))
				{
					// HLSL does not support specialization constants, so fall back to macros.
					c.specialization_constant_macro_name =
					    constant_value_macro_name(get_decoration(c.self, DecorationSpecId));

					statement("#ifndef ", c.specialization_constant_macro_name);
					statement("#define ", c.specialization_constant_macro_name, " ", constant_expression(c));
					statement("#endif");
					statement("static const ", variable_decl(type, name), " = ", c.specialization_constant_macro_name, ";");
				}
				else
					statement("static const ", variable_decl(type, name), " = ", constant_expression(c), ";");

				emitted = true;
			}
		}
		else if (id.get_type() == TypeConstantOp)
		{
			auto &c = id.get<SPIRConstantOp>();
			auto &type = get<SPIRType>(c.basetype);
			add_resource_name(c.self);
			auto name = to_name(c.self);
			statement("static const ", variable_decl(type, name), " = ", constant_op_expression(c), ";");
			emitted = true;
		}
		else if (id.get_type() == TypeType)
		{
			auto &type = id.get<SPIRType>();
			bool is_non_io_block = has_decoration(type.self, DecorationBlock) &&
			                       io_block_types.count(type.self) == 0;
			bool is_buffer_block = has_decoration(type.self, DecorationBufferBlock);
			if (type.basetype == SPIRType::Struct && type.array.empty() &&
			    !type.pointer && !is_non_io_block && !is_buffer_block)
			{
				if (emitted)
					statement("");
				emitted = false;

				emit_struct(type);
			}
		}
		else if (id.get_type() == TypeUndef)
		{
			auto &undef = id.get<SPIRUndef>();
			auto &type = this->get<SPIRType>(undef.basetype);

			// OpUndef can be void for some reason ...
			if (type.basetype == SPIRType::Void)
				return;

			string initializer;
			if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
				initializer = join(" = ", to_zero_initialized_expression(undef.basetype));

			statement("static ", variable_decl(type, to_name(undef.self), undef.self), initializer, ";");
			emitted = true;
		}
	}

	if (emitted)
		statement("");
}

void CompilerHLSL::replace_illegal_names()
{
	static const unordered_set<string> keywords = {
		// Additional HLSL specific keywords.
		// From https://docs.microsoft.com/en-US/windows/win32/direct3dhlsl/dx-graphics-hlsl-appendix-keywords
		"AppendStructuredBuffer", "asm", "asm_fragment",
		"BlendState", "bool", "break", "Buffer", "ByteAddressBuffer",
		"case", "cbuffer", "centroid", "class", "column_major", "compile",
		"compile_fragment", "CompileShader", "const", "continue", "ComputeShader",
		"ConsumeStructuredBuffer",
		"default", "DepthStencilState", "DepthStencilView", "discard", "do",
		"double", "DomainShader", "dword",
		"else", "export", "false", "float", "for", "fxgroup",
		"GeometryShader", "groupshared", "half", "HullShader",
		"indices", "if", "in", "inline", "inout", "InputPatch", "int", "interface",
		"line", "lineadj", "linear", "LineStream",
		"matrix", "min16float", "min10float", "min16int", "min16uint",
		"namespace", "nointerpolation", "noperspective", "NULL",
		"out", "OutputPatch",
		"payload", "packoffset", "pass", "pixelfragment", "PixelShader", "point",
		"PointStream", "precise", "RasterizerState", "RenderTargetView",
		"return", "register", "row_major", "RWBuffer", "RWByteAddressBuffer",
		"RWStructuredBuffer", "RWTexture1D", "RWTexture1DArray", "RWTexture2D",
		"RWTexture2DArray", "RWTexture3D", "sample", "sampler", "SamplerState",
		"SamplerComparisonState", "shared", "snorm", "stateblock", "stateblock_state",
		"static", "string", "struct", "switch", "StructuredBuffer", "tbuffer",
		"technique", "technique10", "technique11", "texture", "Texture1D",
		"Texture1DArray", "Texture2D", "Texture2DArray", "Texture2DMS", "Texture2DMSArray",
		"Texture3D", "TextureCube", "TextureCubeArray", "true", "typedef", "triangle",
		"triangleadj", "TriangleStream", "uint", "uniform", "unorm", "unsigned",
		"vector", "vertexfragment", "VertexShader", "vertices", "void", "volatile", "while",
		"signed",
	};

	CompilerGLSL::replace_illegal_names(keywords);
	CompilerGLSL::replace_illegal_names();
}

SPIRType::BaseType CompilerHLSL::get_builtin_basetype(BuiltIn builtin, SPIRType::BaseType default_type)
{
	switch (builtin)
	{
	case BuiltInSampleMask:
		// We declare sample mask array with module type, so always use default_type here.
		return default_type;
	default:
		return CompilerGLSL::get_builtin_basetype(builtin, default_type);
	}
}

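// Emits all module-level declarations: constants, structs, UBO/SSBO blocks,
// push constants, samplers/images, builtin globals, the stage input/output
// structs, and any helper functions (mod, pack/unpack, bitfield ops,
// matrix inverse, etc.) that the shader body requires.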
void CompilerHLSL::emit_resources()
{
	auto &execution = get_entry_point();

	replace_illegal_names();

	switch (execution.model)
	{
	case ExecutionModelGeometry:
	case ExecutionModelTessellationControl:
	case ExecutionModelTessellationEvaluation:
	case ExecutionModelMeshEXT:
		fixup_implicit_builtin_block_names(execution.model);
		break;

	default:
		break;
	}

	emit_specialization_constants_and_structs();
	emit_composite_constants();

	bool emitted = false;

	// Output UBOs and SSBOs
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
		auto &type = this->get<SPIRType>(var.basetype);

		bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform;
		bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
		                       ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);

		if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) &&
		    has_block_flags)
		{
			emit_buffer_block(var);
			emitted = true;
		}
	});

	// Output push constant blocks
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
		auto &type = this->get<SPIRType>(var.basetype);
		if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant &&
		    !is_hidden_variable(var))
		{
			emit_push_constant_block(var);
			emitted = true;
		}
	});

	if (execution.model == ExecutionModelVertex && hlsl_options.shader_model <= 30 &&
	    active_output_builtins.get(BuiltInPosition))
	{
		statement("uniform float4 gl_HalfPixel;");
		emitted = true;
	}

	bool skip_separate_image_sampler = !combined_image_samplers.empty() || hlsl_options.shader_model <= 30;

	// Output Uniform Constants (values, samplers, images, etc).
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
		auto &type = this->get<SPIRType>(var.basetype);

		// If we're remapping separate samplers and images, only emit the combined samplers.
		if (skip_separate_image_sampler)
		{
			// Sampler buffers are always used without a sampler, and they will also work in regular D3D.
			bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer;
			bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
			bool separate_sampler = type.basetype == SPIRType::Sampler;
			if (!sampler_buffer && (separate_image || separate_sampler))
				return;
		}

		if (var.storage != StorageClassFunction && !is_builtin_variable(var) && !var.remapped_variable &&
		    type.pointer && (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter) &&
		    !is_hidden_variable(var))
		{
			emit_uniform(var);
			emitted = true;
		}
	});

	if (emitted)
		statement("");
	emitted = false;

	// Emit builtin input and output variables here.
	emit_builtin_variables();

	if (execution.model != ExecutionModelMeshEXT)
	{
		ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
			auto &type = this->get<SPIRType>(var.basetype);

			bool is_hidden = is_hidden_io_variable(var);
			if (var.storage != StorageClassFunction && !var.remapped_variable && type.pointer &&
			    (var.storage == StorageClassInput || var.storage == StorageClassOutput) && !is_builtin_variable(var) &&
			    interface_variable_exists_in_entry_point(var.self) && !is_hidden)
			{
				// Builtin variables are handled separately.
				emit_interface_block_globally(var);
				emitted = true;
			}
		});
	}

	if (emitted)
		statement("");
	emitted = false;

	require_input = false;
	require_output = false;
	unordered_set<uint32_t> active_inputs;
	unordered_set<uint32_t> active_outputs;

	struct IOVariable
	{
		const SPIRVariable *var;
		uint32_t location;
		uint32_t block_member_index;
		bool block;
	};

	SmallVector<IOVariable> input_variables;
	SmallVector<IOVariable> output_variables;
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
		auto &type = this->get<SPIRType>(var.basetype);
		bool block = has_decoration(type.self, DecorationBlock);

		if (var.storage != StorageClassInput && var.storage != StorageClassOutput)
			return;

		bool is_hidden = is_hidden_io_variable(var);
		if (!var.remapped_variable && type.pointer && !is_builtin_variable(var) &&
		    interface_variable_exists_in_entry_point(var.self) && !is_hidden)
		{
			if (block)
			{
				for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
				{
					uint32_t location = get_declared_member_location(var, i, false);
					if (var.storage == StorageClassInput)
						input_variables.push_back({ &var, location, i, true });
					else
						output_variables.push_back({ &var, location, i, true });
				}
			}
			else
			{
				uint32_t location = get_decoration(var.self, DecorationLocation);
				if (var.storage == StorageClassInput)
					input_variables.push_back({ &var, location, 0, false });
				else
					output_variables.push_back({ &var, location, 0, false });
			}
		}
	});

	const auto variable_compare = [&](const IOVariable &a, const IOVariable &b) -> bool {
		// Sort input and output variables based on, from more robust to less robust:
		// - Location
		// - Variable has a location
		// - Name comparison
		// - Variable has a name
		// - Fallback: ID
		bool has_location_a = a.block || has_decoration(a.var->self, DecorationLocation);
		bool has_location_b = b.block || has_decoration(b.var->self, DecorationLocation);

		if (has_location_a && has_location_b)
			return a.location < b.location;
		else if (has_location_a && !has_location_b)
			return true;
		else if (!has_location_a && has_location_b)
			return false;

		const auto &name1 = to_name(a.var->self);
		const auto &name2 = to_name(b.var->self);

		if (name1.empty() && name2.empty())
			return a.var->self < b.var->self;
		else if (name1.empty())
			return true;
		else if (name2.empty())
			return false;

		return name1.compare(name2) < 0;
	};

	auto input_builtins = active_input_builtins;
	input_builtins.clear(BuiltInNumWorkgroups);
	input_builtins.clear(BuiltInPointCoord);
	input_builtins.clear(BuiltInSubgroupSize);
	input_builtins.clear(BuiltInSubgroupLocalInvocationId);
	input_builtins.clear(BuiltInSubgroupEqMask);
	input_builtins.clear(BuiltInSubgroupLtMask);
	input_builtins.clear(BuiltInSubgroupLeMask);
	input_builtins.clear(BuiltInSubgroupGtMask);
	input_builtins.clear(BuiltInSubgroupGeMask);

	if (!input_variables.empty() || !input_builtins.empty())
	{
		require_input = true;
		statement("struct SPIRV_Cross_Input");

		begin_scope();
		sort(input_variables.begin(), input_variables.end(), variable_compare);
		for (auto &var : input_variables)
		{
			if (var.block)
				emit_interface_block_member_in_struct(*var.var, var.block_member_index, var.location, active_inputs);
			else
				emit_interface_block_in_struct(*var.var, active_inputs);
		}
		emit_builtin_inputs_in_struct();
		end_scope_decl();
		statement("");
	}

	const bool is_mesh_shader = execution.model == ExecutionModelMeshEXT;
	if (!output_variables.empty() || !active_output_builtins.empty())
	{
		sort(output_variables.begin(), output_variables.end(), variable_compare);
		require_output = !(is_mesh_shader || execution.model == ExecutionModelGeometry);

		statement(is_mesh_shader ? "struct gl_MeshPerVertexEXT" : "struct SPIRV_Cross_Output");
		begin_scope();
		for (auto &var : output_variables)
		{
			if (is_per_primitive_variable(*var.var))
				continue;
			if (var.block && is_mesh_shader && var.block_member_index != 0)
				continue;
			if (var.block && !is_mesh_shader)
				emit_interface_block_member_in_struct(*var.var, var.block_member_index, var.location, active_outputs);
			else
				emit_interface_block_in_struct(*var.var, active_outputs);
		}
		emit_builtin_outputs_in_struct();
		if (!is_mesh_shader)
			emit_builtin_primitive_outputs_in_struct();
		end_scope_decl();
		statement("");

		if (is_mesh_shader)
		{
			statement("struct gl_MeshPerPrimitiveEXT");
			begin_scope();
			for (auto &var : output_variables)
			{
				if (!is_per_primitive_variable(*var.var))
					continue;
				if (var.block && var.block_member_index != 0)
					continue;
				emit_interface_block_in_struct(*var.var, active_outputs);
			}
			emit_builtin_primitive_outputs_in_struct();
			end_scope_decl();
			statement("");
		}
	}

	// Global variables.
	for (auto global : global_variables)
	{
		auto &var = get<SPIRVariable>(global);
		if (is_hidden_variable(var, true))
			continue;
		if (var.storage == StorageClassTaskPayloadWorkgroupEXT && is_mesh_shader)
			continue;

		if (var.storage != StorageClassOutput)
		{
			if (!variable_is_lut(var))
			{
				add_resource_name(var.self);

				const char *storage = nullptr;
				switch (var.storage)
				{
				case StorageClassWorkgroup:
				case StorageClassTaskPayloadWorkgroupEXT:
					storage = "groupshared";
					break;

				default:
					storage = "static";
					break;
				}

				string initializer;
				if (options.force_zero_initialized_variables && var.storage == StorageClassPrivate &&
				    !var.initializer && !var.static_expression && type_can_zero_initialize(get_variable_data_type(var)))
				{
					initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(var)));
				}
				statement(storage, " ", variable_decl(var), initializer, ";");

				emitted = true;
			}
		}
	}

	if (emitted)
		statement("");
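
	// GLSL's mod() is floored (x - y * floor(x / y)) while HLSL's fmod() truncates,
	// so emit a floored-mod helper when OpFMod needs GLSL semantics.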
	if (requires_op_fmod)
	{
		static const char *types[] = {
			"float",
			"float2",
			"float3",
			"float4",
		};

		for (auto &type : types)
		{
			statement(type, " mod(", type, " x, ", type, " y)");
			begin_scope();
			statement("return x - y * floor(x / y);");
			end_scope();
			statement("");
		}
	}

	emit_texture_size_variants(required_texture_size_variants.srv, "4", false, "");
	for (uint32_t norm = 0; norm < 3; norm++)
	{
		for (uint32_t comp = 0; comp < 4; comp++)
		{
			static const char *qualifiers[] = { "", "unorm ", "snorm " };
			static const char *vecsizes[] = { "", "2", "3", "4" };
			emit_texture_size_variants(required_texture_size_variants.uav[norm][comp], vecsizes[comp], true,
			                           qualifiers[norm]);
		}
	}

	if (requires_fp16_packing)
	{
		// HLSL does not pack into a single word sadly :(
		statement("uint spvPackHalf2x16(float2 value)");
		begin_scope();
		statement("uint2 Packed = f32tof16(value);");
		statement("return Packed.x | (Packed.y << 16);");
		end_scope();
		statement("");

		statement("float2 spvUnpackHalf2x16(uint value)");
		begin_scope();
		statement("return f16tof32(uint2(value & 0xffff, value >> 16));");
		end_scope();
		statement("");
	}

	if (requires_uint2_packing)
	{
		statement("uint64_t spvPackUint2x32(uint2 value)");
		begin_scope();
		statement("return (uint64_t(value.y) << 32) | uint64_t(value.x);");
		end_scope();
		statement("");

		statement("uint2 spvUnpackUint2x32(uint64_t value)");
		begin_scope();
		statement("uint2 Unpacked;");
		statement("Unpacked.x = uint(value & 0xffffffff);");
		statement("Unpacked.y = uint(value >> 32);");
		statement("return Unpacked;");
		end_scope();
		statement("");
	}

	if (requires_explicit_fp16_packing)
	{
		// HLSL does not pack into a single word sadly :(
		statement("uint spvPackFloat2x16(min16float2 value)");
		begin_scope();
		statement("uint2 Packed = f32tof16(value);");
		statement("return Packed.x | (Packed.y << 16);");
		end_scope();
		statement("");

		statement("min16float2 spvUnpackFloat2x16(uint value)");
		begin_scope();
		statement("return min16float2(f16tof32(uint2(value & 0xffff, value >> 16)));");
		end_scope();
		statement("");
	}

	// HLSL does not seem to have builtins for these operations, so roll them by hand ...
	if (requires_unorm8_packing)
	{
		statement("uint spvPackUnorm4x8(float4 value)");
		begin_scope();
		statement("uint4 Packed = uint4(round(saturate(value) * 255.0));");
		statement("return Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24);");
		end_scope();
		statement("");

		statement("float4 spvUnpackUnorm4x8(uint value)");
		begin_scope();
		statement("uint4 Packed = uint4(value & 0xff, (value >> 8) & 0xff, (value >> 16) & 0xff, value >> 24);");
		statement("return float4(Packed) / 255.0;");
		end_scope();
		statement("");
	}

	if (requires_snorm8_packing)
	{
		statement("uint spvPackSnorm4x8(float4 value)");
		begin_scope();
		statement("int4 Packed = int4(round(clamp(value, -1.0, 1.0) * 127.0)) & 0xff;");
		statement("return uint(Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24));");
		end_scope();
		statement("");

		statement("float4 spvUnpackSnorm4x8(uint value)");
		begin_scope();
		statement("int SignedValue = int(value);");
		statement("int4 Packed = int4(SignedValue << 24, SignedValue << 16, SignedValue << 8, SignedValue) >> 24;");
		statement("return clamp(float4(Packed) / 127.0, -1.0, 1.0);");
		end_scope();
		statement("");
	}

	if (requires_unorm16_packing)
	{
		statement("uint spvPackUnorm2x16(float2 value)");
		begin_scope();
		statement("uint2 Packed = uint2(round(saturate(value) * 65535.0));");
		statement("return Packed.x | (Packed.y << 16);");
		end_scope();
		statement("");

		statement("float2 spvUnpackUnorm2x16(uint value)");
		begin_scope();
		statement("uint2 Packed = uint2(value & 0xffff, value >> 16);");
		statement("return float2(Packed) / 65535.0;");
		end_scope();
		statement("");
	}

	if (requires_snorm16_packing)
	{
		statement("uint spvPackSnorm2x16(float2 value)");
		begin_scope();
		statement("int2 Packed = int2(round(clamp(value, -1.0, 1.0) * 32767.0)) & 0xffff;");
		statement("return uint(Packed.x | (Packed.y << 16));");
		end_scope();
		statement("");

		statement("float2 spvUnpackSnorm2x16(uint value)");
		begin_scope();
		statement("int SignedValue = int(value);");
		statement("int2 Packed = int2(SignedValue << 16, SignedValue) >> 16;");
		statement("return clamp(float2(Packed) / 32767.0, -1.0, 1.0);");
		end_scope();
		statement("");
	}
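
	// HLSL has no bitfieldInsert/bitfieldExtract equivalents, so emulate them with
	// explicit mask and shift arithmetic. The signed extract sign-extends by shifting
	// the masked bits up to the MSB and back down.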
	if (requires_bitfield_insert)
	{
		static const char *types[] = { "uint", "uint2", "uint3", "uint4" };
		for (auto &type : types)
		{
			statement(type, " spvBitfieldInsert(", type, " Base, ", type, " Insert, uint Offset, uint Count)");
			begin_scope();
			statement("uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31));");
			statement("return (Base & ~Mask) | ((Insert << Offset) & Mask);");
			end_scope();
			statement("");
		}
	}

	if (requires_bitfield_extract)
	{
		static const char *unsigned_types[] = { "uint", "uint2", "uint3", "uint4" };
		for (auto &type : unsigned_types)
		{
			statement(type, " spvBitfieldUExtract(", type, " Base, uint Offset, uint Count)");
			begin_scope();
			statement("uint Mask = Count == 32 ? 0xffffffff : ((1 << Count) - 1);");
			statement("return (Base >> Offset) & Mask;");
			end_scope();
			statement("");
		}

		// In this overload, we will have to do sign-extension, which we will emulate by shifting up and down.
		static const char *signed_types[] = { "int", "int2", "int3", "int4" };
		for (auto &type : signed_types)
		{
			statement(type, " spvBitfieldSExtract(", type, " Base, int Offset, int Count)");
			begin_scope();
			statement("int Mask = Count == 32 ? -1 : ((1 << Count) - 1);");
			statement(type, " Masked = (Base >> Offset) & Mask;");
			statement("int ExtendShift = (32 - Count) & 31;");
			statement("return (Masked << ExtendShift) >> ExtendShift;");
			end_scope();
			statement("");
		}
	}

	if (requires_inverse_2x2)
	{
		statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical");
		statement("// adjoint and dividing by the determinant. The contents of the matrix are changed.");
		statement("float2x2 spvInverse(float2x2 m)");
		begin_scope();
		statement("float2x2 adj; // The adjoint matrix (inverse after dividing by determinant)");
		statement_no_indent("");
		statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix.");
		statement("adj[0][0] = m[1][1];");
		statement("adj[0][1] = -m[0][1];");
		statement_no_indent("");
		statement("adj[1][0] = -m[1][0];");
		statement("adj[1][1] = m[0][0];");
		statement_no_indent("");
		statement("// Calculate the determinant as a combination of the cofactors of the first row.");
		statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]);");
		statement_no_indent("");
		statement("// Divide the classical adjoint matrix by the determinant.");
		statement("// If determinant is zero, matrix is not invertible, so leave it unchanged.");
		statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;");
		end_scope();
		statement("");
	}

	if (requires_inverse_3x3)
	{
		statement("// Returns the determinant of a 2x2 matrix.");
		statement("float spvDet2x2(float a1, float a2, float b1, float b2)");
		begin_scope();
		statement("return a1 * b2 - b1 * a2;");
		end_scope();
		statement_no_indent("");
		statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical");
		statement("// adjoint and dividing by the determinant. The contents of the matrix are changed.");
		statement("float3x3 spvInverse(float3x3 m)");
		begin_scope();
		statement("float3x3 adj; // The adjoint matrix (inverse after dividing by determinant)");
		statement_no_indent("");
		statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix.");
		statement("adj[0][0] = spvDet2x2(m[1][1], m[1][2], m[2][1], m[2][2]);");
		statement("adj[0][1] = -spvDet2x2(m[0][1], m[0][2], m[2][1], m[2][2]);");
		statement("adj[0][2] = spvDet2x2(m[0][1], m[0][2], m[1][1], m[1][2]);");
		statement_no_indent("");
		statement("adj[1][0] = -spvDet2x2(m[1][0], m[1][2], m[2][0], m[2][2]);");
		statement("adj[1][1] = spvDet2x2(m[0][0], m[0][2], m[2][0], m[2][2]);");
		statement("adj[1][2] = -spvDet2x2(m[0][0], m[0][2], m[1][0], m[1][2]);");
		statement_no_indent("");
		statement("adj[2][0] = spvDet2x2(m[1][0], m[1][1], m[2][0], m[2][1]);");
		statement("adj[2][1] = -spvDet2x2(m[0][0], m[0][1], m[2][0], m[2][1]);");
		statement("adj[2][2] = spvDet2x2(m[0][0], m[0][1], m[1][0], m[1][1]);");
		statement_no_indent("");
		statement("// Calculate the determinant as a combination of the cofactors of the first row.");
		statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]);");
		statement_no_indent("");
		statement("// Divide the classical adjoint matrix by the determinant.");
		statement("// If determinant is zero, matrix is not invertible, so leave it unchanged.");
		statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;");
		end_scope();
		statement("");
	}

	if (requires_inverse_4x4)
	{
		if (!requires_inverse_3x3)
		{
			statement("// Returns the determinant of a 2x2 matrix.");
			statement("float spvDet2x2(float a1, float a2, float b1, float b2)");
			begin_scope();
			statement("return a1 * b2 - b1 * a2;");
			end_scope();
			statement("");
		}

		statement("// Returns the determinant of a 3x3 matrix.");
		statement("float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, "
		          "float c2, float c3)");
		begin_scope();
		statement("return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * "
		          "spvDet2x2(a2, a3, b2, b3);");
		end_scope();
		statement_no_indent("");
		statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical");
		statement("// adjoint and dividing by the determinant. The contents of the matrix are changed.");
		statement("float4x4 spvInverse(float4x4 m)");
		begin_scope();
		statement("float4x4 adj; // The adjoint matrix (inverse after dividing by determinant)");
		statement_no_indent("");
		statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix.");
		statement("adj[0][0] = spvDet3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], "
		          "m[3][3]);");
		statement("adj[0][1] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], "
		          "m[3][3]);");
		statement("adj[0][2] = spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], "
		          "m[3][3]);");
		statement("adj[0][3] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], "
		          "m[2][3]);");
		statement_no_indent("");
		statement("adj[1][0] = -spvDet3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], "
		          "m[3][3]);");
		statement("adj[1][1] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], "
		          "m[3][3]);");
		statement("adj[1][2] = -spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], "
		          "m[3][3]);");
		statement("adj[1][3] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], "
		          "m[2][3]);");
		statement_no_indent("");
		statement("adj[2][0] = spvDet3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], "
		          "m[3][3]);");
		statement("adj[2][1] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], "
		          "m[3][3]);");
		statement("adj[2][2] = spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], "
		          "m[3][3]);");
		statement("adj[2][3] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], "
		          "m[2][3]);");
		statement_no_indent("");
		statement("adj[3][0] = -spvDet3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], "
		          "m[3][2]);");
		statement("adj[3][1] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], "
		          "m[3][2]);");
		statement("adj[3][2] = -spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], "
		          "m[3][2]);");
		statement("adj[3][3] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], "
		          "m[2][2]);");
		statement_no_indent("");
		statement("// Calculate the determinant as a combination of the cofactors of the first row.");
		statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]) + (adj[0][3] "
		          "* m[3][0]);");
		statement_no_indent("");
		statement("// Divide the classical adjoint matrix by the determinant.");
		statement("// If determinant is zero, matrix is not invertible, so leave it unchanged.");
		statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;");
		end_scope();
		statement("");
	}

	if (requires_scalar_reflect)
	{
		// FP16/FP64? No templates in HLSL.
		statement("float spvReflect(float i, float n)");
		begin_scope();
		statement("return i - 2.0 * dot(n, i) * n;");
		end_scope();
		statement("");
	}

	if (requires_scalar_refract)
	{
		// FP16/FP64? No templates in HLSL.
		statement("float spvRefract(float i, float n, float eta)");
		begin_scope();
		statement("float NoI = n * i;");
		statement("float NoI2 = NoI * NoI;");
		statement("float k = 1.0 - eta * eta * (1.0 - NoI2);");
		statement("if (k < 0.0)");
		begin_scope();
		statement("return 0.0;");
		end_scope();
		statement("else");
		begin_scope();
		statement("return eta * i - (eta * NoI + sqrt(k)) * n;");
		end_scope();
		end_scope();
		statement("");
	}

	if (requires_scalar_faceforward)
	{
		// FP16/FP64? No templates in HLSL.
		statement("float spvFaceForward(float n, float i, float nref)");
		begin_scope();
		statement("return i * nref < 0.0 ? n : -n;");
		end_scope();
		statement("");
	}

	for (TypeID type_id : composite_selection_workaround_types)
	{
		// Need out variable since HLSL does not support returning arrays.
		auto &type = get<SPIRType>(type_id);
		auto type_str = type_to_glsl(type);
		auto type_arr_str = type_to_array_glsl(type, 0);
		statement("void spvSelectComposite(out ", type_str, " out_value", type_arr_str, ", bool cond, ",
		          type_str, " true_val", type_arr_str, ", ",
		          type_str, " false_val", type_arr_str, ")");
		begin_scope();
		statement("if (cond)");
		begin_scope();
		statement("out_value = true_val;");
		end_scope();
		statement("else");
		begin_scope();
		statement("out_value = false_val;");
		end_scope();
		end_scope();
		statement("");
	}

	if (is_mesh_shader && options.vertex.flip_vert_y)
	{
		statement("float4 spvFlipVertY(float4 v)");
		begin_scope();
		statement("return float4(v.x, -v.y, v.z, v.w);");
		end_scope();
		statement("");
		statement("float spvFlipVertY(float v)");
		begin_scope();
		statement("return -v;");
		end_scope();
		statement("");
	}
}

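// Emits spvTextureSize()/spvImageSize() overloads for each combination of texture
// dimension and sampled type requested in variant_mask. Each bit in the mask is
// 16 * type_index + dimension_index (see QueryTypeCount/QueryDimCount).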
void CompilerHLSL::emit_texture_size_variants(uint64_t variant_mask, const char *vecsize_qualifier, bool uav,
                                              const char *type_qualifier)
{
	if (variant_mask == 0)
		return;

	static const char *types[QueryTypeCount] = { "float", "int", "uint" };
	static const char *dims[QueryDimCount] = { "Texture1D", "Texture1DArray", "Texture2D", "Texture2DArray",
	                                           "Texture3D", "Buffer", "TextureCube", "TextureCubeArray",
	                                           "Texture2DMS", "Texture2DMSArray" };

	static const bool has_lod[QueryDimCount] = { true, true, true, true, true, false, true, true, false, false };

	static const char *ret_types[QueryDimCount] = {
		"uint", "uint2", "uint2", "uint3", "uint3", "uint", "uint2", "uint3", "uint2", "uint3",
	};

	static const uint32_t return_arguments[QueryDimCount] = {
		1, 2, 2, 3, 3, 1, 2, 3, 2, 3,
	};

	for (uint32_t index = 0; index < QueryDimCount; index++)
	{
		for (uint32_t type_index = 0; type_index < QueryTypeCount; type_index++)
		{
			uint32_t bit = 16 * type_index + index;
			uint64_t mask = 1ull << bit;

			if ((variant_mask & mask) == 0)
				continue;

			statement(ret_types[index], " spv", (uav ? "Image" : "Texture"), "Size(", (uav ? "RW" : ""),
			          dims[index], "<", type_qualifier, types[type_index], vecsize_qualifier, "> Tex, ",
			          (uav ? "" : "uint Level, "), "out uint Param)");
			begin_scope();
			statement(ret_types[index], " ret;");
			switch (return_arguments[index])
			{
			case 1:
				if (has_lod[index] && !uav)
					statement("Tex.GetDimensions(Level, ret.x, Param);");
				else
				{
					statement("Tex.GetDimensions(ret.x);");
					statement("Param = 0u;");
				}
				break;
			case 2:
				if (has_lod[index] && !uav)
					statement("Tex.GetDimensions(Level, ret.x, ret.y, Param);");
				else if (!uav)
					statement("Tex.GetDimensions(ret.x, ret.y, Param);");
				else
				{
					statement("Tex.GetDimensions(ret.x, ret.y);");
					statement("Param = 0u;");
				}
				break;
			case 3:
				if (has_lod[index] && !uav)
					statement("Tex.GetDimensions(Level, ret.x, ret.y, ret.z, Param);");
				else if (!uav)
					statement("Tex.GetDimensions(ret.x, ret.y, ret.z, Param);");
				else
				{
					statement("Tex.GetDimensions(ret.x, ret.y, ret.z);");
					statement("Param = 0u;");
				}
				break;
			}

			statement("return ret;");
			end_scope();
			statement("");
		}
	}
}

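// variant_mask packs one bit per (dimension, sampled type) pair: bit
// 16 * type_index + index, i.e. each of float/int/uint owns a 16-bit band
// indexed by query dimension. For example, the Texture2D/float variant with
// vecsize_qualifier "4" and no type_qualifier comes out as:
//   uint2 spvTextureSize(Texture2D<float4> Tex, uint Level, out uint Param)
//   {
//       uint2 ret;
//       Tex.GetDimensions(Level, ret.x, ret.y, Param);
//       return ret;
//   }
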
void CompilerHLSL::analyze_meshlet_writes()
{
	uint32_t id_per_vertex = 0;
	uint32_t id_per_primitive = 0;
	bool need_per_primitive = false;
	bool need_per_vertex = false;

	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
		auto &type = this->get<SPIRType>(var.basetype);
		bool block = has_decoration(type.self, DecorationBlock);
		if (var.storage == StorageClassOutput && block && is_builtin_variable(var))
		{
			auto flags = get_buffer_block_flags(var.self);
			if (flags.get(DecorationPerPrimitiveEXT))
				id_per_primitive = var.self;
			else
				id_per_vertex = var.self;
		}
		else if (var.storage == StorageClassOutput)
		{
			Bitset flags;
			if (block)
				flags = get_buffer_block_flags(var.self);
			else
				flags = get_decoration_bitset(var.self);

			if (flags.get(DecorationPerPrimitiveEXT))
				need_per_primitive = true;
			else
				need_per_vertex = true;
		}
	});

	// If we have per-primitive outputs, and no per-primitive builtins,
	// an empty version of gl_MeshPerPrimitiveEXT will be emitted.
	// If we don't use block IO for vertex output, we'll also need to synthesize the PerVertex block.

	const auto generate_block = [&](const char *block_name, const char *instance_name, bool per_primitive) -> uint32_t {
		auto &execution = get_entry_point();

		uint32_t op_type = ir.increase_bound_by(4);
		uint32_t op_arr = op_type + 1;
		uint32_t op_ptr = op_type + 2;
		uint32_t op_var = op_type + 3;

		auto &type = set<SPIRType>(op_type, OpTypeStruct);
		type.basetype = SPIRType::Struct;
		set_name(op_type, block_name);
		set_decoration(op_type, DecorationBlock);
		if (per_primitive)
			set_decoration(op_type, DecorationPerPrimitiveEXT);

		auto &arr = set<SPIRType>(op_arr, type);
		arr.op = OpTypeArray;
		arr.parent_type = type.self;
		arr.array.push_back(per_primitive ? execution.output_primitives : execution.output_vertices);
		arr.array_size_literal.push_back(true);

		auto &ptr = set<SPIRType>(op_ptr, arr);
		ptr.parent_type = arr.self;
		ptr.op = OpTypePointer;
		ptr.pointer = true;
		ptr.pointer_depth++;
		ptr.storage = StorageClassOutput;
		set_decoration(op_ptr, DecorationBlock);
		set_name(op_ptr, block_name);

		auto &var = set<SPIRVariable>(op_var, op_ptr, StorageClassOutput);
		if (per_primitive)
			set_decoration(op_var, DecorationPerPrimitiveEXT);
		set_name(op_var, instance_name);
		execution.interface_variables.push_back(var.self);

		return op_var;
	};

	if (id_per_vertex == 0 && need_per_vertex)
		id_per_vertex = generate_block("gl_MeshPerVertexEXT", "gl_MeshVerticesEXT", false);
	if (id_per_primitive == 0 && need_per_primitive)
		id_per_primitive = generate_block("gl_MeshPerPrimitiveEXT", "gl_MeshPrimitivesEXT", true);

	unordered_set<uint32_t> processed_func_ids;
	analyze_meshlet_writes(ir.default_entry_point, id_per_vertex, id_per_primitive, processed_func_ids);
}

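// generate_block above reserves four ids (struct type, sized array, output
// pointer, variable), so a synthesized block behaves as if the module had
// declared e.g. gl_MeshVerticesEXT[N] itself, with N taken from
// OpExecutionMode OutputVertices (or OutputPrimitivesEXT in the
// per-primitive case).
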
void CompilerHLSL::analyze_meshlet_writes(uint32_t func_id, uint32_t id_per_vertex, uint32_t id_per_primitive,
                                          std::unordered_set<uint32_t> &processed_func_ids)
{
	// Avoid processing a function more than once.
	if (processed_func_ids.find(func_id) != processed_func_ids.end())
		return;
	processed_func_ids.insert(func_id);

	auto &func = get<SPIRFunction>(func_id);

	// Recursively establish global args added to functions on which we depend.
	for (auto &block : func.blocks)
	{
		auto &b = get<SPIRBlock>(block);
		for (auto &i : b.ops)
		{
			auto ops = stream(i);
			auto op = static_cast<Op>(i.op);

			switch (op)
			{
			case OpFunctionCall:
			{
				// Then recurse into the function itself to extract globals used internally in the function.
				uint32_t inner_func_id = ops[2];
				analyze_meshlet_writes(inner_func_id, id_per_vertex, id_per_primitive, processed_func_ids);
				auto &inner_func = get<SPIRFunction>(inner_func_id);
				for (auto &iarg : inner_func.arguments)
				{
					if (!iarg.alias_global_variable)
						continue;

					bool already_declared = false;
					for (auto &arg : func.arguments)
					{
						if (arg.id == iarg.id)
						{
							already_declared = true;
							break;
						}
					}

					if (!already_declared)
					{
						// basetype is effectively ignored here since we declare the argument
						// with explicit types. Just pass down a valid type.
						func.arguments.push_back({ expression_type_id(iarg.id), iarg.id,
						                           iarg.read_count, iarg.write_count, true });
					}
				}
				break;
			}

			case OpStore:
			case OpLoad:
			case OpInBoundsAccessChain:
			case OpAccessChain:
			case OpPtrAccessChain:
			case OpInBoundsPtrAccessChain:
			case OpArrayLength:
			{
				auto *var = maybe_get<SPIRVariable>(ops[op == OpStore ? 0 : 2]);
				if (var && (var->storage == StorageClassOutput || var->storage == StorageClassTaskPayloadWorkgroupEXT))
				{
					bool already_declared = false;
					auto builtin_type = BuiltIn(get_decoration(var->self, DecorationBuiltIn));

					uint32_t var_id = var->self;
					if (var->storage != StorageClassTaskPayloadWorkgroupEXT &&
					    builtin_type != BuiltInPrimitivePointIndicesEXT &&
					    builtin_type != BuiltInPrimitiveLineIndicesEXT &&
					    builtin_type != BuiltInPrimitiveTriangleIndicesEXT)
					{
						var_id = is_per_primitive_variable(*var) ? id_per_primitive : id_per_vertex;
					}

					for (auto &arg : func.arguments)
					{
						if (arg.id == var_id)
						{
							already_declared = true;
							break;
						}
					}

					if (!already_declared)
					{
						// basetype is effectively ignored here since we declare the argument
						// with explicit types. Just pass down a valid type.
						uint32_t type_id = expression_type_id(var_id);
						if (var->storage == StorageClassTaskPayloadWorkgroupEXT)
							func.arguments.push_back({ type_id, var_id, 1u, 0u, true });
						else
							func.arguments.push_back({ type_id, var_id, 1u, 1u, true });
					}
				}
				break;
			}

			default:
				break;
			}
		}
	}
}

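// Net effect of this pass: any function that directly or transitively touches
// mesh outputs or the task payload gets those globals appended as explicit
// parameters, marked alias_global_variable so the later declaration step can
// emit them with explicit types.
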
string CompilerHLSL::layout_for_member(const SPIRType &type, uint32_t index)
{
	auto &flags = get_member_decoration_bitset(type.self, index);

	// HLSL can emit row_major or column_major decoration in any struct.
	// Do not try to merge combined decorations for children like in GLSL.
	// Flip the convention. HLSL is a bit odd in that the memory layout is column major ... but the language API is "row-major".
	// The way to deal with this is to multiply everything in inverse order, and reverse the memory layout.
	if (flags.get(DecorationColMajor))
		return "row_major ";
	else if (flags.get(DecorationRowMajor))
		return "column_major ";

	return "";
}

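// Illustrative reading of the flip above: a matrix member decorated RowMajor
// in SPIR-V is declared "column_major" in the emitted HLSL (and vice versa);
// together with the reversed multiply order this keeps the cbuffer byte
// layout matching the SPIR-V offsets.
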
void CompilerHLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
                                      const string &qualifier, uint32_t base_offset)
{
	auto &membertype = get<SPIRType>(member_type_id);

	Bitset memberflags;
	auto &memb = ir.meta[type.self].members;
	if (index < memb.size())
		memberflags = memb[index].decoration_flags;

	string packing_offset;
	bool is_push_constant = type.storage == StorageClassPushConstant;

	if ((has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) || is_push_constant) &&
	    has_member_decoration(type.self, index, DecorationOffset))
	{
		uint32_t offset = memb[index].offset - base_offset;
		if (offset & 3)
			SPIRV_CROSS_THROW("Cannot pack on tighter bounds than 4 bytes in HLSL.");

		static const char *packing_swizzle[] = { "", ".y", ".z", ".w" };
		packing_offset = join(" : packoffset(c", offset / 16, packing_swizzle[(offset & 15) >> 2], ")");
	}

	statement(layout_for_member(type, index), qualifier,
	          variable_decl(membertype, to_member_name(type, index)), packing_offset, ";");
}

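// Worked example of the packoffset computation above: a member at byte
// offset 20 lands in register c1 (20 / 16 == 1) at component .y
// ((20 & 15) >> 2 == 1), i.e. it is emitted as ": packoffset(c1.y)".
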
void CompilerHLSL::emit_rayquery_function(const char *committed, const char *candidate, const uint32_t *ops)
{
	flush_variable_declaration(ops[0]);
	uint32_t is_committed = evaluate_constant_u32(ops[3]);
	emit_op(ops[0], ops[1], join(to_expression(ops[2]), is_committed ? committed : candidate), false);
}

void CompilerHLSL::emit_mesh_tasks(SPIRBlock &block)
{
	if (block.mesh.payload != 0)
	{
		statement("DispatchMesh(", to_unpacked_expression(block.mesh.groups[0]), ", ", to_unpacked_expression(block.mesh.groups[1]), ", ",
		          to_unpacked_expression(block.mesh.groups[2]), ", ", to_unpacked_expression(block.mesh.payload), ");");
	}
	else
	{
		SPIRV_CROSS_THROW("Amplification shader in HLSL must have a payload.");
	}
}

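// For reference, with group counts (gx, gy, gz) and a payload variable "p"
// this emits: DispatchMesh(gx, gy, gz, p); the exact spellings depend on how
// the group-count and payload expressions print.
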
void CompilerHLSL::emit_geometry_stream_append()
{
	begin_scope();
	statement("SPIRV_Cross_Output stage_output;");

	active_output_builtins.for_each_bit([&](uint32_t i) {
		if (i == BuiltInPointSize && hlsl_options.shader_model > 30)
			return;

		switch (static_cast<BuiltIn>(i))
		{
		case BuiltInClipDistance:
			for (uint32_t clip = 0; clip < clip_distance_count; clip++)
				statement("stage_output.gl_ClipDistance", clip / 4, ".", "xyzw"[clip & 3], " = gl_ClipDistance[",
				          clip, "];");
			break;

		case BuiltInCullDistance:
			for (uint32_t cull = 0; cull < cull_distance_count; cull++)
				statement("stage_output.gl_CullDistance", cull / 4, ".", "xyzw"[cull & 3], " = gl_CullDistance[",
				          cull, "];");
			break;

		case BuiltInSampleMask:
			statement("stage_output.gl_SampleMask = gl_SampleMask[0];");
			break;

		default:
		{
			auto builtin_expr = builtin_to_glsl(static_cast<BuiltIn>(i), StorageClassOutput);
			statement("stage_output.", builtin_expr, " = ", builtin_expr, ";");
		}
		break;
		}
	});

	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
		auto &type = this->get<SPIRType>(var.basetype);
		bool block = has_decoration(type.self, DecorationBlock);

		if (var.storage != StorageClassOutput)
			return;

		if (!var.remapped_variable && type.pointer && !is_builtin_variable(var) &&
		    interface_variable_exists_in_entry_point(var.self))
		{
			if (block)
			{
				auto type_name = to_name(type.self);
				auto var_name = to_name(var.self);
				for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(type.member_types.size()); mbr_idx++)
				{
					auto mbr_name = to_member_name(type, mbr_idx);
					auto flat_name = join(type_name, "_", mbr_name);
					statement("stage_output.", flat_name, " = ", var_name, ".", mbr_name, ";");
				}
			}
			else
			{
				auto name = to_name(var.self);
				if (hlsl_options.shader_model <= 30 && get_entry_point().model == ExecutionModelFragment)
				{
					string output_filler;
					for (uint32_t size = type.vecsize; size < 4; ++size)
						output_filler += ", 0.0";
					statement("stage_output.", name, " = float4(", name, output_filler, ");");
				}
				else
					statement("stage_output.", name, " = ", name, ";");
			}
		}
	});

	statement("geometry_stream.Append(stage_output);");
	end_scope();
}

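// Sketch of what the block above looks like in the generated geometry shader
// (variable names depend on the module):
//   {
//       SPIRV_Cross_Output stage_output;
//       stage_output.gl_Position = gl_Position;
//       stage_output.vColor = vColor;
//       geometry_stream.Append(stage_output);
//   }
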
void CompilerHLSL::emit_buffer_block(const SPIRVariable &var)
{
	auto &type = get<SPIRType>(var.basetype);

	bool is_uav = var.storage == StorageClassStorageBuffer || has_decoration(type.self, DecorationBufferBlock);

	if (flattened_buffer_blocks.count(var.self))
	{
		emit_buffer_block_flattened(var);
	}
	else if (is_uav)
	{
		Bitset flags = ir.get_buffer_block_flags(var);
		bool is_readonly = flags.get(DecorationNonWritable) && !is_hlsl_force_storage_buffer_as_uav(var.self);
		bool is_coherent = flags.get(DecorationCoherent) && !is_readonly;
		bool is_interlocked = interlocked_resources.count(var.self) > 0;

		auto to_structuredbuffer_subtype_name = [this](const SPIRType &parent_type) -> std::string
		{
			if (parent_type.basetype == SPIRType::Struct && parent_type.member_types.size() == 1)
			{
				// Use the type of the first struct member, as a StructuredBuffer will have only one '._m0' field in SPIR-V.
				const auto &member0_type = this->get<SPIRType>(parent_type.member_types.front());
				return this->type_to_glsl(member0_type);
			}
			else
			{
				// Otherwise, this StructuredBuffer only has a basic subtype, e.g. StructuredBuffer<int>.
				return this->type_to_glsl(parent_type);
			}
		};

		std::string type_name;
		if (is_user_type_structured(var.self))
			type_name = join(is_readonly ? "" : is_interlocked ? "RasterizerOrdered" : "RW", "StructuredBuffer<", to_structuredbuffer_subtype_name(type), ">");
		else
			type_name = is_readonly ? "ByteAddressBuffer" : is_interlocked ? "RasterizerOrderedByteAddressBuffer" : "RWByteAddressBuffer";

		add_resource_name(var.self);
		statement(is_coherent ? "globallycoherent " : "", type_name, " ", to_name(var.self), type_to_array_glsl(type, var.self),
		          to_resource_binding(var), ";");
	}
	else
	{
		if (type.array.empty())
		{
			// Flatten the top-level struct so we can use packoffset;
			// this restriction is similar to GLSL where layout(offset) is not possible on sub-structs.
			flattened_structs[var.self] = false;

			// Prefer the block name if possible.
			auto buffer_name = to_name(type.self, false);
			if (ir.meta[type.self].decoration.alias.empty() ||
			    resource_names.find(buffer_name) != end(resource_names) ||
			    block_names.find(buffer_name) != end(block_names))
			{
				buffer_name = get_block_fallback_name(var.self);
			}

			add_variable(block_names, resource_names, buffer_name);

			// If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
			// This cannot conflict with anything else, so we're safe now.
			if (buffer_name.empty())
				buffer_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self);

			uint32_t failed_index = 0;
			if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset, &failed_index))
				set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
			else
			{
				SPIRV_CROSS_THROW(join("cbuffer ID ", var.self, " (name: ", buffer_name, "), member index ",
				                       failed_index, " (name: ", to_member_name(type, failed_index),
				                       ") cannot be expressed with either HLSL packing layout or packoffset."));
			}

			block_names.insert(buffer_name);

			// Save for post-reflection later.
			declared_block_names[var.self] = buffer_name;

			type.member_name_cache.clear();
			// var.self can be used as a backup name for the block name,
			// so we need to make sure we don't disturb the name here on a recompile.
			// It will need to be reset if we have to recompile.
			preserve_alias_on_reset(var.self);
			add_resource_name(var.self);
			statement("cbuffer ", buffer_name, to_resource_binding(var));
			begin_scope();

			uint32_t i = 0;
			for (auto &member : type.member_types)
			{
				add_member_name(type, i);
				auto backup_name = get_member_name(type.self, i);
				auto member_name = to_member_name(type, i);
				member_name = join(to_name(var.self), "_", member_name);
				ParsedIR::sanitize_underscores(member_name);
				set_member_name(type.self, i, member_name);
				emit_struct_member(type, member, i, "");
				set_member_name(type.self, i, backup_name);
				i++;
			}

			end_scope_decl();
			statement("");
		}
		else
		{
			if (hlsl_options.shader_model < 51)
				SPIRV_CROSS_THROW(
				    "Need ConstantBuffer<T> to use arrays of UBOs, but this is only supported in SM 5.1.");

			add_resource_name(type.self);
			add_resource_name(var.self);

			// ConstantBuffer<T> does not support packoffset, so it is unusable unless everything aligns as we expect.
			uint32_t failed_index = 0;
			if (!buffer_is_packing_standard(type, BufferPackingHLSLCbuffer, &failed_index))
			{
				SPIRV_CROSS_THROW(join("HLSL ConstantBuffer<T> ID ", var.self, " (name: ", to_name(type.self),
				                       "), member index ", failed_index, " (name: ", to_member_name(type, failed_index),
				                       ") cannot be expressed with normal HLSL packing rules."));
			}

			emit_struct(get<SPIRType>(type.self));
			statement("ConstantBuffer<", to_name(type.self), "> ", to_name(var.self), type_to_array_glsl(type, var.self),
			          to_resource_binding(var), ";");
		}
	}
}

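// Illustrative output shapes (registers depend on the resource bindings):
// the UAV path emits declarations along the lines of
//   globallycoherent RWByteAddressBuffer _45 : register(u0);
//   RWStructuredBuffer<uint> counters : register(u1);
// while the cbuffer path flattens members behind packoffset, e.g.
//   cbuffer UBO : register(b0)
//   {
//       row_major float4x4 UBO_mvp : packoffset(c0);
//   };
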
void CompilerHLSL::emit_push_constant_block(const SPIRVariable &var)
{
	if (flattened_buffer_blocks.count(var.self))
	{
		emit_buffer_block_flattened(var);
	}
	else if (root_constants_layout.empty())
	{
		emit_buffer_block(var);
	}
	else
	{
		for (const auto &layout : root_constants_layout)
		{
			auto &type = get<SPIRType>(var.basetype);

			uint32_t failed_index = 0;
			if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset, &failed_index, layout.start,
			                               layout.end))
				set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
			else
			{
				SPIRV_CROSS_THROW(join("Root constant cbuffer ID ", var.self, " (name: ", to_name(type.self), ")",
				                       ", member index ", failed_index, " (name: ", to_member_name(type, failed_index),
				                       ") cannot be expressed with either HLSL packing layout or packoffset."));
			}

			flattened_structs[var.self] = false;
			type.member_name_cache.clear();
			add_resource_name(var.self);
			auto &memb = ir.meta[type.self].members;

			statement("cbuffer SPIRV_CROSS_RootConstant_", to_name(var.self),
			          to_resource_register(HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT, 'b', layout.binding, layout.space));
			begin_scope();

			// Index of the next field in the generated root-constant cbuffer.
			auto constant_index = 0u;

			// Iterate over all members of the push constant block and check which of the fields
			// fit into the given root constant layout.
			for (auto i = 0u; i < memb.size(); i++)
			{
				const auto offset = memb[i].offset;
				if (layout.start <= offset && offset < layout.end)
				{
					const auto &member = type.member_types[i];

					add_member_name(type, constant_index);
					auto backup_name = get_member_name(type.self, i);
					auto member_name = to_member_name(type, i);
					member_name = join(to_name(var.self), "_", member_name);
					ParsedIR::sanitize_underscores(member_name);
					set_member_name(type.self, constant_index, member_name);
					emit_struct_member(type, member, i, "", layout.start);
					set_member_name(type.self, constant_index, backup_name);
					constant_index++;
				}
			}

			end_scope_decl();
		}
	}
}

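// Sketch of the emitted shape (register and space come from the user-provided
// root constant layouts); each layout range becomes its own cbuffer, e.g.
//   cbuffer SPIRV_CROSS_RootConstant_pc : register(b0, space15)
//   {
//       float4 pc_color : packoffset(c0);
//   };
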
string CompilerHLSL::to_sampler_expression(uint32_t id)
{
	auto expr = join("_", to_non_uniform_aware_expression(id));
	auto index = expr.find_first_of('[');
	if (index == string::npos)
	{
		return expr + "_sampler";
	}
	else
	{
		// We have an expression like _ident[array], so we cannot tack on _sampler, insert it inside the string instead.
		return expr.insert(index, "_sampler");
	}
}

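// Examples of the mapping above: "uTex" becomes "_uTex_sampler", while an
// arrayed access such as "uTextures[i]" becomes "_uTextures_sampler[i]" so
// the subscript stays on the sampler array.
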
void CompilerHLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id)
{
	if (hlsl_options.shader_model >= 40 && combined_image_samplers.empty())
	{
		set<SPIRCombinedImageSampler>(result_id, result_type, image_id, samp_id);
	}
	else
	{
		// Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types.
		emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true, true);
	}
}

string CompilerHLSL::to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id)
{
	string arg_str = CompilerGLSL::to_func_call_arg(arg, id);

	if (hlsl_options.shader_model <= 30)
		return arg_str;

	// Manufacture automatic sampler arg if the arg is a SampledImage texture and we're in modern HLSL.
	auto &type = expression_type(id);

	// We don't have to consider combined image samplers here via OpSampledImage because
	// those variables cannot be passed as arguments to functions.
	// Only global SampledImage variables may be used as arguments.
	if (type.basetype == SPIRType::SampledImage && type.image.dim != DimBuffer)
		arg_str += ", " + to_sampler_expression(id);

	return arg_str;
}

string CompilerHLSL::get_inner_entry_point_name() const
{
	auto &execution = get_entry_point();

	if (hlsl_options.use_entry_point_name)
	{
		auto name = join(execution.name, "_inner");
		ParsedIR::sanitize_underscores(name);
		return name;
	}

	if (execution.model == ExecutionModelVertex)
		return "vert_main";
	else if (execution.model == ExecutionModelFragment)
		return "frag_main";
	else if (execution.model == ExecutionModelGLCompute)
		return "comp_main";
	else if (execution.model == ExecutionModelGeometry)
		return "geom_main";
	else if (execution.model == ExecutionModelMeshEXT)
		return "mesh_main";
	else if (execution.model == ExecutionModelTaskEXT)
		return "task_main";
	else
		SPIRV_CROSS_THROW("Unsupported execution model.");
}

uint32_t CompilerHLSL::input_vertices_from_execution_mode(SPIREntryPoint &execution) const
{
	uint32_t input_vertices = 1;
	if (execution.flags.get(ExecutionModeInputLines))
		input_vertices = 2;
	else if (execution.flags.get(ExecutionModeInputLinesAdjacency))
		input_vertices = 4;
	else if (execution.flags.get(ExecutionModeInputTrianglesAdjacency))
		input_vertices = 6;
	else if (execution.flags.get(ExecutionModeTriangles))
		input_vertices = 3;
	else if (execution.flags.get(ExecutionModeInputPoints))
		input_vertices = 1;
	else
		SPIRV_CROSS_THROW("Unsupported input primitive execution mode.");
	return input_vertices;
}

void CompilerHLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags)
{
	if (func.self != ir.default_entry_point)
		add_function_overload(func);

	// Avoid shadow declarations.
	local_variable_names = resource_names;

	string decl;

	auto &type = get<SPIRType>(func.return_type);
	if (type.array.empty())
	{
		decl += flags_to_qualifiers_glsl(type, 0, return_flags);
		decl += type_to_glsl(type);
		decl += " ";
	}
	else
	{
		// We cannot return arrays in HLSL, so "return" through an out variable.
		decl = "void ";
	}

	if (func.self == ir.default_entry_point)
	{
		decl += get_inner_entry_point_name();
		processing_entry_point = true;
	}
	else
		decl += to_name(func.self);

	decl += "(";
	SmallVector<string> arglist;

	if (!type.array.empty())
	{
		// Fake array returns by writing to an out array instead.
		string out_argument;
		out_argument += "out ";
		out_argument += type_to_glsl(type);
		out_argument += " ";
		out_argument += "spvReturnValue";
		out_argument += type_to_array_glsl(type, 0);
		arglist.push_back(std::move(out_argument));
	}

	for (auto &arg : func.arguments)
	{
		// Do not pass in separate images or samplers if we're remapping
		// to combined image samplers.
		if (skip_argument(arg.id))
			continue;

		// Might change the variable name if it already exists in this function.
		// SPIR-V OpName doesn't have any semantic effect, so it's valid for an implementation
		// to use the same name for variables.
		// Since we want to make the output debuggable and somewhat sane, use fallback names for variables which are duplicates.
		add_local_variable_name(arg.id);

		arglist.push_back(argument_decl(arg));

		// Flatten a combined sampler to two separate arguments in modern HLSL.
		auto &arg_type = get<SPIRType>(arg.type);
		if (hlsl_options.shader_model > 30 && arg_type.basetype == SPIRType::SampledImage &&
		    arg_type.image.dim != DimBuffer)
		{
			// Manufacture automatic sampler arg for SampledImage texture.
			arglist.push_back(join(is_depth_image(arg_type, arg.id) ? "SamplerComparisonState " : "SamplerState ",
			                       to_sampler_expression(arg.id), type_to_array_glsl(arg_type, arg.id)));
		}

		// Hold a pointer to the parameter so we can invalidate the readonly field if needed.
		auto *var = maybe_get<SPIRVariable>(arg.id);
		if (var)
			var->parameter = &arg;
	}

	for (auto &arg : func.shadow_arguments)
	{
		// Might change the variable name if it already exists in this function.
		// SPIR-V OpName doesn't have any semantic effect, so it's valid for an implementation
		// to use the same name for variables.
		// Since we want to make the output debuggable and somewhat sane, use fallback names for variables which are duplicates.
		add_local_variable_name(arg.id);

		arglist.push_back(argument_decl(arg));

		// Hold a pointer to the parameter so we can invalidate the readonly field if needed.
		auto *var = maybe_get<SPIRVariable>(arg.id);
		if (var)
			var->parameter = &arg;
	}

	if ((func.self == ir.default_entry_point || func.emits_geometry) &&
	    get_entry_point().model == ExecutionModelGeometry)
	{
		auto &execution = get_entry_point();
		uint32_t input_vertices = input_vertices_from_execution_mode(execution);

		const char *prim;
		if (execution.flags.get(ExecutionModeInputLinesAdjacency))
			prim = "lineadj";
		else if (execution.flags.get(ExecutionModeInputLines))
			prim = "line";
		else if (execution.flags.get(ExecutionModeInputTrianglesAdjacency))
			prim = "triangleadj";
		else if (execution.flags.get(ExecutionModeTriangles))
			prim = "triangle";
		else
			prim = "point";

		const char *stream_type;
		if (execution.flags.get(ExecutionModeOutputPoints))
			stream_type = "PointStream";
		else if (execution.flags.get(ExecutionModeOutputLineStrip))
			stream_type = "LineStream";
		else
			stream_type = "TriangleStream";

		if (func.self == ir.default_entry_point)
			arglist.push_back(join(prim, " SPIRV_Cross_Input stage_input[", input_vertices, "]"));
		arglist.push_back(join("inout ", stream_type, "<SPIRV_Cross_Output> ", "geometry_stream"));
	}

	decl += merge(arglist);
	decl += ")";
	statement(decl);
}

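// For example, a hypothetical helper taking a SampledImage parameter is
// flattened into two arguments, roughly:
//   float4 sample_tex(Texture2D<float4> uTex, SamplerState _uTex_sampler)
// with the sampler name produced by to_sampler_expression above.
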
void CompilerHLSL::emit_hlsl_entry_point()
{
	SmallVector<string> arguments;

	if (require_input && get_entry_point().model != ExecutionModelGeometry)
		arguments.push_back("SPIRV_Cross_Input stage_input");

	auto &execution = get_entry_point();
	uint32_t input_vertices = 1;

	switch (execution.model)
	{
	case ExecutionModelGeometry:
	{
		input_vertices = input_vertices_from_execution_mode(execution);

		string prim;
		if (execution.flags.get(ExecutionModeInputLinesAdjacency))
			prim = "lineadj";
		else if (execution.flags.get(ExecutionModeInputLines))
			prim = "line";
		else if (execution.flags.get(ExecutionModeInputTrianglesAdjacency))
			prim = "triangleadj";
		else if (execution.flags.get(ExecutionModeTriangles))
			prim = "triangle";
		else
			prim = "point";

		string stream_type;
		if (execution.flags.get(ExecutionModeOutputPoints))
		{
			stream_type = "PointStream";
		}
		else if (execution.flags.get(ExecutionModeOutputLineStrip))
		{
			stream_type = "LineStream";
		}
		else
		{
			stream_type = "TriangleStream";
		}

		statement("[maxvertexcount(", execution.output_vertices, ")]");
		arguments.push_back(join(prim, " SPIRV_Cross_Input stage_input[", input_vertices, "]"));
		arguments.push_back(join("inout ", stream_type, "<SPIRV_Cross_Output> ", "geometry_stream"));
		break;
	}

	case ExecutionModelTaskEXT:
	case ExecutionModelMeshEXT:
	case ExecutionModelGLCompute:
	{
		if (execution.model == ExecutionModelMeshEXT)
		{
			if (execution.flags.get(ExecutionModeOutputTrianglesEXT))
				statement("[outputtopology(\"triangle\")]");
			else if (execution.flags.get(ExecutionModeOutputLinesEXT))
				statement("[outputtopology(\"line\")]");
			else if (execution.flags.get(ExecutionModeOutputPoints))
				SPIRV_CROSS_THROW("Topology mode \"points\" is not supported in DirectX");

			auto &func = get<SPIRFunction>(ir.default_entry_point);
			for (auto &arg : func.arguments)
			{
				auto &var = get<SPIRVariable>(arg.id);
				auto &base_type = get<SPIRType>(var.basetype);
				bool block = has_decoration(base_type.self, DecorationBlock);
				if (var.storage == StorageClassTaskPayloadWorkgroupEXT)
				{
					arguments.push_back("in payload " + variable_decl(var));
				}
				else if (block)
				{
					auto flags = get_buffer_block_flags(var.self);
					if (flags.get(DecorationPerPrimitiveEXT) || has_decoration(arg.id, DecorationPerPrimitiveEXT))
					{
						arguments.push_back("out primitives gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[" +
						                    std::to_string(execution.output_primitives) + "]");
					}
					else
					{
						arguments.push_back("out vertices gl_MeshPerVertexEXT gl_MeshVerticesEXT[" +
						                    std::to_string(execution.output_vertices) + "]");
					}
				}
				else
				{
					if (execution.flags.get(ExecutionModeOutputTrianglesEXT))
					{
						arguments.push_back("out indices uint3 gl_PrimitiveTriangleIndicesEXT[" +
						                    std::to_string(execution.output_primitives) + "]");
					}
					else
					{
						arguments.push_back("out indices uint2 gl_PrimitiveLineIndicesEXT[" +
						                    std::to_string(execution.output_primitives) + "]");
					}
				}
			}
		}

		SpecializationConstant wg_x, wg_y, wg_z;
		get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);

		uint32_t x = execution.workgroup_size.x;
		uint32_t y = execution.workgroup_size.y;
		uint32_t z = execution.workgroup_size.z;

		if (!execution.workgroup_size.constant && execution.flags.get(ExecutionModeLocalSizeId))
		{
			if (execution.workgroup_size.id_x)
				x = get<SPIRConstant>(execution.workgroup_size.id_x).scalar();
			if (execution.workgroup_size.id_y)
				y = get<SPIRConstant>(execution.workgroup_size.id_y).scalar();
			if (execution.workgroup_size.id_z)
				z = get<SPIRConstant>(execution.workgroup_size.id_z).scalar();
		}

		auto x_expr = wg_x.id ? get<SPIRConstant>(wg_x.id).specialization_constant_macro_name : to_string(x);
		auto y_expr = wg_y.id ? get<SPIRConstant>(wg_y.id).specialization_constant_macro_name : to_string(y);
		auto z_expr = wg_z.id ? get<SPIRConstant>(wg_z.id).specialization_constant_macro_name : to_string(z);

		statement("[numthreads(", x_expr, ", ", y_expr, ", ", z_expr, ")]");
		break;
	}

	case ExecutionModelFragment:
		if (execution.flags.get(ExecutionModeEarlyFragmentTests))
			statement("[earlydepthstencil]");
		break;

	default:
		break;
	}

	const char *entry_point_name;
	if (hlsl_options.use_entry_point_name)
		entry_point_name = get_entry_point().name.c_str();
	else
		entry_point_name = "main";

	statement(require_output ? "SPIRV_Cross_Output " : "void ", entry_point_name, "(", merge(arguments), ")");
	begin_scope();
	bool legacy = hlsl_options.shader_model <= 30;

	// Copy builtins from entry point arguments to globals.
	active_input_builtins.for_each_bit([&](uint32_t i) {
		auto builtin = builtin_to_glsl(static_cast<BuiltIn>(i), StorageClassInput);
		switch (static_cast<BuiltIn>(i))
		{
		case BuiltInFragCoord:
			// VPOS in D3D9 is sampled at integer locations, apply half-pixel offset to be consistent.
			// TODO: Do we need an option here? Any reason why a D3D9 shader would be used
			// on a D3D10+ system with a different rasterization config?
			if (legacy)
				statement(builtin, " = stage_input.", builtin, " + float4(0.5f, 0.5f, 0.0f, 0.0f);");
			else
			{
				statement(builtin, " = stage_input.", builtin, ";");
				// ZW are undefined in D3D9, only do this fixup here.
				statement(builtin, ".w = 1.0 / ", builtin, ".w;");
			}
			break;

		case BuiltInVertexId:
		case BuiltInVertexIndex:
		case BuiltInInstanceIndex:
			// D3D semantics are uint, but shader wants int.
			if (hlsl_options.support_nonzero_base_vertex_base_instance || hlsl_options.shader_model >= 68)
			{
				if (hlsl_options.shader_model >= 68)
				{
					if (static_cast<BuiltIn>(i) == BuiltInInstanceIndex)
						statement(builtin, " = int(stage_input.", builtin, " + stage_input.gl_BaseInstanceARB);");
					else
						statement(builtin, " = int(stage_input.", builtin, " + stage_input.gl_BaseVertexARB);");
				}
				else
				{
					if (static_cast<BuiltIn>(i) == BuiltInInstanceIndex)
						statement(builtin, " = int(stage_input.", builtin, ") + SPIRV_Cross_BaseInstance;");
					else
						statement(builtin, " = int(stage_input.", builtin, ") + SPIRV_Cross_BaseVertex;");
				}
			}
			else
				statement(builtin, " = int(stage_input.", builtin, ");");
			break;

		case BuiltInBaseVertex:
			if (hlsl_options.shader_model >= 68)
				statement(builtin, " = stage_input.gl_BaseVertexARB;");
			else
				statement(builtin, " = SPIRV_Cross_BaseVertex;");
			break;

		case BuiltInBaseInstance:
			if (hlsl_options.shader_model >= 68)
				statement(builtin, " = stage_input.gl_BaseInstanceARB;");
			else
				statement(builtin, " = SPIRV_Cross_BaseInstance;");
			break;

		case BuiltInInstanceId:
			// D3D semantics are uint, but shader wants int.
			statement(builtin, " = int(stage_input.", builtin, ");");
			break;

		case BuiltInSampleMask:
			statement(builtin, "[0] = stage_input.", builtin, ";");
			break;

		case BuiltInNumWorkgroups:
		case BuiltInPointCoord:
		case BuiltInSubgroupSize:
		case BuiltInSubgroupLocalInvocationId:
		case BuiltInHelperInvocation:
			break;

		case BuiltInSubgroupEqMask:
			// Emulate these ...
			// No 64-bit in HLSL, so have to do it in 32-bit and unroll.
			statement("gl_SubgroupEqMask = 1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96));");
			statement("if (WaveGetLaneIndex() >= 32) gl_SubgroupEqMask.x = 0;");
			statement("if (WaveGetLaneIndex() >= 64 || WaveGetLaneIndex() < 32) gl_SubgroupEqMask.y = 0;");
			statement("if (WaveGetLaneIndex() >= 96 || WaveGetLaneIndex() < 64) gl_SubgroupEqMask.z = 0;");
			statement("if (WaveGetLaneIndex() < 96) gl_SubgroupEqMask.w = 0;");
			break;

		case BuiltInSubgroupGeMask:
			// Emulate these ...
			// No 64-bit in HLSL, so have to do it in 32-bit and unroll.
			statement("gl_SubgroupGeMask = ~((1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96))) - 1u);");
			statement("if (WaveGetLaneIndex() >= 32) gl_SubgroupGeMask.x = 0u;");
			statement("if (WaveGetLaneIndex() >= 64) gl_SubgroupGeMask.y = 0u;");
			statement("if (WaveGetLaneIndex() >= 96) gl_SubgroupGeMask.z = 0u;");
			statement("if (WaveGetLaneIndex() < 32) gl_SubgroupGeMask.y = ~0u;");
			statement("if (WaveGetLaneIndex() < 64) gl_SubgroupGeMask.z = ~0u;");
			statement("if (WaveGetLaneIndex() < 96) gl_SubgroupGeMask.w = ~0u;");
			break;

		case BuiltInSubgroupGtMask:
			// Emulate these ...
			// No 64-bit in HLSL, so have to do it in 32-bit and unroll.
			statement("uint gt_lane_index = WaveGetLaneIndex() + 1;");
			statement("gl_SubgroupGtMask = ~((1u << (gt_lane_index - uint4(0, 32, 64, 96))) - 1u);");
			statement("if (gt_lane_index >= 32) gl_SubgroupGtMask.x = 0u;");
			statement("if (gt_lane_index >= 64) gl_SubgroupGtMask.y = 0u;");
			statement("if (gt_lane_index >= 96) gl_SubgroupGtMask.z = 0u;");
			statement("if (gt_lane_index >= 128) gl_SubgroupGtMask.w = 0u;");
			statement("if (gt_lane_index < 32) gl_SubgroupGtMask.y = ~0u;");
			statement("if (gt_lane_index < 64) gl_SubgroupGtMask.z = ~0u;");
			statement("if (gt_lane_index < 96) gl_SubgroupGtMask.w = ~0u;");
			break;

		case BuiltInSubgroupLeMask:
			// Emulate these ...
			// No 64-bit in HLSL, so have to do it in 32-bit and unroll.
			statement("uint le_lane_index = WaveGetLaneIndex() + 1;");
			statement("gl_SubgroupLeMask = (1u << (le_lane_index - uint4(0, 32, 64, 96))) - 1u;");
			statement("if (le_lane_index >= 32) gl_SubgroupLeMask.x = ~0u;");
			statement("if (le_lane_index >= 64) gl_SubgroupLeMask.y = ~0u;");
			statement("if (le_lane_index >= 96) gl_SubgroupLeMask.z = ~0u;");
			statement("if (le_lane_index >= 128) gl_SubgroupLeMask.w = ~0u;");
			statement("if (le_lane_index < 32) gl_SubgroupLeMask.y = 0u;");
			statement("if (le_lane_index < 64) gl_SubgroupLeMask.z = 0u;");
			statement("if (le_lane_index < 96) gl_SubgroupLeMask.w = 0u;");
			break;

		case BuiltInSubgroupLtMask:
			// Emulate these ...
			// No 64-bit in HLSL, so have to do it in 32-bit and unroll.
			statement("gl_SubgroupLtMask = (1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96))) - 1u;");
			statement("if (WaveGetLaneIndex() >= 32) gl_SubgroupLtMask.x = ~0u;");
			statement("if (WaveGetLaneIndex() >= 64) gl_SubgroupLtMask.y = ~0u;");
			statement("if (WaveGetLaneIndex() >= 96) gl_SubgroupLtMask.z = ~0u;");
			statement("if (WaveGetLaneIndex() < 32) gl_SubgroupLtMask.y = 0u;");
			statement("if (WaveGetLaneIndex() < 64) gl_SubgroupLtMask.z = 0u;");
			statement("if (WaveGetLaneIndex() < 96) gl_SubgroupLtMask.w = 0u;");
			break;

		case BuiltInClipDistance:
			for (uint32_t clip = 0; clip < clip_distance_count; clip++)
				statement("gl_ClipDistance[", clip, "] = stage_input.gl_ClipDistance", clip / 4, ".", "xyzw"[clip & 3],
				          ";");
			break;

		case BuiltInCullDistance:
			for (uint32_t cull = 0; cull < cull_distance_count; cull++)
				statement("gl_CullDistance[", cull, "] = stage_input.gl_CullDistance", cull / 4, ".", "xyzw"[cull & 3],
				          ";");
			break;

		default:
			statement(builtin, " = stage_input.", builtin, ";");
			break;
		}
	});

	// Copy from stage input struct to globals.
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
		auto &type = this->get<SPIRType>(var.basetype);
		bool block = has_decoration(type.self, DecorationBlock);

		if (var.storage != StorageClassInput)
			return;

		bool is_hidden = is_hidden_io_variable(var);

		bool need_matrix_unroll = var.storage == StorageClassInput && execution.model == ExecutionModelVertex;

		if (!var.remapped_variable && type.pointer && !is_builtin_variable(var) &&
		    interface_variable_exists_in_entry_point(var.self) && !is_hidden)
		{
			if (block)
			{
				auto type_name = to_name(type.self);
				auto var_name = to_name(var.self);

				bool is_per_vertex = has_decoration(var.self, DecorationPerVertexKHR);
				uint32_t array_size = is_per_vertex ? to_array_size_literal(type) : 0;

				for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(type.member_types.size()); mbr_idx++)
				{
					auto mbr_name = to_member_name(type, mbr_idx);
					auto flat_name = join(type_name, "_", mbr_name);

					if (is_per_vertex)
					{
						for (uint32_t i = 0; i < array_size; i++)
							statement(var_name, "[", i, "].", mbr_name, " = GetAttributeAtVertex(stage_input.", flat_name, ", ", i, ");");
					}
					else
					{
						statement(var_name, ".", mbr_name, " = stage_input.", flat_name, ";");
					}
				}
			}
			else
			{
				auto name = to_name(var.self);
				auto &mtype = this->get<SPIRType>(var.basetype);
				if (need_matrix_unroll && mtype.columns > 1)
				{
					// Unroll matrices.
					for (uint32_t col = 0; col < mtype.columns; col++)
						statement(name, "[", col, "] = stage_input.", name, "_", col, ";");
				}
				else if (has_decoration(var.self, DecorationPerVertexKHR))
				{
					uint32_t array_size = to_array_size_literal(type);
					for (uint32_t i = 0; i < array_size; i++)
						statement(name, "[", i, "]", " = GetAttributeAtVertex(stage_input.", name, ", ", i, ");");
				}
				else
				{
					if (execution.model == ExecutionModelGeometry)
					{
						statement("for (int i = 0; i < ", input_vertices, "; i++)");
						begin_scope();
						statement(name, "[i] = stage_input[i].", name, ";");
						end_scope();
					}
					else
						statement(name, " = stage_input.", name, ";");
				}
			}
		}
	});

	// Run the shader.
	if (execution.model == ExecutionModelVertex || execution.model == ExecutionModelFragment ||
	    execution.model == ExecutionModelGLCompute || execution.model == ExecutionModelMeshEXT ||
	    execution.model == ExecutionModelGeometry || execution.model == ExecutionModelTaskEXT)
	{
		// For mesh shaders, we receive special arguments that we must pass down as function arguments.
		// HLSL does not support proper reference types for passing these IO blocks,
		// but DXC post-inlining seems to magically fix it up anyways *shrug*.
		SmallVector<string> arglist;
		auto &func = get<SPIRFunction>(ir.default_entry_point);
		// The arguments are marked out, avoid detecting reads and emitting inout.
		for (auto &arg : func.arguments)
			arglist.push_back(to_expression(arg.id, false));
		if (execution.model == ExecutionModelGeometry)
		{
			arglist.push_back("stage_input");
			arglist.push_back("geometry_stream");
		}
		statement(get_inner_entry_point_name(), "(", merge(arglist), ");");
	}
	else
		SPIRV_CROSS_THROW("Unsupported shader stage.");

	// Copy stage outputs.
	if (require_output)
	{
		statement("SPIRV_Cross_Output stage_output;");

		// Copy builtins from globals to return struct.
		active_output_builtins.for_each_bit([&](uint32_t i) {
			// PointSize doesn't exist in HLSL SM 4+.
			if (i == BuiltInPointSize && !legacy)
				return;

			switch (static_cast<BuiltIn>(i))
			{
			case BuiltInClipDistance:
				for (uint32_t clip = 0; clip < clip_distance_count; clip++)
					statement("stage_output.gl_ClipDistance", clip / 4, ".", "xyzw"[clip & 3], " = gl_ClipDistance[",
					          clip, "];");
				break;

			case BuiltInCullDistance:
				for (uint32_t cull = 0; cull < cull_distance_count; cull++)
					statement("stage_output.gl_CullDistance", cull / 4, ".", "xyzw"[cull & 3], " = gl_CullDistance[",
					          cull, "];");
				break;

			case BuiltInSampleMask:
				statement("stage_output.gl_SampleMask = gl_SampleMask[0];");
				break;

			default:
			{
				auto builtin_expr = builtin_to_glsl(static_cast<BuiltIn>(i), StorageClassOutput);
				statement("stage_output.", builtin_expr, " = ", builtin_expr, ";");
				break;
			}
			}
		});

		ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
			auto &type = this->get<SPIRType>(var.basetype);
			bool block = has_decoration(type.self, DecorationBlock);

			if (var.storage != StorageClassOutput)
				return;

			if (!var.remapped_variable && type.pointer &&
			    !is_builtin_variable(var) &&
			    interface_variable_exists_in_entry_point(var.self))
			{
				if (block)
				{
					// I/O blocks need to flatten output.
					auto type_name = to_name(type.self);
					auto var_name = to_name(var.self);
					for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(type.member_types.size()); mbr_idx++)
					{
						auto mbr_name = to_member_name(type, mbr_idx);
						auto flat_name = join(type_name, "_", mbr_name);
						statement("stage_output.", flat_name, " = ", var_name, ".", mbr_name, ";");
					}
				}
				else
				{
					auto name = to_name(var.self);

					if (legacy && execution.model == ExecutionModelFragment)
					{
						string output_filler;
						for (uint32_t size = type.vecsize; size < 4; ++size)
							output_filler += ", 0.0";

						statement("stage_output.", name, " = float4(", name, output_filler, ");");
					}
					else
					{
						statement("stage_output.", name, " = ", name, ";");
					}
				}
			}
		});

		statement("return stage_output;");
	}

	end_scope();
}

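// Putting it together, the wrapper emitted here looks roughly like the
// following for a simple fragment shader (names are illustrative):
//   SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
//   {
//       gl_FragCoord = stage_input.gl_FragCoord;
//       gl_FragCoord.w = 1.0 / gl_FragCoord.w;
//       vUV = stage_input.vUV;
//       frag_main();
//       SPIRV_Cross_Output stage_output;
//       stage_output.FragColor = FragColor;
//       return stage_output;
//   }
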
void CompilerHLSL::emit_fixup()
{
	if (is_vertex_like_shader() && active_output_builtins.get(BuiltInPosition))
	{
		// Do various mangling on the gl_Position.
		if (hlsl_options.shader_model <= 30)
		{
			statement("gl_Position.x = gl_Position.x - gl_HalfPixel.x * "
			          "gl_Position.w;");
			statement("gl_Position.y = gl_Position.y + gl_HalfPixel.y * "
			          "gl_Position.w;");
		}

		if (options.vertex.flip_vert_y)
			statement("gl_Position.y = -gl_Position.y;");
		if (options.vertex.fixup_clipspace)
			statement("gl_Position.z = (gl_Position.z + gl_Position.w) * 0.5;");
	}
}

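// The clip-space fixup above is the usual GL-to-D3D depth remap: with
// z' = (z + w) * 0.5, the post-divide depth z'/w = (z/w + 1) * 0.5 maps NDC
// z from GL's [-1, 1] onto D3D's [0, 1] range.
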
void CompilerHLSL::emit_texture_op(const Instruction &i, bool sparse)
{
	if (sparse)
		SPIRV_CROSS_THROW("Sparse feedback not yet supported in HLSL.");

	auto *ops = stream(i);
	auto op = static_cast<Op>(i.op);
	uint32_t length = i.length;

	SmallVector<uint32_t> inherited_expressions;

	uint32_t result_type = ops[0];
	uint32_t id = ops[1];
	VariableID img = ops[2];
	uint32_t coord = ops[3];
	uint32_t dref = 0;
	uint32_t comp = 0;
	bool gather = false;
	bool proj = false;
	const uint32_t *opt = nullptr;
	auto *combined_image = maybe_get<SPIRCombinedImageSampler>(img);

	if (combined_image && has_decoration(img, DecorationNonUniform))
	{
		set_decoration(combined_image->image, DecorationNonUniform);
		set_decoration(combined_image->sampler, DecorationNonUniform);
	}

	auto img_expr = to_non_uniform_aware_expression(combined_image ? combined_image->image : img);

	inherited_expressions.push_back(coord);

	switch (op)
	{
	case OpImageSampleDrefImplicitLod:
	case OpImageSampleDrefExplicitLod:
		dref = ops[4];
		opt = &ops[5];
		length -= 5;
		break;

	case OpImageSampleProjDrefImplicitLod:
	case OpImageSampleProjDrefExplicitLod:
		dref = ops[4];
		proj = true;
		opt = &ops[5];
		length -= 5;
		break;

	case OpImageDrefGather:
		dref = ops[4];
		opt = &ops[5];
		gather = true;
		length -= 5;
		break;

	case OpImageGather:
		comp = ops[4];
		opt = &ops[5];
		gather = true;
		length -= 5;
		break;

	case OpImageSampleProjImplicitLod:
	case OpImageSampleProjExplicitLod:
		opt = &ops[4];
		length -= 4;
		proj = true;
		break;

	case OpImageQueryLod:
		opt = &ops[4];
		length -= 4;
		break;

	default:
		opt = &ops[4];
		length -= 4;
		break;
	}

	auto &imgtype = expression_type(img);
	uint32_t coord_components = 0;
	switch (imgtype.image.dim)
	{
	case Dim1D:
		coord_components = 1;
		break;
	case Dim2D:
		coord_components = 2;
		break;
	case Dim3D:
		coord_components = 3;
		break;
	case DimCube:
		coord_components = 3;
		break;
	case DimBuffer:
		coord_components = 1;
		break;
	default:
		coord_components = 2;
		break;
	}

	if (dref)
		inherited_expressions.push_back(dref);

	if (imgtype.image.arrayed && op != OpImageQueryLod)
		coord_components++;

	uint32_t bias = 0;
	uint32_t lod = 0;
	uint32_t grad_x = 0;
	uint32_t grad_y = 0;
	uint32_t coffset = 0;
	uint32_t offset = 0;
	uint32_t coffsets = 0;
	uint32_t sample = 0;
	uint32_t minlod = 0;
	uint32_t flags = 0;

	if (length)
	{
		flags = opt[0];
		opt++;
		length--;
	}

	auto test = [&](uint32_t &v, uint32_t flag) {
		if (length && (flags & flag))
		{
			v = *opt++;
			inherited_expressions.push_back(v);
			length--;
		}
	};

	test(bias, ImageOperandsBiasMask);
	test(lod, ImageOperandsLodMask);
	test(grad_x, ImageOperandsGradMask);
	test(grad_y, ImageOperandsGradMask);
	test(coffset, ImageOperandsConstOffsetMask);
	test(offset, ImageOperandsOffsetMask);
	test(coffsets, ImageOperandsConstOffsetsMask);
	test(sample, ImageOperandsSampleMask);
	test(minlod, ImageOperandsMinLodMask);

	string expr;
	string texop;

	if (minlod != 0)
		SPIRV_CROSS_THROW("MinLod texture operand not supported in HLSL.");

	if (op == OpImageFetch)
	{
		if (hlsl_options.shader_model < 40)
		{
			SPIRV_CROSS_THROW("texelFetch is not supported in HLSL shader model 2/3.");
		}
		texop += img_expr;
		texop += ".Load";
	}
	else if (op == OpImageQueryLod)
	{
		texop += img_expr;
		texop += ".CalculateLevelOfDetail";
	}
	else
	{
		auto &imgformat = get<SPIRType>(imgtype.image.type);
		if (hlsl_options.shader_model < 67 && imgformat.basetype != SPIRType::Float)
		{
			SPIRV_CROSS_THROW("Sampling non-float textures is not supported in HLSL SM < 6.7.");
		}

		if (hlsl_options.shader_model >= 40)
		{
			texop += img_expr;

			if (is_depth_image(imgtype, img))
			{
				if (gather)
				{
					texop += ".GatherCmp";
				}
				else if (lod || grad_x || grad_y)
				{
					// Assume we want a fixed level, and the only thing we can get in HLSL is SampleCmpLevelZero.
					texop += ".SampleCmpLevelZero";
				}
				else
					texop += ".SampleCmp";
			}
			else if (gather)
			{
				uint32_t comp_num = evaluate_constant_u32(comp);
				if (hlsl_options.shader_model >= 50)
				{
					switch (comp_num)
					{
					case 0:
						texop += ".GatherRed";
						break;
					case 1:
						texop += ".GatherGreen";
						break;
					case 2:
						texop += ".GatherBlue";
						break;
					case 3:
						texop += ".GatherAlpha";
						break;
					default:
						SPIRV_CROSS_THROW("Invalid component.");
					}
				}
				else
				{
					if (comp_num == 0)
						texop += ".Gather";
					else
						SPIRV_CROSS_THROW("HLSL shader model 4 can only gather from the red component.");
				}
			}
			else if (bias)
				texop += ".SampleBias";
			else if (grad_x || grad_y)
				texop += ".SampleGrad";
			else if (lod)
				texop += ".SampleLevel";
			else
				texop += ".Sample";
		}
		else
		{
			switch (imgtype.image.dim)
			{
			case Dim1D:
				texop += "tex1D";
				break;
			case Dim2D:
				texop += "tex2D";
				break;
			case Dim3D:
				texop += "tex3D";
				break;
			case DimCube:
				texop += "texCUBE";
				break;
			case DimRect:
			case DimBuffer:
			case DimSubpassData:
				SPIRV_CROSS_THROW("Buffer texture support is not yet implemented for HLSL."); // TODO
			default:
				SPIRV_CROSS_THROW("Invalid dimension.");
			}

			if (gather)
				SPIRV_CROSS_THROW("textureGather is not supported in HLSL shader model 2/3.");
			if (offset || coffset)
				SPIRV_CROSS_THROW("textureOffset is not supported in HLSL shader model 2/3.");

			if (grad_x || grad_y)
				texop += "grad";
			else if (lod)
				texop += "lod";
			else if (bias)
				texop += "bias";
			else if (proj || dref)
				texop += "proj";
		}
	}

	expr += texop;
	expr += "(";

	if (hlsl_options.shader_model < 40)
	{
		if (combined_image)
			SPIRV_CROSS_THROW("Separate images/samplers are not supported in HLSL shader model 2/3.");
		expr += to_expression(img);
	}
	else if (op != OpImageFetch)
	{
		string sampler_expr;
		if (combined_image)
			sampler_expr = to_non_uniform_aware_expression(combined_image->sampler);
		else
			sampler_expr = to_sampler_expression(img);
		expr += sampler_expr;
	}

	auto swizzle = [](uint32_t comps, uint32_t in_comps) -> const char * {
		if (comps == in_comps)
			return "";

		switch (comps)
		{
		case 1:
			return ".x";
		case 2:
			return ".xy";
		case 3:
			return ".xyz";
		default:
			return "";
		}
	};
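
	// E.g. a float4 coordinate fed to a Texture2D sample is chopped down to
	// "coord.xy" by the helper above; matching component counts pass through
	// without a swizzle.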
	bool forward = should_forward(coord);

	// The IR can give us more components than we need, so chop them off as needed.
	string coord_expr;
	auto &coord_type = expression_type(coord);
	if (coord_components != coord_type.vecsize)
		coord_expr = to_enclosed_expression(coord) + swizzle(coord_components, expression_type(coord).vecsize);
	else
		coord_expr = to_expression(coord);

	if (proj && hlsl_options.shader_model >= 40) // Legacy HLSL has "proj" operations which do this for us.
		coord_expr = coord_expr + " / " + to_extract_component_expression(coord, coord_components);

	if (hlsl_options.shader_model < 40)
	{
		if (dref)
		{
			if (imgtype.image.dim != Dim1D && imgtype.image.dim != Dim2D)
			{
				SPIRV_CROSS_THROW(
				    "Depth comparison is only supported for 1D and 2D textures in HLSL shader model 2/3.");
			}

			if (grad_x || grad_y)
				SPIRV_CROSS_THROW("Depth comparison is not supported for grad sampling in HLSL shader model 2/3.");

			for (uint32_t size = coord_components; size < 2; ++size)
				coord_expr += ", 0.0";

			forward = forward && should_forward(dref);
			coord_expr += ", " + to_expression(dref);
		}
		else if (lod || bias || proj)
		{
			for (uint32_t size = coord_components; size < 3; ++size)
				coord_expr += ", 0.0";
		}

		if (lod)
		{
			coord_expr = "float4(" + coord_expr + ", " + to_expression(lod) + ")";
		}
		else if (bias)
		{
			coord_expr = "float4(" + coord_expr + ", " + to_expression(bias) + ")";
		}
		else if (proj)
		{
			coord_expr = "float4(" + coord_expr + ", " + to_extract_component_expression(coord, coord_components) + ")";
		}
		else if (dref)
		{
			// A "normal" sample gets fed into tex2Dproj as well, because the
			// regular tex2D accepts only two coordinates.
			coord_expr = "float4(" + coord_expr + ", 1.0)";
		}

		if (!!lod + !!bias + !!proj > 1)
			SPIRV_CROSS_THROW("Legacy HLSL can only use one of lod/bias/proj modifiers.");
	}

	if (op == OpImageFetch)
	{
		if (imgtype.image.dim != DimBuffer && !imgtype.image.ms)
			coord_expr =
			    join("int", coord_components + 1, "(", coord_expr, ", ", lod ? to_expression(lod) : string("0"), ")");
	}
	else
		expr += ", ";
	expr += coord_expr;

	if (dref && hlsl_options.shader_model >= 40)
	{
		forward = forward && should_forward(dref);
		expr += ", ";

		if (proj)
			expr += to_enclosed_expression(dref) + " / " + to_extract_component_expression(coord, coord_components);
		else
			expr += to_expression(dref);
	}

	if (!dref && (grad_x || grad_y))
	{
		forward = forward && should_forward(grad_x);
		forward = forward && should_forward(grad_y);
		expr += ", ";
		expr += to_expression(grad_x);
		expr += ", ";
		expr += to_expression(grad_y);
	}

	if (!dref && lod && hlsl_options.shader_model >= 40 && op != OpImageFetch)
	{
		forward = forward && should_forward(lod);
		expr += ", ";
		expr += to_expression(lod);
	}

	if (!dref && bias && hlsl_options.shader_model >= 40)
	{
		forward = forward && should_forward(bias);
		expr += ", ";
		expr += to_expression(bias);
	}

	if (coffset)
	{
		forward = forward && should_forward(coffset);
		expr += ", ";
		expr += to_expression(coffset);
  3642. else if (offset)
  3643. {
  3644. forward = forward && should_forward(offset);
  3645. expr += ", ";
  3646. expr += to_expression(offset);
  3647. }
  3648. if (sample)
  3649. {
  3650. expr += ", ";
  3651. expr += to_expression(sample);
  3652. }
  3653. expr += ")";
  3654. if (dref && hlsl_options.shader_model < 40)
  3655. expr += ".x";
  3656. if (op == OpImageQueryLod)
  3657. {
  3658. // This is rather awkward.
  3659. // textureQueryLod returns two values, the "accessed level",
  3660. // as well as the actual LOD lambda.
  3661. // As far as I can tell, there is no way to get the .x component
  3662. // according to GLSL spec, and it depends on the sampler itself.
  3663. // Just assume X == Y, so we will need to splat the result to a float2.
  3664. statement("float _", id, "_tmp = ", expr, ";");
  3665. statement("float2 _", id, " = _", id, "_tmp.xx;");
  3666. set<SPIRExpression>(id, join("_", id), result_type, true);
  3667. }
  3668. else
  3669. {
  3670. emit_op(result_type, id, expr, forward, false);
  3671. }
  3672. for (auto &inherit : inherited_expressions)
  3673. inherit_expression_dependencies(id, inherit);
  3674. switch (op)
  3675. {
  3676. case OpImageSampleDrefImplicitLod:
  3677. case OpImageSampleImplicitLod:
  3678. case OpImageSampleProjImplicitLod:
  3679. case OpImageSampleProjDrefImplicitLod:
  3680. register_control_dependent_expression(id);
  3681. break;
  3682. default:
  3683. break;
  3684. }
  3685. }
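
// Illustrative note (not part of the original source): for a combined image sampler
// "uTex" hit by OpImageSampleImplicitLod at SM 5.0, the path above builds roughly
// the following HLSL (identifiers are hypothetical):
//
//   uTex.Sample(_uTex_sampler, vUV)
//
// while a depth-compare sample with an explicit LOD becomes
//
//   uShadow.SampleCmpLevelZero(_uShadow_sampler, vUV, dref)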

string CompilerHLSL::to_resource_binding(const SPIRVariable &var)
{
	const auto &type = get<SPIRType>(var.basetype);

	// We can remap push constant blocks, even if they don't have any binding decoration.
	if (type.storage != StorageClassPushConstant && !has_decoration(var.self, DecorationBinding))
		return "";

	char space = '\0';

	HLSLBindingFlagBits resource_flags = HLSL_BINDING_AUTO_NONE_BIT;

	switch (type.basetype)
	{
	case SPIRType::SampledImage:
		space = 't'; // SRV
		resource_flags = HLSL_BINDING_AUTO_SRV_BIT;
		break;

	case SPIRType::Image:
		if (type.image.sampled == 2 && type.image.dim != DimSubpassData)
		{
			if (has_decoration(var.self, DecorationNonWritable) && hlsl_options.nonwritable_uav_texture_as_srv)
			{
				space = 't'; // SRV
				resource_flags = HLSL_BINDING_AUTO_SRV_BIT;
			}
			else
			{
				space = 'u'; // UAV
				resource_flags = HLSL_BINDING_AUTO_UAV_BIT;
			}
		}
		else
		{
			space = 't'; // SRV
			resource_flags = HLSL_BINDING_AUTO_SRV_BIT;
		}
		break;

	case SPIRType::Sampler:
		space = 's';
		resource_flags = HLSL_BINDING_AUTO_SAMPLER_BIT;
		break;

	case SPIRType::AccelerationStructure:
		space = 't'; // SRV
		resource_flags = HLSL_BINDING_AUTO_SRV_BIT;
		break;

	case SPIRType::Struct:
	{
		auto storage = type.storage;
		if (storage == StorageClassUniform)
		{
			if (has_decoration(type.self, DecorationBufferBlock))
			{
				Bitset flags = ir.get_buffer_block_flags(var);
				bool is_readonly = flags.get(DecorationNonWritable) && !is_hlsl_force_storage_buffer_as_uav(var.self);
				space = is_readonly ? 't' : 'u'; // SRV or UAV depending on readonly flag.
				resource_flags = is_readonly ? HLSL_BINDING_AUTO_SRV_BIT : HLSL_BINDING_AUTO_UAV_BIT;
			}
			else if (has_decoration(type.self, DecorationBlock))
			{
				space = 'b'; // Constant buffers
				resource_flags = HLSL_BINDING_AUTO_CBV_BIT;
			}
		}
		else if (storage == StorageClassPushConstant)
		{
			space = 'b'; // Constant buffers
			resource_flags = HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT;
		}
		else if (storage == StorageClassStorageBuffer)
		{
			// UAV or SRV depending on readonly flag.
			Bitset flags = ir.get_buffer_block_flags(var);
			bool is_readonly = flags.get(DecorationNonWritable) && !is_hlsl_force_storage_buffer_as_uav(var.self);
			space = is_readonly ? 't' : 'u';
			resource_flags = is_readonly ? HLSL_BINDING_AUTO_SRV_BIT : HLSL_BINDING_AUTO_UAV_BIT;
		}

		break;
	}
	default:
		break;
	}

	if (!space)
		return "";

	uint32_t desc_set =
	    resource_flags == HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT ? ResourceBindingPushConstantDescriptorSet : 0u;
	uint32_t binding = resource_flags == HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT ? ResourceBindingPushConstantBinding : 0u;

	if (has_decoration(var.self, DecorationBinding))
		binding = get_decoration(var.self, DecorationBinding);
	if (has_decoration(var.self, DecorationDescriptorSet))
		desc_set = get_decoration(var.self, DecorationDescriptorSet);

	return to_resource_register(resource_flags, space, binding, desc_set);
}
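
// Illustrative note (not part of the original source): with DecorationBinding = 3
// and DecorationDescriptorSet = 1 on a sampled image, this yields a declaration
// suffix like " : register(t3, space1)" on SM 5.1+, or " : register(t3)" on
// older shader models (see to_resource_register below).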

string CompilerHLSL::to_resource_binding_sampler(const SPIRVariable &var)
{
	// For combined image samplers.
	if (!has_decoration(var.self, DecorationBinding))
		return "";

	return to_resource_register(HLSL_BINDING_AUTO_SAMPLER_BIT, 's', get_decoration(var.self, DecorationBinding),
	                            get_decoration(var.self, DecorationDescriptorSet));
}

void CompilerHLSL::remap_hlsl_resource_binding(HLSLBindingFlagBits type, uint32_t &desc_set, uint32_t &binding)
{
	auto itr = resource_bindings.find({ get_execution_model(), desc_set, binding });
	if (itr != end(resource_bindings))
	{
		auto &remap = itr->second;
		remap.second = true;

		switch (type)
		{
		case HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT:
		case HLSL_BINDING_AUTO_CBV_BIT:
			desc_set = remap.first.cbv.register_space;
			binding = remap.first.cbv.register_binding;
			break;

		case HLSL_BINDING_AUTO_SRV_BIT:
			desc_set = remap.first.srv.register_space;
			binding = remap.first.srv.register_binding;
			break;

		case HLSL_BINDING_AUTO_SAMPLER_BIT:
			desc_set = remap.first.sampler.register_space;
			binding = remap.first.sampler.register_binding;
			break;

		case HLSL_BINDING_AUTO_UAV_BIT:
			desc_set = remap.first.uav.register_space;
			binding = remap.first.uav.register_binding;
			break;

		default:
			break;
		}
	}
}
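
// Illustrative usage sketch (not part of the original source): the entries
// consulted here are registered by API users via add_hlsl_resource_binding(),
// along the lines of:
//
//   HLSLResourceBinding binding = {};
//   binding.stage = spv::ExecutionModelFragment;
//   binding.desc_set = 1;
//   binding.binding = 3;
//   binding.srv.register_space = 0;
//   binding.srv.register_binding = 7;
//   compiler.add_hlsl_resource_binding(binding);
//
// which would remap an SRV at (set = 1, binding = 3) to register(t7, space0).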

string CompilerHLSL::to_resource_register(HLSLBindingFlagBits flag, char space, uint32_t binding, uint32_t space_set)
{
	if ((flag & resource_binding_flags) == 0)
	{
		remap_hlsl_resource_binding(flag, space_set, binding);

		// The push constant block did not have a binding, and there was no remap for it,
		// so declare it without a register binding.
		if (flag == HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT && space_set == ResourceBindingPushConstantDescriptorSet)
			return "";

		if (hlsl_options.shader_model >= 51)
			return join(" : register(", space, binding, ", space", space_set, ")");
		else
			return join(" : register(", space, binding, ")");
	}
	else
		return "";
}
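
// Illustrative note (not part of the original source): when a resource class's
// bit is present in resource_binding_flags, the resource is declared without an
// explicit register() qualifier, leaving register assignment to the downstream
// HLSL compiler/runtime instead of this backend.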

void CompilerHLSL::emit_modern_uniform(const SPIRVariable &var)
{
	auto &type = get<SPIRType>(var.basetype);
	switch (type.basetype)
	{
	case SPIRType::SampledImage:
	case SPIRType::Image:
	{
		bool is_coherent = false;
		if (type.basetype == SPIRType::Image && type.image.sampled == 2)
			is_coherent = has_decoration(var.self, DecorationCoherent);

		statement(is_coherent ? "globallycoherent " : "", image_type_hlsl_modern(type, var.self), " ",
		          to_name(var.self), type_to_array_glsl(type, var.self), to_resource_binding(var), ";");

		if (type.basetype == SPIRType::SampledImage && type.image.dim != DimBuffer)
		{
			// For combined image samplers, also emit the matching sampler state.
			if (is_depth_image(type, var.self))
				statement("SamplerComparisonState ", to_sampler_expression(var.self), type_to_array_glsl(type, var.self),
				          to_resource_binding_sampler(var), ";");
			else
				statement("SamplerState ", to_sampler_expression(var.self), type_to_array_glsl(type, var.self),
				          to_resource_binding_sampler(var), ";");
		}
		break;
	}

	case SPIRType::Sampler:
		if (comparison_ids.count(var.self))
			statement("SamplerComparisonState ", to_name(var.self), type_to_array_glsl(type, var.self), to_resource_binding(var),
			          ";");
		else
			statement("SamplerState ", to_name(var.self), type_to_array_glsl(type, var.self), to_resource_binding(var), ";");
		break;

	default:
		statement(variable_decl(var), to_resource_binding(var), ";");
		break;
	}
}
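
// Illustrative note (not part of the original source): a combined image sampler
// "uTex" at (set 0, binding 2) declared through this path looks roughly like:
//
//   Texture2D<float4> uTex : register(t2);
//   SamplerState _uTex_sampler : register(s2);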

void CompilerHLSL::emit_legacy_uniform(const SPIRVariable &var)
{
	auto &type = get<SPIRType>(var.basetype);
	switch (type.basetype)
	{
	case SPIRType::Sampler:
	case SPIRType::Image:
		SPIRV_CROSS_THROW("Separate images and samplers are not supported in legacy HLSL.");

	default:
		statement(variable_decl(var), ";");
		break;
	}
}

void CompilerHLSL::emit_uniform(const SPIRVariable &var)
{
	add_resource_name(var.self);
	if (hlsl_options.shader_model >= 40)
		emit_modern_uniform(var);
	else
		emit_legacy_uniform(var);
}

bool CompilerHLSL::emit_complex_bitcast(uint32_t, uint32_t, uint32_t)
{
	return false;
}

void CompilerHLSL::append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector<std::string> &arglist)
{
	CompilerGLSL::append_global_func_args(func, index, arglist);

	if (func.emits_geometry)
		arglist.push_back("geometry_stream");
}

string CompilerHLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
{
	if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Int)
		return type_to_glsl(out_type);
	else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Int64)
		return type_to_glsl(out_type);
	else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float)
		return "asuint";
	else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::UInt)
		return type_to_glsl(out_type);
	else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::UInt64)
		return type_to_glsl(out_type);
	else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float)
		return "asint";
	else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt)
		return "asfloat";
	else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int)
		return "asfloat";
	else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double)
		SPIRV_CROSS_THROW("Double to Int64 is not supported in HLSL.");
	else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double)
		SPIRV_CROSS_THROW("Double to UInt64 is not supported in HLSL.");
	else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64)
		return "asdouble";
	else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64)
		return "asdouble";
	else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
	{
		if (!requires_explicit_fp16_packing)
		{
			requires_explicit_fp16_packing = true;
			force_recompile();
		}
		return "spvUnpackFloat2x16";
	}
	else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2)
	{
		if (!requires_explicit_fp16_packing)
		{
			requires_explicit_fp16_packing = true;
			force_recompile();
		}
		return "spvPackFloat2x16";
	}
	else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half)
	{
		if (hlsl_options.shader_model < 40)
			SPIRV_CROSS_THROW("Half to UShort requires Shader Model 4.");
		return "(" + type_to_glsl(out_type) + ")f32tof16";
	}
	else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort)
	{
		if (hlsl_options.shader_model < 40)
			SPIRV_CROSS_THROW("UShort to Half requires Shader Model 4.");
		return "(" + type_to_glsl(out_type) + ")f16tof32";
	}
	else
		return "";
}
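
// Illustrative note (not part of the original source): an OpBitcast from float4
// to uint4 therefore emits "asuint(expr)", while same-width integer
// reinterprets fall back to the plain HLSL constructor, e.g. "uint4(expr)".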

void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t count)
{
	auto op = static_cast<GLSLstd450>(eop);

	// If we need to do implicit bitcasts, make sure we do it with the correct type.
	uint32_t integer_width = get_integer_width_for_glsl_instruction(op, args, count);
	auto int_type = to_signed_basetype(integer_width);
	auto uint_type = to_unsigned_basetype(integer_width);

	op = get_remapped_glsl_op(op);

	switch (op)
	{
	case GLSLstd450InverseSqrt:
		emit_unary_func_op(result_type, id, args[0], "rsqrt");
		break;

	case GLSLstd450Fract:
		emit_unary_func_op(result_type, id, args[0], "frac");
		break;

	case GLSLstd450RoundEven:
		if (hlsl_options.shader_model < 40)
			SPIRV_CROSS_THROW("roundEven is not supported in HLSL shader model 2/3.");
		emit_unary_func_op(result_type, id, args[0], "round");
		break;

	case GLSLstd450Trunc:
		emit_unary_func_op(result_type, id, args[0], "trunc");
		break;

	case GLSLstd450Acosh:
	case GLSLstd450Asinh:
	case GLSLstd450Atanh:
		// These are not supported in HLSL, always emulate them.
		emit_emulated_ahyper_op(result_type, id, args[0], op);
		break;

	case GLSLstd450FMix:
	case GLSLstd450IMix:
		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "lerp");
		break;

	case GLSLstd450Atan2:
		emit_binary_func_op(result_type, id, args[0], args[1], "atan2");
		break;

	case GLSLstd450Fma:
		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "mad");
		break;

	case GLSLstd450InterpolateAtCentroid:
		emit_unary_func_op(result_type, id, args[0], "EvaluateAttributeAtCentroid");
		break;
	case GLSLstd450InterpolateAtSample:
		emit_binary_func_op(result_type, id, args[0], args[1], "EvaluateAttributeAtSample");
		break;
	case GLSLstd450InterpolateAtOffset:
		emit_binary_func_op(result_type, id, args[0], args[1], "EvaluateAttributeSnapped");
		break;

	case GLSLstd450PackHalf2x16:
		if (!requires_fp16_packing)
		{
			requires_fp16_packing = true;
			force_recompile();
		}
		emit_unary_func_op(result_type, id, args[0], "spvPackHalf2x16");
		break;

	case GLSLstd450UnpackHalf2x16:
		if (!requires_fp16_packing)
		{
			requires_fp16_packing = true;
			force_recompile();
		}
		emit_unary_func_op(result_type, id, args[0], "spvUnpackHalf2x16");
		break;

	case GLSLstd450PackSnorm4x8:
		if (!requires_snorm8_packing)
		{
			requires_snorm8_packing = true;
			force_recompile();
		}
		emit_unary_func_op(result_type, id, args[0], "spvPackSnorm4x8");
		break;

	case GLSLstd450UnpackSnorm4x8:
		if (!requires_snorm8_packing)
		{
			requires_snorm8_packing = true;
			force_recompile();
		}
		emit_unary_func_op(result_type, id, args[0], "spvUnpackSnorm4x8");
		break;

	case GLSLstd450PackUnorm4x8:
		if (!requires_unorm8_packing)
		{
			requires_unorm8_packing = true;
			force_recompile();
		}
		emit_unary_func_op(result_type, id, args[0], "spvPackUnorm4x8");
		break;

	case GLSLstd450UnpackUnorm4x8:
		if (!requires_unorm8_packing)
		{
			requires_unorm8_packing = true;
			force_recompile();
		}
		emit_unary_func_op(result_type, id, args[0], "spvUnpackUnorm4x8");
		break;

	case GLSLstd450PackSnorm2x16:
		if (!requires_snorm16_packing)
		{
			requires_snorm16_packing = true;
			force_recompile();
		}
		emit_unary_func_op(result_type, id, args[0], "spvPackSnorm2x16");
		break;

	case GLSLstd450UnpackSnorm2x16:
		if (!requires_snorm16_packing)
		{
			requires_snorm16_packing = true;
			force_recompile();
		}
		emit_unary_func_op(result_type, id, args[0], "spvUnpackSnorm2x16");
		break;

	case GLSLstd450PackUnorm2x16:
		if (!requires_unorm16_packing)
		{
			requires_unorm16_packing = true;
			force_recompile();
		}
		emit_unary_func_op(result_type, id, args[0], "spvPackUnorm2x16");
		break;

	case GLSLstd450UnpackUnorm2x16:
		if (!requires_unorm16_packing)
		{
			requires_unorm16_packing = true;
			force_recompile();
		}
		emit_unary_func_op(result_type, id, args[0], "spvUnpackUnorm2x16");
		break;

	case GLSLstd450PackDouble2x32:
	case GLSLstd450UnpackDouble2x32:
		SPIRV_CROSS_THROW("packDouble2x32/unpackDouble2x32 not supported in HLSL.");

	case GLSLstd450FindILsb:
	{
		auto basetype = expression_type(args[0]).basetype;
		emit_unary_func_op_cast(result_type, id, args[0], "firstbitlow", basetype, basetype);
		break;
	}

	case GLSLstd450FindSMsb:
		emit_unary_func_op_cast(result_type, id, args[0], "firstbithigh", int_type, int_type);
		break;

	case GLSLstd450FindUMsb:
		emit_unary_func_op_cast(result_type, id, args[0], "firstbithigh", uint_type, uint_type);
		break;

	case GLSLstd450MatrixInverse:
	{
		auto &type = get<SPIRType>(result_type);
		if (type.vecsize == 2 && type.columns == 2)
		{
			if (!requires_inverse_2x2)
			{
				requires_inverse_2x2 = true;
				force_recompile();
			}
		}
		else if (type.vecsize == 3 && type.columns == 3)
		{
			if (!requires_inverse_3x3)
			{
				requires_inverse_3x3 = true;
				force_recompile();
			}
		}
		else if (type.vecsize == 4 && type.columns == 4)
		{
			if (!requires_inverse_4x4)
			{
				requires_inverse_4x4 = true;
				force_recompile();
			}
		}
		emit_unary_func_op(result_type, id, args[0], "spvInverse");
		break;
	}

	case GLSLstd450Normalize:
		// HLSL does not support scalar versions here.
		if (expression_type(args[0]).vecsize == 1)
		{
			// Returns -1 or 1 for valid input, sign() does the job.
			emit_unary_func_op(result_type, id, args[0], "sign");
		}
		else
			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
		break;

	case GLSLstd450Reflect:
		if (get<SPIRType>(result_type).vecsize == 1)
		{
			if (!requires_scalar_reflect)
			{
				requires_scalar_reflect = true;
				force_recompile();
			}
			emit_binary_func_op(result_type, id, args[0], args[1], "spvReflect");
		}
		else
			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
		break;

	case GLSLstd450Refract:
		if (get<SPIRType>(result_type).vecsize == 1)
		{
			if (!requires_scalar_refract)
			{
				requires_scalar_refract = true;
				force_recompile();
			}
			emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "spvRefract");
		}
		else
			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
		break;

	case GLSLstd450FaceForward:
		if (get<SPIRType>(result_type).vecsize == 1)
		{
			if (!requires_scalar_faceforward)
			{
				requires_scalar_faceforward = true;
				force_recompile();
			}
			emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "spvFaceForward");
		}
		else
			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
		break;

	case GLSLstd450NMin:
		CompilerGLSL::emit_glsl_op(result_type, id, GLSLstd450FMin, args, count);
		break;

	case GLSLstd450NMax:
		CompilerGLSL::emit_glsl_op(result_type, id, GLSLstd450FMax, args, count);
		break;

	case GLSLstd450NClamp:
		CompilerGLSL::emit_glsl_op(result_type, id, GLSLstd450FClamp, args, count);
		break;

	default:
		CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
		break;
	}
}
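
// Illustrative sketch (not part of the original source): the requires_*_packing
// flags above request emission of small HLSL helpers elsewhere in this backend.
// The fp16 pair is conceptually:
//
//   uint spvPackHalf2x16(float2 value)
//   {
//       uint2 packed = f32tof16(value);
//       return packed.x | (packed.y << 16);
//   }
//
//   float2 spvUnpackHalf2x16(uint value)
//   {
//       return f16tof32(uint2(value & 0xffff, value >> 16));
//   }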

void CompilerHLSL::read_access_chain_array(const string &lhs, const SPIRAccessChain &chain)
{
	auto &type = get<SPIRType>(chain.basetype);

	// Need to use a reserved identifier here since it might shadow an identifier in the access chain input or other loops.
	auto ident = get_unique_identifier();

	statement("[unroll]");
	statement("for (int ", ident, " = 0; ", ident, " < ", to_array_size(type, uint32_t(type.array.size() - 1)), "; ",
	          ident, "++)");
	begin_scope();
	auto subchain = chain;
	subchain.dynamic_index = join(ident, " * ", chain.array_stride, " + ", chain.dynamic_index);
	subchain.basetype = type.parent_type;
	if (!get<SPIRType>(subchain.basetype).array.empty())
		subchain.array_stride = get_decoration(subchain.basetype, DecorationArrayStride);
	read_access_chain(nullptr, join(lhs, "[", ident, "]"), subchain);
	end_scope();
}

void CompilerHLSL::read_access_chain_struct(const string &lhs, const SPIRAccessChain &chain)
{
	auto &type = get<SPIRType>(chain.basetype);
	auto subchain = chain;
	uint32_t member_count = uint32_t(type.member_types.size());

	for (uint32_t i = 0; i < member_count; i++)
	{
		uint32_t offset = type_struct_member_offset(type, i);
		subchain.static_index = chain.static_index + offset;
		subchain.basetype = type.member_types[i];

		subchain.matrix_stride = 0;
		subchain.array_stride = 0;
		subchain.row_major_matrix = false;

		auto &member_type = get<SPIRType>(subchain.basetype);
		if (member_type.columns > 1)
		{
			subchain.matrix_stride = type_struct_member_matrix_stride(type, i);
			subchain.row_major_matrix = has_member_decoration(type.self, i, DecorationRowMajor);
		}

		if (!member_type.array.empty())
			subchain.array_stride = type_struct_member_array_stride(type, i);

		read_access_chain(nullptr, join(lhs, ".", to_member_name(type, i)), subchain);
	}
}

void CompilerHLSL::read_access_chain(string *expr, const string &lhs, const SPIRAccessChain &chain)
{
	auto &type = get<SPIRType>(chain.basetype);

	SPIRType target_type { is_scalar(type) ? OpTypeInt : type.op };
	target_type.basetype = SPIRType::UInt;
	target_type.vecsize = type.vecsize;
	target_type.columns = type.columns;

	if (!type.array.empty())
	{
		read_access_chain_array(lhs, chain);
		return;
	}
	else if (type.basetype == SPIRType::Struct)
	{
		read_access_chain_struct(lhs, chain);
		return;
	}
	else if (type.width != 32 && !hlsl_options.enable_16bit_types)
		SPIRV_CROSS_THROW("Reading types other than 32-bit from ByteAddressBuffer not yet supported, unless SM 6.2 and "
		                  "native 16-bit types are enabled.");

	string base = chain.base;
	if (has_decoration(chain.self, DecorationNonUniform))
		convert_non_uniform_expression(base, chain.self);

	bool templated_load = hlsl_options.shader_model >= 62;
	string load_expr;

	string template_expr;
	if (templated_load)
		template_expr = join("<", type_to_glsl(type), ">");

	// Load a vector or scalar.
	if (type.columns == 1 && !chain.row_major_matrix)
	{
		const char *load_op = nullptr;
		switch (type.vecsize)
		{
		case 1:
			load_op = "Load";
			break;
		case 2:
			load_op = "Load2";
			break;
		case 3:
			load_op = "Load3";
			break;
		case 4:
			load_op = "Load4";
			break;
		default:
			SPIRV_CROSS_THROW("Unknown vector size.");
		}

		if (templated_load)
			load_op = "Load";

		load_expr = join(base, ".", load_op, template_expr, "(", chain.dynamic_index, chain.static_index, ")");
	}
	else if (type.columns == 1)
	{
		// Strided load since we are loading a column from a row-major matrix.
		if (templated_load)
		{
			auto scalar_type = type;
			scalar_type.vecsize = 1;
			scalar_type.columns = 1;
			template_expr = join("<", type_to_glsl(scalar_type), ">");
			if (type.vecsize > 1)
				load_expr += type_to_glsl(type) + "(";
		}
		else if (type.vecsize > 1)
		{
			load_expr = type_to_glsl(target_type);
			load_expr += "(";
		}

		for (uint32_t r = 0; r < type.vecsize; r++)
		{
			load_expr += join(base, ".Load", template_expr, "(", chain.dynamic_index,
			                  chain.static_index + r * chain.matrix_stride, ")");
			if (r + 1 < type.vecsize)
				load_expr += ", ";
		}

		if (type.vecsize > 1)
			load_expr += ")";
	}
	else if (!chain.row_major_matrix)
	{
		// Load a matrix, column-major, the easy case.
		const char *load_op = nullptr;
		switch (type.vecsize)
		{
		case 1:
			load_op = "Load";
			break;
		case 2:
			load_op = "Load2";
			break;
		case 3:
			load_op = "Load3";
			break;
		case 4:
			load_op = "Load4";
			break;
		default:
			SPIRV_CROSS_THROW("Unknown vector size.");
		}

		if (templated_load)
		{
			auto vector_type = type;
			vector_type.columns = 1;
			template_expr = join("<", type_to_glsl(vector_type), ">");
			load_expr = type_to_glsl(type);
			load_op = "Load";
		}
		else
		{
			// Note, this loading style in HLSL is *actually* row-major, but we always treat matrices as transposed in this backend,
			// so row-major is technically column-major ...
			load_expr = type_to_glsl(target_type);
		}

		load_expr += "(";
		for (uint32_t c = 0; c < type.columns; c++)
		{
			load_expr += join(base, ".", load_op, template_expr, "(", chain.dynamic_index,
			                  chain.static_index + c * chain.matrix_stride, ")");
			if (c + 1 < type.columns)
				load_expr += ", ";
		}
		load_expr += ")";
	}
	else
	{
		// Pick out elements one by one ... Hopefully compilers are smart enough to recognize this pattern
		// considering HLSL is "row-major decl", but "column-major" memory layout (basically implicit transpose model, ugh) ...
		if (templated_load)
		{
			load_expr = type_to_glsl(type);
			auto scalar_type = type;
			scalar_type.vecsize = 1;
			scalar_type.columns = 1;
			template_expr = join("<", type_to_glsl(scalar_type), ">");
		}
		else
			load_expr = type_to_glsl(target_type);

		load_expr += "(";
		for (uint32_t c = 0; c < type.columns; c++)
		{
			for (uint32_t r = 0; r < type.vecsize; r++)
			{
				load_expr += join(base, ".Load", template_expr, "(", chain.dynamic_index,
				                  chain.static_index + c * (type.width / 8) + r * chain.matrix_stride, ")");

				if ((r + 1 < type.vecsize) || (c + 1 < type.columns))
					load_expr += ", ";
			}
		}
		load_expr += ")";
	}

	if (!templated_load)
	{
		auto bitcast_op = bitcast_glsl_op(type, target_type);
		if (!bitcast_op.empty())
			load_expr = join(bitcast_op, "(", load_expr, ")");
	}

	if (lhs.empty())
	{
		assert(expr);
		*expr = std::move(load_expr);
	}
	else
		statement(lhs, " = ", load_expr, ";");
}
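
// Illustrative note (not part of the original source): loading a float4 at a
// constant byte offset from an SSBO lowered to ByteAddressBuffer "_buf" emits
// roughly
//
//   asfloat(_buf.Load4(16))   // SM < 6.2, raw uint load plus bitcast
//   _buf.Load<float4>(16)     // SM >= 6.2, templated load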

void CompilerHLSL::emit_load(const Instruction &instruction)
{
	auto ops = stream(instruction);

	uint32_t result_type = ops[0];
	uint32_t id = ops[1];
	uint32_t ptr = ops[2];

	auto *chain = maybe_get<SPIRAccessChain>(ptr);
	if (chain)
	{
		auto &type = get<SPIRType>(result_type);
		bool composite_load = !type.array.empty() || type.basetype == SPIRType::Struct;

		if (composite_load)
		{
			// We cannot make this work in one single expression as we might have nested structures and arrays,
			// so unroll the load to an uninitialized temporary.
			emit_uninitialized_temporary_expression(result_type, id);
			read_access_chain(nullptr, to_expression(id), *chain);
			track_expression_read(chain->self);
		}
		else
		{
			string load_expr;
			read_access_chain(&load_expr, "", *chain);

			bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries);

			// If we are forwarding this load,
			// don't register the read to access chain here, defer that to when we actually use the expression,
			// using the add_implied_read_expression mechanism.
			if (!forward)
				track_expression_read(chain->self);

			// Do not forward complex load sequences like matrices, structs and arrays.
			if (type.columns > 1)
				forward = false;

			auto &e = emit_op(result_type, id, load_expr, forward, true);
			e.need_transpose = false;
			register_read(id, ptr, forward);
			inherit_expression_dependencies(id, ptr);
			if (forward)
				add_implied_read_expression(e, chain->self);
		}
	}
	else
	{
		// Very special case where we cannot rely on IO lowering.
		// Mesh shader clip/cull arrays ... Cursed.
		auto &res_type = get<SPIRType>(result_type);
		if (get_execution_model() == ExecutionModelMeshEXT &&
		    has_decoration(ptr, DecorationBuiltIn) &&
		    (get_decoration(ptr, DecorationBuiltIn) == BuiltInClipDistance ||
		     get_decoration(ptr, DecorationBuiltIn) == BuiltInCullDistance) &&
		    is_array(res_type) && !is_array(get<SPIRType>(res_type.parent_type)) &&
		    to_array_size_literal(res_type) > 1)
		{
			track_expression_read(ptr);
			string load_expr = "{ ";
			uint32_t num_elements = to_array_size_literal(res_type);
			for (uint32_t i = 0; i < num_elements; i++)
			{
				load_expr += join(to_expression(ptr), ".", index_to_swizzle(i));
				if (i + 1 < num_elements)
					load_expr += ", ";
			}
			load_expr += " }";
			emit_op(result_type, id, load_expr, false);
			register_read(id, ptr, false);
			inherit_expression_dependencies(id, ptr);
		}
		else
		{
			CompilerGLSL::emit_instruction(instruction);
		}
	}
}

void CompilerHLSL::write_access_chain_array(const SPIRAccessChain &chain, uint32_t value,
                                            const SmallVector<uint32_t> &composite_chain)
{
	auto *ptype = &get<SPIRType>(chain.basetype);
	while (ptype->pointer)
	{
		ptype = &get<SPIRType>(ptype->basetype);
	}
	auto &type = *ptype;

	// Need to use a reserved identifier here since it might shadow an identifier in the access chain input or other loops.
	auto ident = get_unique_identifier();

	uint32_t id = ir.increase_bound_by(2);
	uint32_t int_type_id = id + 1;
	SPIRType int_type { OpTypeInt };
	int_type.basetype = SPIRType::Int;
	int_type.width = 32;
	set<SPIRType>(int_type_id, int_type);
	set<SPIRExpression>(id, ident, int_type_id, true);
	set_name(id, ident);
	suppressed_usage_tracking.insert(id);

	statement("[unroll]");
	statement("for (int ", ident, " = 0; ", ident, " < ", to_array_size(type, uint32_t(type.array.size() - 1)), "; ",
	          ident, "++)");
	begin_scope();
	auto subchain = chain;
	subchain.dynamic_index = join(ident, " * ", chain.array_stride, " + ", chain.dynamic_index);
	subchain.basetype = type.parent_type;

	// Forcefully allow us to use an ID here by setting MSB.
	auto subcomposite_chain = composite_chain;
	subcomposite_chain.push_back(0x80000000u | id);

	if (!get<SPIRType>(subchain.basetype).array.empty())
		subchain.array_stride = get_decoration(subchain.basetype, DecorationArrayStride);

	write_access_chain(subchain, value, subcomposite_chain);
	end_scope();
}

void CompilerHLSL::write_access_chain_struct(const SPIRAccessChain &chain, uint32_t value,
                                             const SmallVector<uint32_t> &composite_chain)
{
	auto &type = get<SPIRType>(chain.basetype);
	uint32_t member_count = uint32_t(type.member_types.size());
	auto subchain = chain;

	auto subcomposite_chain = composite_chain;
	subcomposite_chain.push_back(0);

	for (uint32_t i = 0; i < member_count; i++)
	{
		uint32_t offset = type_struct_member_offset(type, i);
		subchain.static_index = chain.static_index + offset;
		subchain.basetype = type.member_types[i];

		subchain.matrix_stride = 0;
		subchain.array_stride = 0;
		subchain.row_major_matrix = false;

		auto &member_type = get<SPIRType>(subchain.basetype);
		if (member_type.columns > 1)
		{
			subchain.matrix_stride = type_struct_member_matrix_stride(type, i);
			subchain.row_major_matrix = has_member_decoration(type.self, i, DecorationRowMajor);
		}

		if (!member_type.array.empty())
			subchain.array_stride = type_struct_member_array_stride(type, i);

		subcomposite_chain.back() = i;

		write_access_chain(subchain, value, subcomposite_chain);
	}
}

string CompilerHLSL::write_access_chain_value(uint32_t value, const SmallVector<uint32_t> &composite_chain,
                                              bool enclose)
{
	string ret;
	if (composite_chain.empty())
		ret = to_expression(value);
	else
	{
		AccessChainMeta meta;
		ret = access_chain_internal(value, composite_chain.data(), uint32_t(composite_chain.size()),
		                            ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_LITERAL_MSB_FORCE_ID, &meta);
	}

	if (enclose)
		ret = enclose_expression(ret);
	return ret;
}

void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t value,
                                      const SmallVector<uint32_t> &composite_chain)
{
	auto &type = get<SPIRType>(chain.basetype);

	// Make sure we trigger a read of the constituents in the access chain.
	track_expression_read(chain.self);

	SPIRType target_type { is_scalar(type) ? OpTypeInt : type.op };
	target_type.basetype = SPIRType::UInt;
	target_type.vecsize = type.vecsize;
	target_type.columns = type.columns;

	if (!type.array.empty())
	{
		write_access_chain_array(chain, value, composite_chain);
		register_write(chain.self);
		return;
	}
	else if (type.basetype == SPIRType::Struct)
	{
		write_access_chain_struct(chain, value, composite_chain);
		register_write(chain.self);
		return;
	}
	else if (type.width != 32 && !hlsl_options.enable_16bit_types)
		SPIRV_CROSS_THROW("Writing types other than 32-bit to RWByteAddressBuffer not yet supported, unless SM 6.2 and "
		                  "native 16-bit types are enabled.");

	bool templated_store = hlsl_options.shader_model >= 62;

	auto base = chain.base;
	if (has_decoration(chain.self, DecorationNonUniform))
		convert_non_uniform_expression(base, chain.self);

	string template_expr;
	if (templated_store)
		template_expr = join("<", type_to_glsl(type), ">");

	if (type.columns == 1 && !chain.row_major_matrix)
	{
		const char *store_op = nullptr;
		switch (type.vecsize)
		{
		case 1:
			store_op = "Store";
			break;
		case 2:
			store_op = "Store2";
			break;
		case 3:
			store_op = "Store3";
			break;
		case 4:
			store_op = "Store4";
			break;
		default:
			SPIRV_CROSS_THROW("Unknown vector size.");
		}

		auto store_expr = write_access_chain_value(value, composite_chain, false);

		if (!templated_store)
		{
			auto bitcast_op = bitcast_glsl_op(target_type, type);
			if (!bitcast_op.empty())
				store_expr = join(bitcast_op, "(", store_expr, ")");
		}
		else
			store_op = "Store";

		statement(base, ".", store_op, template_expr, "(", chain.dynamic_index, chain.static_index, ", ",
		          store_expr, ");");
	}
	else if (type.columns == 1)
	{
		if (templated_store)
		{
			auto scalar_type = type;
			scalar_type.vecsize = 1;
			scalar_type.columns = 1;
			template_expr = join("<", type_to_glsl(scalar_type), ">");
		}

		// Strided store.
		for (uint32_t r = 0; r < type.vecsize; r++)
		{
			auto store_expr = write_access_chain_value(value, composite_chain, true);
			if (type.vecsize > 1)
			{
				store_expr += ".";
				store_expr += index_to_swizzle(r);
			}
			remove_duplicate_swizzle(store_expr);

			if (!templated_store)
			{
				auto bitcast_op = bitcast_glsl_op(target_type, type);
				if (!bitcast_op.empty())
					store_expr = join(bitcast_op, "(", store_expr, ")");
			}

			statement(base, ".Store", template_expr, "(", chain.dynamic_index,
			          chain.static_index + chain.matrix_stride * r, ", ", store_expr, ");");
		}
	}
	else if (!chain.row_major_matrix)
	{
		const char *store_op = nullptr;
		switch (type.vecsize)
		{
		case 1:
			store_op = "Store";
			break;
		case 2:
			store_op = "Store2";
			break;
		case 3:
			store_op = "Store3";
			break;
		case 4:
			store_op = "Store4";
			break;
		default:
			SPIRV_CROSS_THROW("Unknown vector size.");
		}

		if (templated_store)
		{
			store_op = "Store";
			auto vector_type = type;
			vector_type.columns = 1;
			template_expr = join("<", type_to_glsl(vector_type), ">");
		}

		for (uint32_t c = 0; c < type.columns; c++)
		{
			auto store_expr = join(write_access_chain_value(value, composite_chain, true), "[", c, "]");

			if (!templated_store)
			{
				auto bitcast_op = bitcast_glsl_op(target_type, type);
				if (!bitcast_op.empty())
					store_expr = join(bitcast_op, "(", store_expr, ")");
			}

			statement(base, ".", store_op, template_expr, "(", chain.dynamic_index,
			          chain.static_index + c * chain.matrix_stride, ", ", store_expr, ");");
		}
	}
	else
	{
		if (templated_store)
		{
			auto scalar_type = type;
			scalar_type.vecsize = 1;
			scalar_type.columns = 1;
			template_expr = join("<", type_to_glsl(scalar_type), ">");
		}

		for (uint32_t r = 0; r < type.vecsize; r++)
		{
			for (uint32_t c = 0; c < type.columns; c++)
			{
				auto store_expr =
				    join(write_access_chain_value(value, composite_chain, true), "[", c, "].", index_to_swizzle(r));
				remove_duplicate_swizzle(store_expr);
				auto bitcast_op = bitcast_glsl_op(target_type, type);
				if (!bitcast_op.empty())
					store_expr = join(bitcast_op, "(", store_expr, ")");
				statement(base, ".Store", template_expr, "(", chain.dynamic_index,
				          chain.static_index + c * (type.width / 8) + r * chain.matrix_stride, ", ", store_expr, ");");
			}
		}
	}

	register_write(chain.self);
}
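
// Illustrative note (not part of the original source): the mirrored store of a
// float4 at byte offset 16 into an RWByteAddressBuffer "_buf" becomes roughly
//
//   _buf.Store4(16, asuint(value));   // SM < 6.2, bitcast plus raw store
//   _buf.Store<float4>(16, value);    // SM >= 6.2, templated store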

void CompilerHLSL::emit_store(const Instruction &instruction)
{
	auto ops = stream(instruction);
	if (options.vertex.flip_vert_y)
	{
		auto *expr = maybe_get<SPIRExpression>(ops[0]);
		if (expr != nullptr && expr->access_meshlet_position_y)
		{
			auto lhs = to_dereferenced_expression(ops[0]);
			auto rhs = to_unpacked_expression(ops[1]);
			statement(lhs, " = spvFlipVertY(", rhs, ");");
			register_write(ops[0]);
			return;
		}
	}

	auto *chain = maybe_get<SPIRAccessChain>(ops[0]);
	if (chain)
		write_access_chain(*chain, ops[1], {});
	else
		CompilerGLSL::emit_instruction(instruction);
}

void CompilerHLSL::emit_access_chain(const Instruction &instruction)
{
	auto ops = stream(instruction);
	uint32_t length = instruction.length;

	bool need_byte_access_chain = false;
	auto &type = expression_type(ops[2]);
	const auto *chain = maybe_get<SPIRAccessChain>(ops[2]);

	if (chain)
	{
		// Keep tacking on an existing access chain.
		need_byte_access_chain = true;
	}
	else if (type.storage == StorageClassStorageBuffer || has_decoration(type.self, DecorationBufferBlock))
	{
		// If we are starting to poke into an SSBO, we are dealing with ByteAddressBuffers, and we need
		// to emit SPIRAccessChain rather than a plain SPIRExpression.
		uint32_t chain_arguments = length - 3;
		if (chain_arguments > type.array.size())
			need_byte_access_chain = true;
	}

	if (need_byte_access_chain)
	{
		// If we have a chain variable, we are already inside the SSBO, and any array type will refer to arrays within a block,
		// and not array of SSBO.
		uint32_t to_plain_buffer_length = chain ? 0u : static_cast<uint32_t>(type.array.size());

		auto *backing_variable = maybe_get_backing_variable(ops[2]);
		if (backing_variable != nullptr && is_user_type_structured(backing_variable->self))
		{
			CompilerGLSL::emit_instruction(instruction);
			return;
		}

		string base;
		if (to_plain_buffer_length != 0)
			base = access_chain(ops[2], &ops[3], to_plain_buffer_length, get<SPIRType>(ops[0]));
		else if (chain)
			base = chain->base;
		else
			base = to_expression(ops[2]);

		// Start traversing type hierarchy at the proper non-pointer types.
		auto *basetype = &get_pointee_type(type);

		// Traverse the type hierarchy down to the actual buffer types.
		for (uint32_t i = 0; i < to_plain_buffer_length; i++)
		{
			assert(basetype->parent_type);
			basetype = &get<SPIRType>(basetype->parent_type);
		}

		uint32_t matrix_stride = 0;
		uint32_t array_stride = 0;
		bool row_major_matrix = false;

		// Inherit matrix information.
		if (chain)
		{
			matrix_stride = chain->matrix_stride;
			row_major_matrix = chain->row_major_matrix;
			array_stride = chain->array_stride;
		}

		auto offsets = flattened_access_chain_offset(*basetype, &ops[3 + to_plain_buffer_length],
		                                             length - 3 - to_plain_buffer_length, 0, 1, &row_major_matrix,
		                                             &matrix_stride, &array_stride);

		auto &e = set<SPIRAccessChain>(ops[1], ops[0], type.storage, base, offsets.first, offsets.second);
		e.row_major_matrix = row_major_matrix;
		e.matrix_stride = matrix_stride;
		e.array_stride = array_stride;
		e.immutable = should_forward(ops[2]);
		e.loaded_from = backing_variable ? backing_variable->self : ID(0);

		if (chain)
		{
			e.dynamic_index += chain->dynamic_index;
			e.static_index += chain->static_index;
		}

		for (uint32_t i = 2; i < length; i++)
		{
			inherit_expression_dependencies(ops[1], ops[i]);
			add_implied_read_expression(e, ops[i]);
		}
	}
	else
	{
		CompilerGLSL::emit_instruction(instruction);
	}
}

void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, Op op)
{
	const char *atomic_op = nullptr;

	string value_expr;
	if (op != OpAtomicIDecrement && op != OpAtomicIIncrement && op != OpAtomicLoad && op != OpAtomicStore)
		value_expr = to_expression(ops[op == OpAtomicCompareExchange ? 6 : 5]);

	bool is_atomic_store = false;

	switch (op)
	{
	case OpAtomicIIncrement:
		atomic_op = "InterlockedAdd";
		value_expr = "1";
		break;

	case OpAtomicIDecrement:
		atomic_op = "InterlockedAdd";
		value_expr = "-1";
		break;

	case OpAtomicLoad:
		atomic_op = "InterlockedAdd";
		value_expr = "0";
		break;

	case OpAtomicISub:
		atomic_op = "InterlockedAdd";
		value_expr = join("-", enclose_expression(value_expr));
		break;

	case OpAtomicSMin:
	case OpAtomicUMin:
		atomic_op = "InterlockedMin";
		break;

	case OpAtomicSMax:
	case OpAtomicUMax:
		atomic_op = "InterlockedMax";
		break;

	case OpAtomicAnd:
		atomic_op = "InterlockedAnd";
		break;

	case OpAtomicOr:
		atomic_op = "InterlockedOr";
		break;

	case OpAtomicXor:
		atomic_op = "InterlockedXor";
		break;

	case OpAtomicIAdd:
		atomic_op = "InterlockedAdd";
		break;

	case OpAtomicExchange:
		atomic_op = "InterlockedExchange";
		break;

	case OpAtomicStore:
		atomic_op = "InterlockedExchange";
		is_atomic_store = true;
		break;

	case OpAtomicCompareExchange:
		if (length < 8)
			SPIRV_CROSS_THROW("Not enough data for opcode.");
		atomic_op = "InterlockedCompareExchange";
		value_expr = join(to_expression(ops[7]), ", ", value_expr);
		break;

	default:
		SPIRV_CROSS_THROW("Unknown atomic opcode.");
	}

	if (is_atomic_store)
	{
		auto &data_type = expression_type(ops[0]);
		auto *chain = maybe_get<SPIRAccessChain>(ops[0]);

		auto &tmp_id = extra_sub_expressions[ops[0]];
		if (!tmp_id)
		{
			tmp_id = ir.increase_bound_by(1);
			emit_uninitialized_temporary_expression(get_pointee_type(data_type).self, tmp_id);
		}

		if (data_type.storage == StorageClassImage || !chain)
		{
			statement(atomic_op, "(", to_non_uniform_aware_expression(ops[0]), ", ",
			          to_expression(ops[3]), ", ", to_expression(tmp_id), ");");
		}
		else
		{
			string base = chain->base;
			if (has_decoration(chain->self, DecorationNonUniform))
				convert_non_uniform_expression(base, chain->self);
			// RWByteAddressBuffer is always uint in its underlying type.
			statement(base, ".", atomic_op, "(", chain->dynamic_index, chain->static_index, ", ",
			          to_expression(ops[3]), ", ", to_expression(tmp_id), ");");
		}
	}
	else
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		forced_temporaries.insert(ops[1]);

		auto &type = get<SPIRType>(result_type);
		statement(variable_decl(type, to_name(id)), ";");

		auto &data_type = expression_type(ops[2]);
		auto *chain = maybe_get<SPIRAccessChain>(ops[2]);
		SPIRType::BaseType expr_type;
		if (data_type.storage == StorageClassImage || !chain)
		{
			statement(atomic_op, "(", to_non_uniform_aware_expression(ops[2]), ", ", value_expr, ", ", to_name(id), ");");
			expr_type = data_type.basetype;
		}
		else
		{
			// RWByteAddressBuffer is always uint in its underlying type.
			string base = chain->base;
			if (has_decoration(chain->self, DecorationNonUniform))
				convert_non_uniform_expression(base, chain->self);
			expr_type = SPIRType::UInt;
			statement(base, ".", atomic_op, "(", chain->dynamic_index, chain->static_index, ", ", value_expr,
			          ", ", to_name(id), ");");
		}

		auto expr = bitcast_expression(type, expr_type, to_name(id));
		set<SPIRExpression>(id, expr, result_type, true);
	}

	flush_all_atomic_capable_variables();
}
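
// Illustrative note (not part of the original source): an OpAtomicIAdd on an
// SSBO member at byte offset 0 emits roughly
//
//   uint _result;
//   _buf.InterlockedAdd(0, value, _result);
//
// where RWByteAddressBuffer::InterlockedAdd writes the pre-operation value to
// the final output parameter.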
  4930. void CompilerHLSL::emit_subgroup_op(const Instruction &i)
  4931. {
  4932. if (hlsl_options.shader_model < 60)
  4933. SPIRV_CROSS_THROW("Wave ops requires SM 6.0 or higher.");
  4934. const uint32_t *ops = stream(i);
  4935. auto op = static_cast<Op>(i.op);
  4936. uint32_t result_type = ops[0];
  4937. uint32_t id = ops[1];
  4938. auto scope = static_cast<Scope>(evaluate_constant_u32(ops[2]));
  4939. if (scope != ScopeSubgroup)
  4940. SPIRV_CROSS_THROW("Only subgroup scope is supported.");
  4941. const auto make_inclusive_Sum = [&](const string &expr) -> string {
  4942. return join(expr, " + ", to_expression(ops[4]));
  4943. };
  4944. const auto make_inclusive_Product = [&](const string &expr) -> string {
  4945. return join(expr, " * ", to_expression(ops[4]));
  4946. };
  4947. // If we need to do implicit bitcasts, make sure we do it with the correct type.
  4948. uint32_t integer_width = get_integer_width_for_instruction(i);
  4949. auto int_type = to_signed_basetype(integer_width);
  4950. auto uint_type = to_unsigned_basetype(integer_width);
  4951. #define make_inclusive_BitAnd(expr) ""
  4952. #define make_inclusive_BitOr(expr) ""
  4953. #define make_inclusive_BitXor(expr) ""
  4954. #define make_inclusive_Min(expr) ""
  4955. #define make_inclusive_Max(expr) ""
	switch (op)
	{
	case OpGroupNonUniformElect:
		emit_op(result_type, id, "WaveIsFirstLane()", true);
		break;

	case OpGroupNonUniformBroadcast:
		emit_binary_func_op(result_type, id, ops[3], ops[4], "WaveReadLaneAt");
		break;

	case OpGroupNonUniformBroadcastFirst:
		emit_unary_func_op(result_type, id, ops[3], "WaveReadLaneFirst");
		break;

	case OpGroupNonUniformBallot:
		emit_unary_func_op(result_type, id, ops[3], "WaveActiveBallot");
		break;

	case OpGroupNonUniformInverseBallot:
		SPIRV_CROSS_THROW("Cannot trivially implement InverseBallot in HLSL.");

	case OpGroupNonUniformBallotBitExtract:
		SPIRV_CROSS_THROW("Cannot trivially implement BallotBitExtract in HLSL.");

	case OpGroupNonUniformBallotFindLSB:
		SPIRV_CROSS_THROW("Cannot trivially implement BallotFindLSB in HLSL.");

	case OpGroupNonUniformBallotFindMSB:
		SPIRV_CROSS_THROW("Cannot trivially implement BallotFindMSB in HLSL.");

	case OpGroupNonUniformBallotBitCount:
	{
		auto operation = static_cast<GroupOperation>(ops[3]);
		bool forward = should_forward(ops[4]);
		if (operation == GroupOperationReduce)
		{
			auto left = join("countbits(", to_enclosed_expression(ops[4]), ".x) + countbits(",
			                 to_enclosed_expression(ops[4]), ".y)");
			auto right = join("countbits(", to_enclosed_expression(ops[4]), ".z) + countbits(",
			                  to_enclosed_expression(ops[4]), ".w)");
			emit_op(result_type, id, join(left, " + ", right), forward);
			inherit_expression_dependencies(id, ops[4]);
		}
		else if (operation == GroupOperationInclusiveScan)
		{
			auto left = join("countbits(", to_enclosed_expression(ops[4]), ".x & gl_SubgroupLeMask.x) + countbits(",
			                 to_enclosed_expression(ops[4]), ".y & gl_SubgroupLeMask.y)");
			auto right = join("countbits(", to_enclosed_expression(ops[4]), ".z & gl_SubgroupLeMask.z) + countbits(",
			                  to_enclosed_expression(ops[4]), ".w & gl_SubgroupLeMask.w)");
			emit_op(result_type, id, join(left, " + ", right), forward);

			if (!active_input_builtins.get(BuiltInSubgroupLeMask))
			{
				active_input_builtins.set(BuiltInSubgroupLeMask);
				force_recompile_guarantee_forward_progress();
			}
		}
		else if (operation == GroupOperationExclusiveScan)
		{
			auto left = join("countbits(", to_enclosed_expression(ops[4]), ".x & gl_SubgroupLtMask.x) + countbits(",
			                 to_enclosed_expression(ops[4]), ".y & gl_SubgroupLtMask.y)");
			auto right = join("countbits(", to_enclosed_expression(ops[4]), ".z & gl_SubgroupLtMask.z) + countbits(",
			                  to_enclosed_expression(ops[4]), ".w & gl_SubgroupLtMask.w)");
			emit_op(result_type, id, join(left, " + ", right), forward);

			if (!active_input_builtins.get(BuiltInSubgroupLtMask))
			{
				active_input_builtins.set(BuiltInSubgroupLtMask);
				force_recompile_guarantee_forward_progress();
			}
		}
		else
			SPIRV_CROSS_THROW("Invalid BitCount operation.");
		break;
	}
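
	// For reference: with a ballot value b, Reduce emits
	//   countbits(b.x) + countbits(b.y) + countbits(b.z) + countbits(b.w)
	// while the scans first AND each component with gl_SubgroupLeMask
	// (inclusive) or gl_SubgroupLtMask (exclusive) before counting.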
	case OpGroupNonUniformShuffle:
		emit_binary_func_op(result_type, id, ops[3], ops[4], "WaveReadLaneAt");
		break;

	case OpGroupNonUniformShuffleXor:
	{
		bool forward = should_forward(ops[3]);
		emit_op(ops[0], ops[1],
		        join("WaveReadLaneAt(", to_unpacked_expression(ops[3]), ", ",
		             "WaveGetLaneIndex() ^ ", to_enclosed_expression(ops[4]), ")"), forward);
		inherit_expression_dependencies(ops[1], ops[3]);
		break;
	}

	case OpGroupNonUniformShuffleUp:
	{
		bool forward = should_forward(ops[3]);
		emit_op(ops[0], ops[1],
		        join("WaveReadLaneAt(", to_unpacked_expression(ops[3]), ", ",
		             "WaveGetLaneIndex() - ", to_enclosed_expression(ops[4]), ")"), forward);
		inherit_expression_dependencies(ops[1], ops[3]);
		break;
	}

	case OpGroupNonUniformShuffleDown:
	{
		bool forward = should_forward(ops[3]);
		emit_op(ops[0], ops[1],
		        join("WaveReadLaneAt(", to_unpacked_expression(ops[3]), ", ",
		             "WaveGetLaneIndex() + ", to_enclosed_expression(ops[4]), ")"), forward);
		inherit_expression_dependencies(ops[1], ops[3]);
		break;
	}
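
	// Shuffle, ShuffleXor, ShuffleUp and ShuffleDown all lower to WaveReadLaneAt
	// with a computed lane index (the current lane XOR, minus, or plus the delta
	// operand, respectively).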
	case OpGroupNonUniformAll:
		emit_unary_func_op(result_type, id, ops[3], "WaveActiveAllTrue");
		break;

	case OpGroupNonUniformAny:
		emit_unary_func_op(result_type, id, ops[3], "WaveActiveAnyTrue");
		break;

	case OpGroupNonUniformAllEqual:
		emit_unary_func_op(result_type, id, ops[3], "WaveActiveAllEqual");
		break;

	// clang-format off
#define HLSL_GROUP_OP(op, hlsl_op, supports_scan) \
case OpGroupNonUniform##op: \
	{ \
		auto operation = static_cast<GroupOperation>(ops[3]); \
		if (operation == GroupOperationReduce) \
			emit_unary_func_op(result_type, id, ops[4], "WaveActive" #hlsl_op); \
		else if (operation == GroupOperationInclusiveScan && supports_scan) \
		{ \
			bool forward = should_forward(ops[4]); \
			emit_op(result_type, id, make_inclusive_##hlsl_op (join("WavePrefix" #hlsl_op, "(", to_expression(ops[4]), ")")), forward); \
			inherit_expression_dependencies(id, ops[4]); \
		} \
		else if (operation == GroupOperationExclusiveScan && supports_scan) \
			emit_unary_func_op(result_type, id, ops[4], "WavePrefix" #hlsl_op); \
		else if (operation == GroupOperationClusteredReduce) \
			SPIRV_CROSS_THROW("Cannot trivially implement ClusteredReduce in HLSL."); \
		else \
			SPIRV_CROSS_THROW("Invalid group operation."); \
		break; \
	}
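
// Illustration: HLSL_GROUP_OP(FAdd, Sum, true) expands to a case that emits
// WaveActiveSum(x) for Reduce, WavePrefixSum(x) + x for InclusiveScan (via
// make_inclusive_Sum, since WavePrefixSum is an exclusive scan), and plain
// WavePrefixSum(x) for ExclusiveScan.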
#define HLSL_GROUP_OP_CAST(op, hlsl_op, type) \
case OpGroupNonUniform##op: \
	{ \
		auto operation = static_cast<GroupOperation>(ops[3]); \
		if (operation == GroupOperationReduce) \
			emit_unary_func_op_cast(result_type, id, ops[4], "WaveActive" #hlsl_op, type, type); \
		else \
			SPIRV_CROSS_THROW("Invalid group operation."); \
		break; \
	}

	HLSL_GROUP_OP(FAdd, Sum, true)
	HLSL_GROUP_OP(FMul, Product, true)
	HLSL_GROUP_OP(FMin, Min, false)
	HLSL_GROUP_OP(FMax, Max, false)
	HLSL_GROUP_OP(IAdd, Sum, true)
	HLSL_GROUP_OP(IMul, Product, true)
	HLSL_GROUP_OP_CAST(SMin, Min, int_type)
	HLSL_GROUP_OP_CAST(SMax, Max, int_type)
	HLSL_GROUP_OP_CAST(UMin, Min, uint_type)
	HLSL_GROUP_OP_CAST(UMax, Max, uint_type)
	HLSL_GROUP_OP(BitwiseAnd, BitAnd, false)
	HLSL_GROUP_OP(BitwiseOr, BitOr, false)
	HLSL_GROUP_OP(BitwiseXor, BitXor, false)
	HLSL_GROUP_OP_CAST(LogicalAnd, BitAnd, uint_type)
	HLSL_GROUP_OP_CAST(LogicalOr, BitOr, uint_type)
	HLSL_GROUP_OP_CAST(LogicalXor, BitXor, uint_type)
#undef HLSL_GROUP_OP
#undef HLSL_GROUP_OP_CAST
	// clang-format on
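
	// The Logical* reductions reuse the bitwise wave intrinsics: the wave bit
	// ops operate on integer types, so bool operands are routed through the
	// _CAST variant with uint_type first.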
	case OpGroupNonUniformQuadSwap:
	{
		uint32_t direction = evaluate_constant_u32(ops[4]);
		if (direction == 0)
			emit_unary_func_op(result_type, id, ops[3], "QuadReadAcrossX");
		else if (direction == 1)
			emit_unary_func_op(result_type, id, ops[3], "QuadReadAcrossY");
		else if (direction == 2)
			emit_unary_func_op(result_type, id, ops[3], "QuadReadAcrossDiagonal");
		else
			SPIRV_CROSS_THROW("Invalid quad swap direction.");
		break;
	}

	case OpGroupNonUniformQuadBroadcast:
	{
		emit_binary_func_op(result_type, id, ops[3], ops[4], "QuadReadLaneAt");
		break;
	}

	default:
		SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
	}

	register_control_dependent_expression(id);
}

void CompilerHLSL::emit_instruction(const Instruction &instruction)
{
	auto ops = stream(instruction);
	auto opcode = static_cast<Op>(instruction.op);

#define HLSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op)
#define HLSL_BOP_CAST(op, type) \
	emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode), false)
#define HLSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op)
#define HLSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op)
#define HLSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op)
#define HLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
#define HLSL_BFOP_CAST(op, type) \
	emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
#define HLSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op)
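
// HLSL_BOP/HLSL_UOP emit infix and prefix operator syntax, the *FOP macros
// emit function-call syntax, and the _CAST variants first bitcast operands to
// a common signed/unsigned type of the instruction's integer width.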
	// If we need to do implicit bitcasts, make sure we do it with the correct type.
	uint32_t integer_width = get_integer_width_for_instruction(instruction);
	auto int_type = to_signed_basetype(integer_width);
	auto uint_type = to_unsigned_basetype(integer_width);

	opcode = get_remapped_spirv_op(opcode);

	switch (opcode)
	{
	case OpAccessChain:
	case OpInBoundsAccessChain:
	{
		emit_access_chain(instruction);
		break;
	}

	case OpBitcast:
	{
		auto bitcast_type = get_bitcast_type(ops[0], ops[2]);
		if (bitcast_type == CompilerHLSL::TypeNormal)
			CompilerGLSL::emit_instruction(instruction);
		else
		{
			if (!requires_uint2_packing)
			{
				requires_uint2_packing = true;
				force_recompile();
			}

			if (bitcast_type == CompilerHLSL::TypePackUint2x32)
				emit_unary_func_op(ops[0], ops[1], ops[2], "spvPackUint2x32");
			else
				emit_unary_func_op(ops[0], ops[1], ops[2], "spvUnpackUint2x32");
		}
		break;
	}
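
	// uint64 <-> uint2 bitcasts cannot be expressed with asuint()-style casts,
	// so the first time one is seen, requires_uint2_packing is latched and
	// force_recompile() reruns codegen with the spvPackUint2x32 and
	// spvUnpackUint2x32 helpers emitted into the output on the next pass.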
	case OpSelect:
	{
		auto &value_type = expression_type(ops[3]);
		if (value_type.basetype == SPIRType::Struct || is_array(value_type))
		{
			// HLSL does not support ternary expressions on composites.
			// Cannot use branches, since we might be in a continue block
			// where explicit control flow is prohibited.
			// Emit a helper function where we can use control flow.
			TypeID value_type_id = expression_type_id(ops[3]);
			auto itr = std::find(composite_selection_workaround_types.begin(),
			                     composite_selection_workaround_types.end(),
			                     value_type_id);
			if (itr == composite_selection_workaround_types.end())
			{
				composite_selection_workaround_types.push_back(value_type_id);
				force_recompile();
			}
			emit_uninitialized_temporary_expression(ops[0], ops[1]);
			statement("spvSelectComposite(",
			          to_expression(ops[1]), ", ", to_expression(ops[2]), ", ",
			          to_expression(ops[3]), ", ", to_expression(ops[4]), ");");
		}
		else
			CompilerGLSL::emit_instruction(instruction);
		break;
	}
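
	// Same recompile pattern as OpBitcast above: the composite's type id is
	// recorded so a later pass can emit a matching spvSelectComposite overload,
	// while this pass simply calls the helper by name.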
	case OpStore:
	{
		emit_store(instruction);
		break;
	}

	case OpLoad:
	{
		emit_load(instruction);
		break;
	}

	case OpMatrixTimesVector:
	{
		// Matrices are kept in a transposed state all the time, flip multiplication order always.
		emit_binary_func_op(ops[0], ops[1], ops[3], ops[2], "mul");
		break;
	}

	case OpVectorTimesMatrix:
	{
		// Matrices are kept in a transposed state all the time, flip multiplication order always.
		emit_binary_func_op(ops[0], ops[1], ops[3], ops[2], "mul");
		break;
	}

	case OpMatrixTimesMatrix:
	{
		// Matrices are kept in a transposed state all the time, flip multiplication order always.
		emit_binary_func_op(ops[0], ops[1], ops[3], ops[2], "mul");
		break;
	}
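
	// Because the matrix is stored transposed, what SPIR-V expresses as M * v
	// comes out as mul(v, M) in HLSL (and vice versa): treating v as a row
	// vector against the transposed matrix yields the same result, so swapping
	// the two operands is all that is needed.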
	case OpOuterProduct:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		uint32_t a = ops[2];
		uint32_t b = ops[3];

		auto &type = get<SPIRType>(result_type);
		string expr = type_to_glsl_constructor(type);
		expr += "(";
		for (uint32_t col = 0; col < type.columns; col++)
		{
			expr += to_enclosed_expression(a);
			expr += " * ";
			expr += to_extract_component_expression(b, col);
			if (col + 1 < type.columns)
				expr += ", ";
		}
		expr += ")";
		emit_op(result_type, id, expr, should_forward(a) && should_forward(b));
		inherit_expression_dependencies(id, a);
		inherit_expression_dependencies(id, b);
		break;
	}
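
	// E.g. for a float3x3 result this builds:
	//   float3x3(a * b.x, a * b.y, a * b.z)
	// with one column expression per column of the result type.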
	case OpFMod:
	{
		if (!requires_op_fmod)
		{
			requires_op_fmod = true;
			force_recompile();
		}
		CompilerGLSL::emit_instruction(instruction);
		break;
	}

	case OpFRem:
		emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], "fmod");
		break;
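
	// HLSL's fmod() truncates toward zero (the result takes the sign of the
	// dividend), which matches SPIR-V OpFRem exactly, so it maps directly.
	// OpFMod, whose result follows the sign of the divisor like GLSL mod(),
	// has no HLSL intrinsic, hence the recompile with a helper above.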
	case OpImage:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		auto *combined = maybe_get<SPIRCombinedImageSampler>(ops[2]);

		if (combined)
		{
			auto &e = emit_op(result_type, id, to_expression(combined->image), true, true);
			auto *var = maybe_get_backing_variable(combined->image);
			if (var)
				e.loaded_from = var->self;
		}
		else
		{
			auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true);
			auto *var = maybe_get_backing_variable(ops[2]);
			if (var)
				e.loaded_from = var->self;
		}
		break;
	}

	case OpDPdx:
		HLSL_UFOP(ddx);
		register_control_dependent_expression(ops[1]);
		break;

	case OpDPdy:
		HLSL_UFOP(ddy);
		register_control_dependent_expression(ops[1]);
		break;

	case OpDPdxFine:
		HLSL_UFOP(ddx_fine);
		register_control_dependent_expression(ops[1]);
		break;

	case OpDPdyFine:
		HLSL_UFOP(ddy_fine);
		register_control_dependent_expression(ops[1]);
		break;

	case OpDPdxCoarse:
		HLSL_UFOP(ddx_coarse);
		register_control_dependent_expression(ops[1]);
		break;

	case OpDPdyCoarse:
		HLSL_UFOP(ddy_coarse);
		register_control_dependent_expression(ops[1]);
		break;

	case OpFwidth:
	case OpFwidthCoarse:
	case OpFwidthFine:
		HLSL_UFOP(fwidth);
		register_control_dependent_expression(ops[1]);
		break;
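
	// Derivative results depend on neighboring lanes, so each one is registered
	// as control-dependent; that keeps the expression from being forwarded past
	// flow control, where the set of active lanes could change the result.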
	case OpLogicalNot:
	{
		auto result_type = ops[0];
		auto id = ops[1];
		auto &type = get<SPIRType>(result_type);

		if (type.vecsize > 1)
			emit_unrolled_unary_op(result_type, id, ops[2], "!");
		else
			HLSL_UOP(!);
		break;
	}

	case OpIEqual:
	{
		auto result_type = ops[0];
		auto id = ops[1];

		if (expression_type(ops[2]).vecsize > 1)
			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==", false, SPIRType::Unknown);
		else
			HLSL_BOP_CAST(==, int_type);
		break;
	}

	case OpLogicalEqual:
	case OpFOrdEqual:
	case OpFUnordEqual:
	{
		// HLSL != operator is unordered.
		// https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-float-rules.
		// isnan() is apparently implemented as x != x as well.
		// We cannot implement UnordEqual as !(OrdNotEqual), as HLSL cannot express OrdNotEqual.
		// HACK: FUnordEqual will be implemented as FOrdEqual.
		auto result_type = ops[0];
		auto id = ops[1];

		if (expression_type(ops[2]).vecsize > 1)
			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==", false, SPIRType::Unknown);
		else
			HLSL_BOP(==);
		break;
	}

	case OpINotEqual:
	{
		auto result_type = ops[0];
		auto id = ops[1];

		if (expression_type(ops[2]).vecsize > 1)
			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=", false, SPIRType::Unknown);
		else
			HLSL_BOP_CAST(!=, int_type);
		break;
	}

	case OpLogicalNotEqual:
	case OpFOrdNotEqual:
	case OpFUnordNotEqual:
	{
		// HLSL != operator is unordered.
		// https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-float-rules.
		// isnan() is apparently implemented as x != x as well.
		// FIXME: FOrdNotEqual cannot be implemented in a crisp and simple way here.
		// We would need to do something like not(UnordEqual), but that cannot be expressed either.
		// Adding a lot of NaN checks would be a breaking change from perspective of performance.
		// SPIR-V will generally use isnan() checks when this even matters.
		// HACK: FOrdNotEqual will be implemented as FUnordEqual.
		auto result_type = ops[0];
		auto id = ops[1];

		if (expression_type(ops[2]).vecsize > 1)
			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=", false, SPIRType::Unknown);
		else
			HLSL_BOP(!=);
		break;
	}
	case OpUGreaterThan:
	case OpSGreaterThan:
	{
		auto result_type = ops[0];
		auto id = ops[1];
		auto type = opcode == OpUGreaterThan ? uint_type : int_type;

		if (expression_type(ops[2]).vecsize > 1)
			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", false, type);
		else
			HLSL_BOP_CAST(>, type);
		break;
	}

	case OpFOrdGreaterThan:
	{
		auto result_type = ops[0];
		auto id = ops[1];

		if (expression_type(ops[2]).vecsize > 1)
			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", false, SPIRType::Unknown);
		else
			HLSL_BOP(>);
		break;
	}

	case OpFUnordGreaterThan:
	{
		auto result_type = ops[0];
		auto id = ops[1];

		if (expression_type(ops[2]).vecsize > 1)
			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", true, SPIRType::Unknown);
		else
			CompilerGLSL::emit_instruction(instruction);
		break;
	}

	case OpUGreaterThanEqual:
	case OpSGreaterThanEqual:
	{
		auto result_type = ops[0];
		auto id = ops[1];
		auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type;

		if (expression_type(ops[2]).vecsize > 1)
			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", false, type);
		else
			HLSL_BOP_CAST(>=, type);
		break;
	}

	case OpFOrdGreaterThanEqual:
	{
		auto result_type = ops[0];
		auto id = ops[1];

		if (expression_type(ops[2]).vecsize > 1)
			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", false, SPIRType::Unknown);
		else
			HLSL_BOP(>=);
		break;
	}

	case OpFUnordGreaterThanEqual:
	{
		auto result_type = ops[0];
		auto id = ops[1];

		if (expression_type(ops[2]).vecsize > 1)
			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", true, SPIRType::Unknown);
		else
			CompilerGLSL::emit_instruction(instruction);
		break;
	}

	case OpULessThan:
	case OpSLessThan:
	{
		auto result_type = ops[0];
		auto id = ops[1];
		auto type = opcode == OpULessThan ? uint_type : int_type;

		if (expression_type(ops[2]).vecsize > 1)
			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", false, type);
		else
			HLSL_BOP_CAST(<, type);
		break;
	}

	case OpFOrdLessThan:
	{
		auto result_type = ops[0];
		auto id = ops[1];

		if (expression_type(ops[2]).vecsize > 1)
			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", false, SPIRType::Unknown);
		else
			HLSL_BOP(<);
		break;
	}

	case OpFUnordLessThan:
	{
		auto result_type = ops[0];
		auto id = ops[1];

		if (expression_type(ops[2]).vecsize > 1)
			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", true, SPIRType::Unknown);
		else
			CompilerGLSL::emit_instruction(instruction);
		break;
	}

	case OpULessThanEqual:
	case OpSLessThanEqual:
	{
		auto result_type = ops[0];
		auto id = ops[1];
		auto type = opcode == OpULessThanEqual ? uint_type : int_type;

		if (expression_type(ops[2]).vecsize > 1)
			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", false, type);
		else
			HLSL_BOP_CAST(<=, type);
		break;
	}

	case OpFOrdLessThanEqual:
	{
		auto result_type = ops[0];
		auto id = ops[1];

		if (expression_type(ops[2]).vecsize > 1)
			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", false, SPIRType::Unknown);
		else
			HLSL_BOP(<=);
		break;
	}

	case OpFUnordLessThanEqual:
	{
		auto result_type = ops[0];
		auto id = ops[1];

		if (expression_type(ops[2]).vecsize > 1)
			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", true, SPIRType::Unknown);
		else
			CompilerGLSL::emit_instruction(instruction);
		break;
	}
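
	// Pattern for the unrolled FUnord* compares above: an unordered comparison
	// is emitted as the negated inverse ordered one, e.g. FUnordLessThan(a, b)
	// becomes !(a >= b), so NaN operands correctly yield true. The "true"
	// argument to emit_unrolled_binary_op requests that negation.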
	case OpImageQueryLod:
		emit_texture_op(instruction, false);
		break;

	case OpImageQuerySizeLod:
	{
		auto result_type = ops[0];
		auto id = ops[1];

		require_texture_query_variant(ops[2]);
		auto dummy_samples_levels = join(get_fallback_name(id), "_dummy_parameter");
		statement("uint ", dummy_samples_levels, ";");

		auto expr = join("spvTextureSize(", to_non_uniform_aware_expression(ops[2]), ", ",
		                 bitcast_expression(SPIRType::UInt, ops[3]), ", ", dummy_samples_levels, ")");

		auto &restype = get<SPIRType>(ops[0]);
		expr = bitcast_expression(restype, SPIRType::UInt, expr);
		emit_op(result_type, id, expr, true);
		break;
	}
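
	// Emits, in effect (with placeholder names):
	//   uint _id_dummy_parameter;
	//   ... spvTextureSize(tex, uint(lod), _id_dummy_parameter) ...
	// The dummy out-parameter soaks up the sample/level count that the
	// underlying dimensions query also returns.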
	case OpImageQuerySize:
	{
		auto result_type = ops[0];
		auto id = ops[1];

		require_texture_query_variant(ops[2]);
		bool uav = expression_type(ops[2]).image.sampled == 2;
		if (const auto *var = maybe_get_backing_variable(ops[2]))
			if (hlsl_options.nonwritable_uav_texture_as_srv && has_decoration(var->self, DecorationNonWritable))
				uav = false;

		auto dummy_samples_levels = join(get_fallback_name(id), "_dummy_parameter");
		statement("uint ", dummy_samples_levels, ";");

		string expr;
		if (uav)
			expr = join("spvImageSize(", to_non_uniform_aware_expression(ops[2]), ", ", dummy_samples_levels, ")");
		else
			expr = join("spvTextureSize(", to_non_uniform_aware_expression(ops[2]), ", 0u, ", dummy_samples_levels, ")");

		auto &restype = get<SPIRType>(ops[0]);
		expr = bitcast_expression(restype, SPIRType::UInt, expr);
		emit_op(result_type, id, expr, true);
		break;
	}

	case OpImageQuerySamples:
	case OpImageQueryLevels:
	{
		auto result_type = ops[0];
		auto id = ops[1];

		require_texture_query_variant(ops[2]);
		bool uav = expression_type(ops[2]).image.sampled == 2;
		if (opcode == OpImageQueryLevels && uav)
			SPIRV_CROSS_THROW("Cannot query levels for UAV images.");
		if (const auto *var = maybe_get_backing_variable(ops[2]))
			if (hlsl_options.nonwritable_uav_texture_as_srv && has_decoration(var->self, DecorationNonWritable))
				uav = false;

		// Keep it simple and do not emit special variants to make this look nicer ...
		// This stuff is barely, if ever, used.
		forced_temporaries.insert(id);
		auto &type = get<SPIRType>(result_type);
		statement(variable_decl(type, to_name(id)), ";");

		if (uav)
			statement("spvImageSize(", to_non_uniform_aware_expression(ops[2]), ", ", to_name(id), ");");
		else
			statement("spvTextureSize(", to_non_uniform_aware_expression(ops[2]), ", 0u, ", to_name(id), ");");

		auto &restype = get<SPIRType>(ops[0]);
		auto expr = bitcast_expression(restype, SPIRType::UInt, to_name(id));
		set<SPIRExpression>(id, expr, result_type, true);
		break;
	}
	case OpImageRead:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		auto *var = maybe_get_backing_variable(ops[2]);
		auto &type = expression_type(ops[2]);
		bool subpass_data = type.image.dim == DimSubpassData;
		bool pure = false;

		string imgexpr;

		if (subpass_data)
		{
			if (hlsl_options.shader_model < 40)
				SPIRV_CROSS_THROW("Subpass loads are not supported in HLSL shader model 2/3.");

			// Similar to GLSL, implement subpass loads using texelFetch.
			if (type.image.ms)
			{
				uint32_t operands = ops[4];
				if (operands != ImageOperandsSampleMask || instruction.length != 6)
					SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected operand mask was used.");
				uint32_t sample = ops[5];
				imgexpr = join(to_non_uniform_aware_expression(ops[2]), ".Load(int2(gl_FragCoord.xy), ", to_expression(sample), ")");
			}
			else
				imgexpr = join(to_non_uniform_aware_expression(ops[2]), ".Load(int3(int2(gl_FragCoord.xy), 0))");

			pure = true;
		}
		else
		{
			imgexpr = join(to_non_uniform_aware_expression(ops[2]), "[", to_expression(ops[3]), "]");
			// The underlying image type in HLSL depends on the image format, unlike GLSL, where all images are "vec4",
			// except that the underlying type changes how the data is interpreted.

			bool force_srv =
			    hlsl_options.nonwritable_uav_texture_as_srv && var && has_decoration(var->self, DecorationNonWritable);
			pure = force_srv;

			if (var && !subpass_data && !force_srv)
				imgexpr = remap_swizzle(get<SPIRType>(result_type),
				                        image_format_to_components(get<SPIRType>(var->basetype).image.format), imgexpr);
		}

		if (var)
		{
			bool forward = forced_temporaries.find(id) == end(forced_temporaries);
			auto &e = emit_op(result_type, id, imgexpr, forward);

			if (!pure)
			{
				e.loaded_from = var->self;
				if (forward)
					var->dependees.push_back(id);
			}
		}
		else
			emit_op(result_type, id, imgexpr, false);

		inherit_expression_dependencies(id, ops[2]);
		if (type.image.ms)
			inherit_expression_dependencies(id, ops[5]);
		break;
	}
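
	// HLSL UAV loads return the format's own component count (e.g. a scalar
	// float for an R32f image) rather than a vec4, so remap_swizzle widens the
	// read value back to the result type SPIR-V expects. SRV-backed reads via
	// nonwritable_uav_texture_as_srv skip this and are treated as pure.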
	case OpImageWrite:
	{
		auto *var = maybe_get_backing_variable(ops[0]);

		// The underlying image type in HLSL depends on the image format, unlike GLSL, where all images are "vec4",
		// except that the underlying type changes how the data is interpreted.
		auto value_expr = to_expression(ops[2]);
		if (var)
		{
			auto &type = get<SPIRType>(var->basetype);
			auto narrowed_type = get<SPIRType>(type.image.type);
			narrowed_type.vecsize = image_format_to_components(type.image.format);
			value_expr = remap_swizzle(narrowed_type, expression_type(ops[2]).vecsize, value_expr);
		}

		statement(to_non_uniform_aware_expression(ops[0]), "[", to_expression(ops[1]), "] = ", value_expr, ";");
		if (var && variable_storage_is_aliased(*var))
			flush_all_aliased_variables();
		break;
	}

	case OpImageTexelPointer:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];

		auto expr = to_expression(ops[2]);
		expr += join("[", to_expression(ops[3]), "]");
		auto &e = set<SPIRExpression>(id, expr, result_type, true);

		// When using the pointer, we need to know which variable it is actually loaded from.
		auto *var = maybe_get_backing_variable(ops[2]);
		e.loaded_from = var ? var->self : ID(0);
		inherit_expression_dependencies(id, ops[3]);
		break;
	}

	case OpAtomicFAddEXT:
	case OpAtomicFMinEXT:
	case OpAtomicFMaxEXT:
		SPIRV_CROSS_THROW("Floating-point atomics are not supported in HLSL.");

	case OpAtomicCompareExchange:
	case OpAtomicExchange:
	case OpAtomicISub:
	case OpAtomicSMin:
	case OpAtomicUMin:
	case OpAtomicSMax:
	case OpAtomicUMax:
	case OpAtomicAnd:
	case OpAtomicOr:
	case OpAtomicXor:
	case OpAtomicIAdd:
	case OpAtomicIIncrement:
	case OpAtomicIDecrement:
	case OpAtomicLoad:
	case OpAtomicStore:
	{
		emit_atomic(ops, instruction.length, opcode);
		break;
	}
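
	// All integer atomics funnel into emit_atomic(), which lowers them onto
	// HLSL's Interlocked* intrinsic family; the float atomics bail out above
	// because HLSL has no native float Interlocked* intrinsics.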
	case OpControlBarrier:
	case OpMemoryBarrier:
	{
		uint32_t memory;
		uint32_t semantics;

		if (opcode == OpMemoryBarrier)
		{
			memory = evaluate_constant_u32(ops[0]);
			semantics = evaluate_constant_u32(ops[1]);
		}
		else
		{
			memory = evaluate_constant_u32(ops[1]);
			semantics = evaluate_constant_u32(ops[2]);
		}

		if (memory == ScopeSubgroup)
		{
			// No Wave-barriers in HLSL.
			break;
		}

		// We only care about these flags, acquire/release and friends are not relevant to HLSL.
		semantics = mask_relevant_memory_semantics(semantics);

		if (opcode == OpMemoryBarrier)
		{
			// If we are a memory barrier, and the next instruction is a control barrier, check if that memory barrier
			// does what we need, so we avoid redundant barriers.
			const Instruction *next = get_next_instruction_in_block(instruction);
			if (next && next->op == OpControlBarrier)
			{
				auto *next_ops = stream(*next);
				uint32_t next_memory = evaluate_constant_u32(next_ops[1]);
				uint32_t next_semantics = evaluate_constant_u32(next_ops[2]);
				next_semantics = mask_relevant_memory_semantics(next_semantics);

				// There is no "just execution barrier" in HLSL.
				// If there are no memory semantics for next instruction, we will imply group shared memory is synced.
				if (next_semantics == 0)
					next_semantics = MemorySemanticsWorkgroupMemoryMask;

				bool memory_scope_covered = false;
				if (next_memory == memory)
					memory_scope_covered = true;
				else if (next_semantics == MemorySemanticsWorkgroupMemoryMask)
				{
					// If we only care about workgroup memory, either Device or Workgroup scope is fine,
					// scope does not have to match.
					if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) &&
					    (memory == ScopeDevice || memory == ScopeWorkgroup))
					{
						memory_scope_covered = true;
					}
				}
				else if (memory == ScopeWorkgroup && next_memory == ScopeDevice)
				{
					// The control barrier has device scope, but the memory barrier just has workgroup scope.
					memory_scope_covered = true;
				}

				// If we have the same memory scope, and all memory types are covered, we're good.
				if (memory_scope_covered && (semantics & next_semantics) == semantics)
					break;
			}
		}

		// We are synchronizing some memory or syncing execution,
		// so we cannot forward any loads beyond the memory barrier.
		if (semantics || opcode == OpControlBarrier)
		{
			assert(current_emitting_block);
			flush_control_dependent_expressions(current_emitting_block->self);
			flush_all_active_variables();
		}

		if (opcode == OpControlBarrier)
		{
			// We cannot emit just an execution barrier; for no memory semantics, pick the cheapest option.
			if (semantics == MemorySemanticsWorkgroupMemoryMask || semantics == 0)
				statement("GroupMemoryBarrierWithGroupSync();");
			else if (semantics != 0 && (semantics & MemorySemanticsWorkgroupMemoryMask) == 0)
				statement("DeviceMemoryBarrierWithGroupSync();");
			else
				statement("AllMemoryBarrierWithGroupSync();");
		}
		else
		{
			if (semantics == MemorySemanticsWorkgroupMemoryMask)
				statement("GroupMemoryBarrier();");
			else if (semantics != 0 && (semantics & MemorySemanticsWorkgroupMemoryMask) == 0)
				statement("DeviceMemoryBarrier();");
			else
				statement("AllMemoryBarrier();");
		}
		break;
	}
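
	// Net mapping: workgroup-only (or empty) semantics -> GroupMemoryBarrier*,
	// semantics that exclude workgroup memory -> DeviceMemoryBarrier*, and
	// anything mixed -> AllMemoryBarrier*; OpControlBarrier uses the
	// ...WithGroupSync() forms, OpMemoryBarrier the plain ones.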
	case OpBitFieldInsert:
	{
		if (!requires_bitfield_insert)
		{
			requires_bitfield_insert = true;
			force_recompile();
		}

		auto expr = join("spvBitfieldInsert(", to_expression(ops[2]), ", ", to_expression(ops[3]), ", ",
		                 to_expression(ops[4]), ", ", to_expression(ops[5]), ")");

		bool forward =
		    should_forward(ops[2]) && should_forward(ops[3]) && should_forward(ops[4]) && should_forward(ops[5]);

		auto &restype = get<SPIRType>(ops[0]);
		expr = bitcast_expression(restype, SPIRType::UInt, expr);
		emit_op(ops[0], ops[1], expr, forward);
		break;
	}

	case OpBitFieldSExtract:
	case OpBitFieldUExtract:
	{
		if (!requires_bitfield_extract)
		{
			requires_bitfield_extract = true;
			force_recompile();
		}

		if (opcode == OpBitFieldSExtract)
			HLSL_TFOP(spvBitfieldSExtract);
		else
			HLSL_TFOP(spvBitfieldUExtract);
		break;
	}

	case OpBitCount:
	{
		auto basetype = expression_type(ops[2]).basetype;
		emit_unary_func_op_cast(ops[0], ops[1], ops[2], "countbits", basetype, basetype);
		break;
	}

	case OpBitReverse:
		HLSL_UFOP(reversebits);
		break;
	case OpArrayLength:
	{
		auto *var = maybe_get_backing_variable(ops[2]);
		if (!var)
			SPIRV_CROSS_THROW("Array length must point directly to an SSBO block.");

		auto &type = get<SPIRType>(var->basetype);
		if (!has_decoration(type.self, DecorationBlock) && !has_decoration(type.self, DecorationBufferBlock))
			SPIRV_CROSS_THROW("Array length expression must point to a block type.");

		// This must be 32-bit uint, so we're good to go.
		emit_uninitialized_temporary_expression(ops[0], ops[1]);
		statement(to_non_uniform_aware_expression(ops[2]), ".GetDimensions(", to_expression(ops[1]), ");");
		uint32_t offset = type_struct_member_offset(type, ops[3]);
		uint32_t stride = type_struct_member_array_stride(type, ops[3]);
		statement(to_expression(ops[1]), " = (", to_expression(ops[1]), " - ", offset, ") / ", stride, ";");
		break;
	}
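
	// Emits, in effect (with placeholder names):
	//   uint _n; buf.GetDimensions(_n);
	//   _n = (_n - member_offset) / array_stride;
	// turning the size reported by GetDimensions into a runtime array length.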
	case OpIsHelperInvocationEXT:
		if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelFragment)
			SPIRV_CROSS_THROW("Helper Invocation input is only supported in PS 5.0 or higher.");
		// Helper lane state with demote is volatile by nature.
		// Do not forward this.
		emit_op(ops[0], ops[1], "IsHelperLane()", false);
		break;

	case OpBeginInvocationInterlockEXT:
	case OpEndInvocationInterlockEXT:
		if (hlsl_options.shader_model < 51)
			SPIRV_CROSS_THROW("Rasterizer order views require Shader Model 5.1.");
		break; // Nothing to do in the body

	case OpRayQueryInitializeKHR:
	{
		flush_variable_declaration(ops[0]);

		std::string ray_desc_name = get_unique_identifier();
		statement("RayDesc ", ray_desc_name, " = {", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
		          to_expression(ops[6]), ", ", to_expression(ops[7]), "};");

		statement(to_expression(ops[0]), ".TraceRayInline(",
		          to_expression(ops[1]), ", ", // acc structure
		          to_expression(ops[2]), ", ", // ray flags
		          to_expression(ops[3]), ", ", // mask
		          ray_desc_name, ");"); // ray
		break;
	}
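
	// Emits, in effect (placeholder operand names):
	//   RayDesc _NNident = { origin, tmin, direction, tmax };
	//   rq.TraceRayInline(accel_structure, ray_flags, cull_mask, _NNident);
	// where _NNident stands in for the get_unique_identifier() name.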
	case OpRayQueryProceedKHR:
	{
		flush_variable_declaration(ops[0]);
		emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".Proceed()"), false);
		break;
	}

	case OpRayQueryTerminateKHR:
	{
		flush_variable_declaration(ops[0]);
		statement(to_expression(ops[0]), ".Abort();");
		break;
	}

	case OpRayQueryGenerateIntersectionKHR:
	{
		flush_variable_declaration(ops[0]);
		statement(to_expression(ops[0]), ".CommitProceduralPrimitiveHit(", to_expression(ops[1]), ");");
		break;
	}

	case OpRayQueryConfirmIntersectionKHR:
	{
		flush_variable_declaration(ops[0]);
		statement(to_expression(ops[0]), ".CommitNonOpaqueTriangleHit();");
		break;
	}

	case OpRayQueryGetIntersectionTypeKHR:
	{
		emit_rayquery_function(".CommittedStatus()", ".CandidateType()", ops);
		break;
	}

	case OpRayQueryGetIntersectionTKHR:
	{
		emit_rayquery_function(".CommittedRayT()", ".CandidateTriangleRayT()", ops);
		break;
	}

	case OpRayQueryGetIntersectionInstanceCustomIndexKHR:
	{
		emit_rayquery_function(".CommittedInstanceID()", ".CandidateInstanceID()", ops);
		break;
	}

	case OpRayQueryGetIntersectionInstanceIdKHR:
	{
		emit_rayquery_function(".CommittedInstanceIndex()", ".CandidateInstanceIndex()", ops);
		break;
	}

	case OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR:
	{
		emit_rayquery_function(".CommittedInstanceContributionToHitGroupIndex()",
		                       ".CandidateInstanceContributionToHitGroupIndex()", ops);
		break;
	}

	case OpRayQueryGetIntersectionGeometryIndexKHR:
	{
		emit_rayquery_function(".CommittedGeometryIndex()",
		                       ".CandidateGeometryIndex()", ops);
		break;
	}

	case OpRayQueryGetIntersectionPrimitiveIndexKHR:
	{
		emit_rayquery_function(".CommittedPrimitiveIndex()", ".CandidatePrimitiveIndex()", ops);
		break;
	}

	case OpRayQueryGetIntersectionBarycentricsKHR:
	{
		emit_rayquery_function(".CommittedTriangleBarycentrics()", ".CandidateTriangleBarycentrics()", ops);
		break;
	}

	case OpRayQueryGetIntersectionFrontFaceKHR:
	{
		emit_rayquery_function(".CommittedTriangleFrontFace()", ".CandidateTriangleFrontFace()", ops);
		break;
	}

	case OpRayQueryGetIntersectionCandidateAABBOpaqueKHR:
	{
		flush_variable_declaration(ops[0]);
		emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".CandidateProceduralPrimitiveNonOpaque()"), false);
		break;
	}

	case OpRayQueryGetIntersectionObjectRayDirectionKHR:
	{
		emit_rayquery_function(".CommittedObjectRayDirection()", ".CandidateObjectRayDirection()", ops);
		break;
	}

	case OpRayQueryGetIntersectionObjectRayOriginKHR:
	{
		flush_variable_declaration(ops[0]);
		emit_rayquery_function(".CommittedObjectRayOrigin()", ".CandidateObjectRayOrigin()", ops);
		break;
	}

	case OpRayQueryGetIntersectionObjectToWorldKHR:
	{
		emit_rayquery_function(".CommittedObjectToWorld4x3()", ".CandidateObjectToWorld4x3()", ops);
		break;
	}

	case OpRayQueryGetIntersectionWorldToObjectKHR:
	{
		emit_rayquery_function(".CommittedWorldToObject4x3()", ".CandidateWorldToObject4x3()", ops);
		break;
	}

	case OpRayQueryGetRayFlagsKHR:
	{
		flush_variable_declaration(ops[0]);
		emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".RayFlags()"), false);
		break;
	}

	case OpRayQueryGetRayTMinKHR:
	{
		flush_variable_declaration(ops[0]);
		emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".RayTMin()"), false);
		break;
	}

	case OpRayQueryGetWorldRayOriginKHR:
	{
		flush_variable_declaration(ops[0]);
		emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".WorldRayOrigin()"), false);
		break;
	}

	case OpRayQueryGetWorldRayDirectionKHR:
	{
		flush_variable_declaration(ops[0]);
		emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".WorldRayDirection()"), false);
		break;
	}

	case OpSetMeshOutputsEXT:
	{
		statement("SetMeshOutputCounts(", to_unpacked_expression(ops[0]), ", ", to_unpacked_expression(ops[1]), ");");
		break;
	}
	case OpEmitVertex:
	{
		emit_geometry_stream_append();
		break;
	}

	case OpEndPrimitive:
	{
		statement("geometry_stream.RestartStrip();");
		break;
	}

	default:
		CompilerGLSL::emit_instruction(instruction);
		break;
	}
}

void CompilerHLSL::require_texture_query_variant(uint32_t var_id)
{
	if (const auto *var = maybe_get_backing_variable(var_id))
		var_id = var->self;

	auto &type = expression_type(var_id);
	bool uav = type.image.sampled == 2;
	if (hlsl_options.nonwritable_uav_texture_as_srv && has_decoration(var_id, DecorationNonWritable))
		uav = false;

	uint32_t bit = 0;
	switch (type.image.dim)
	{
	case Dim1D:
		bit = type.image.arrayed ? Query1DArray : Query1D;
		break;

	case Dim2D:
		if (type.image.ms)
			bit = type.image.arrayed ? Query2DMSArray : Query2DMS;
		else
			bit = type.image.arrayed ? Query2DArray : Query2D;
		break;

	case Dim3D:
		bit = Query3D;
		break;

	case DimCube:
		bit = type.image.arrayed ? QueryCubeArray : QueryCube;
		break;

	case DimBuffer:
		bit = QueryBuffer;
		break;

	default:
		SPIRV_CROSS_THROW("Unsupported query type.");
	}

	switch (get<SPIRType>(type.image.type).basetype)
	{
	case SPIRType::Float:
		bit += QueryTypeFloat;
		break;

	case SPIRType::Int:
		bit += QueryTypeInt;
		break;

	case SPIRType::UInt:
		bit += QueryTypeUInt;
		break;

	default:
		SPIRV_CROSS_THROW("Unsupported query type.");
	}

	auto norm_state = image_format_to_normalized_state(type.image.format);
	auto &variant = uav ? required_texture_size_variants
	                          .uav[uint32_t(norm_state)][image_format_to_components(type.image.format) - 1] :
	                      required_texture_size_variants.srv;

	uint64_t mask = 1ull << bit;
	if ((variant & mask) == 0)
	{
		force_recompile();
		variant |= mask;
	}
}
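
// Each bit in the variant mask selects one dimensionality/sample-type overload
// of spvTextureSize (SRV) or spvImageSize (UAV); setting a previously unset
// bit forces a recompile so the matching overload gets emitted on the next pass.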

void CompilerHLSL::set_root_constant_layouts(std::vector<RootConstants> layout)
{
	root_constants_layout = std::move(layout);
}

void CompilerHLSL::add_vertex_attribute_remap(const HLSLVertexAttributeRemap &vertex_attributes)
{
	remap_vertex_attributes.push_back(vertex_attributes);
}

VariableID CompilerHLSL::remap_num_workgroups_builtin()
{
	update_active_builtins();

	if (!active_input_builtins.get(BuiltInNumWorkgroups))
		return 0;

	// Create a new, fake UBO.
	uint32_t offset = ir.increase_bound_by(4);

	uint32_t uint_type_id = offset;
	uint32_t block_type_id = offset + 1;
	uint32_t block_pointer_type_id = offset + 2;
	uint32_t variable_id = offset + 3;

	SPIRType uint_type { OpTypeVector };
	uint_type.basetype = SPIRType::UInt;
	uint_type.width = 32;
	uint_type.vecsize = 3;
	uint_type.columns = 1;
	set<SPIRType>(uint_type_id, uint_type);

	SPIRType block_type { OpTypeStruct };
	block_type.basetype = SPIRType::Struct;
	block_type.member_types.push_back(uint_type_id);
	set<SPIRType>(block_type_id, block_type);
	set_decoration(block_type_id, DecorationBlock);
	set_member_name(block_type_id, 0, "count");
	set_member_decoration(block_type_id, 0, DecorationOffset, 0);

	SPIRType block_pointer_type = block_type;
	block_pointer_type.pointer = true;
	block_pointer_type.storage = StorageClassUniform;
	block_pointer_type.parent_type = block_type_id;
	auto &ptr_type = set<SPIRType>(block_pointer_type_id, block_pointer_type);

	// Preserve self.
	ptr_type.self = block_type_id;

	set<SPIRVariable>(variable_id, block_pointer_type_id, StorageClassUniform);
	ir.meta[variable_id].decoration.alias = "SPIRV_Cross_NumWorkgroups";

	num_workgroups_builtin = variable_id;
	get_entry_point().interface_variables.push_back(num_workgroups_builtin);
	return variable_id;
}
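
// The synthesized block surfaces in the generated HLSL as a constant buffer
// named SPIRV_Cross_NumWorkgroups holding a single uint3 "count" member;
// callers are expected to use the returned VariableID to assign it an
// explicit binding before compile().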

void CompilerHLSL::set_resource_binding_flags(HLSLBindingFlags flags)
{
	resource_binding_flags = flags;
}

void CompilerHLSL::validate_shader_model()
{
	// Check for nonuniform qualifier.
	// Instead of looping over all decorations to find this, just look at capabilities.
	for (auto &cap : ir.declared_capabilities)
	{
		switch (cap)
		{
		case CapabilityShaderNonUniformEXT:
		case CapabilityRuntimeDescriptorArrayEXT:
			if (hlsl_options.shader_model < 51)
				SPIRV_CROSS_THROW(
				    "Shader model 5.1 or higher is required to use bindless resources or NonUniformResourceIndex.");
			break;

		case CapabilityVariablePointers:
		case CapabilityVariablePointersStorageBuffer:
			SPIRV_CROSS_THROW("VariablePointers capability is not supported in HLSL.");

		default:
			break;
		}
	}

	if (ir.addressing_model != AddressingModelLogical)
		SPIRV_CROSS_THROW("Only Logical addressing model can be used with HLSL.");

	if (hlsl_options.enable_16bit_types && hlsl_options.shader_model < 62)
		SPIRV_CROSS_THROW("Need at least shader model 6.2 when enabling native 16-bit type support.");
}

string CompilerHLSL::compile()
{
	ir.fixup_reserved_names();

	// Do not deal with ES-isms like precision, older extensions and such.
	options.es = false;
	options.version = 450;
	options.vulkan_semantics = true;
	backend.float_literal_suffix = true;
	backend.double_literal_suffix = false;
	backend.long_long_literal_suffix = true;
	backend.uint32_t_literal_suffix = true;
	backend.int16_t_literal_suffix = "";
	backend.uint16_t_literal_suffix = "u";
	backend.basic_int_type = "int";
	backend.basic_uint_type = "uint";
	backend.demote_literal = "discard";
	backend.boolean_mix_function = "";
	backend.swizzle_is_function = false;
	backend.shared_is_implied = true;
	backend.unsized_array_supported = true;
	backend.explicit_struct_type = false;
	backend.use_initializer_list = true;
	backend.use_constructor_splatting = false;
	backend.can_swizzle_scalar = true;
	backend.can_declare_struct_inline = false;
	backend.can_declare_arrays_inline = false;
	backend.can_return_array = false;
	backend.nonuniform_qualifier = "NonUniformResourceIndex";
	backend.support_case_fallthrough = false;
	backend.force_merged_mesh_block = get_execution_model() == ExecutionModelMeshEXT;
	backend.force_gl_in_out_block = backend.force_merged_mesh_block;
	backend.supports_empty_struct = hlsl_options.shader_model <= 30;

	// SM 4.1 does not support precise for some reason.
	backend.support_precise_qualifier = hlsl_options.shader_model >= 50 || hlsl_options.shader_model == 40;

	fixup_anonymous_struct_names();
	fixup_type_alias();
	reorder_type_alias();
	build_function_control_flow_graphs_and_analyze();
	validate_shader_model();
	update_active_builtins();
	analyze_image_and_sampler_usage();
	analyze_interlocked_resource_usage();
	if (get_execution_model() == ExecutionModelMeshEXT)
		analyze_meshlet_writes();
	if (get_execution_model() == ExecutionModelGeometry)
		discover_geometry_emitters();

	// Subpass input needs SV_Position.
	if (need_subpass_input)
		active_input_builtins.set(BuiltInFragCoord);

	// Need to offset by BaseVertex/BaseInstance in SM 6.8+.
	if (hlsl_options.shader_model >= 68)
	{
		if (active_input_builtins.get(BuiltInVertexIndex))
			active_input_builtins.set(BuiltInBaseVertex);
		if (active_input_builtins.get(BuiltInInstanceIndex))
			active_input_builtins.set(BuiltInBaseInstance);
	}

	uint32_t pass_count = 0;
	do
	{
		reset(pass_count);

		// Move constructor for this type is broken on GCC 4.9 ...
		buffer.reset();

		emit_header();
		emit_resources();

		emit_function(get<SPIRFunction>(ir.default_entry_point), Bitset());
		emit_hlsl_entry_point();

		pass_count++;
	} while (is_forcing_recompilation());

	// Entry point in HLSL is always main() for the time being.
	get_entry_point().name = "main";

	return buffer.str();
}
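
// Codegen is multi-pass: whenever a case in emit_instruction() latches a
// requires_* flag or registers a new helper, builtin or query variant and
// calls force_recompile(), the do/while loop above resets and re-emits
// everything with the accumulated requirements.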

void CompilerHLSL::emit_block_hints(const SPIRBlock &block)
{
	switch (block.hint)
	{
	case SPIRBlock::HintFlatten:
		statement("[flatten]");
		break;
	case SPIRBlock::HintDontFlatten:
		statement("[branch]");
		break;
	case SPIRBlock::HintUnroll:
		statement("[unroll]");
		break;
	case SPIRBlock::HintDontUnroll:
		statement("[loop]");
		break;
	default:
		break;
	}
}

string CompilerHLSL::get_unique_identifier()
{
	return join("_", unique_identifier_count++, "ident");
}

void CompilerHLSL::add_hlsl_resource_binding(const HLSLResourceBinding &binding)
{
	StageSetBinding tuple = { binding.stage, binding.desc_set, binding.binding };
	resource_bindings[tuple] = { binding, false };
}

bool CompilerHLSL::is_hlsl_resource_binding_used(ExecutionModel model, uint32_t desc_set, uint32_t binding) const
{
	StageSetBinding tuple = { model, desc_set, binding };
	auto itr = resource_bindings.find(tuple);
	return itr != end(resource_bindings) && itr->second.second;
}

CompilerHLSL::BitcastType CompilerHLSL::get_bitcast_type(uint32_t result_type, uint32_t op0)
{
	auto &rslt_type = get<SPIRType>(result_type);
	auto &expr_type = expression_type(op0);

	if (rslt_type.basetype == SPIRType::BaseType::UInt64 && expr_type.basetype == SPIRType::BaseType::UInt &&
	    expr_type.vecsize == 2)
		return BitcastType::TypePackUint2x32;
	else if (rslt_type.basetype == SPIRType::BaseType::UInt && rslt_type.vecsize == 2 &&
	         expr_type.basetype == SPIRType::BaseType::UInt64)
		return BitcastType::TypeUnpackUint64;

	return BitcastType::TypeNormal;
}

bool CompilerHLSL::is_hlsl_force_storage_buffer_as_uav(ID id) const
{
	if (hlsl_options.force_storage_buffer_as_uav)
	{
		return true;
	}

	const uint32_t desc_set = get_decoration(id, DecorationDescriptorSet);
	const uint32_t binding = get_decoration(id, DecorationBinding);

	return (force_uav_buffer_bindings.find({ desc_set, binding }) != force_uav_buffer_bindings.end());
}

bool CompilerHLSL::is_hidden_io_variable(const SPIRVariable &var) const
{
	if (!is_hidden_variable(var))
		return false;

	// It is too risky to remove stage IO variables that are linkable since it affects link compatibility.
	// For vertex inputs and fragment outputs, it's less of a concern and we want reflection data
	// to match reality.
	bool is_external_linkage =
	    (get_execution_model() == ExecutionModelVertex && var.storage == StorageClassInput) ||
	    (get_execution_model() == ExecutionModelFragment && var.storage == StorageClassOutput);

	if (!is_external_linkage)
		return false;

	// Unused output I/O variables might still be required to implement framebuffer fetch.
	if (var.storage == StorageClassOutput && !is_legacy() &&
	    location_is_framebuffer_fetch(get_decoration(var.self, DecorationLocation)) != 0)
		return false;

	return true;
}

void CompilerHLSL::set_hlsl_force_storage_buffer_as_uav(uint32_t desc_set, uint32_t binding)
{
	SetBindingPair pair = { desc_set, binding };
	force_uav_buffer_bindings.insert(pair);
}

bool CompilerHLSL::is_user_type_structured(uint32_t id) const
{
	if (hlsl_options.preserve_structured_buffers)
	{
		// Compare left hand side of string only as these user types can contain more metadata such as their subtypes,
		// e.g. "structuredbuffer:int"
		const std::string &user_type = get_decoration_string(id, DecorationUserTypeGOOGLE);
		return user_type.compare(0, 16, "structuredbuffer") == 0 ||
		       user_type.compare(0, 18, "rwstructuredbuffer") == 0 ||
		       user_type.compare(0, 35, "globallycoherent rwstructuredbuffer") == 0 ||
		       user_type.compare(0, 33, "rasterizerorderedstructuredbuffer") == 0;
	}
	return false;
}

void CompilerHLSL::cast_to_variable_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type)
{
	// Loading a full array of ClipDistance needs special consideration in mesh shaders
	// since we cannot lower them by wrapping the variables in global statics.
	// Fortunately, clip/cull is a proper vector in HLSL so we can lower with simple rvalue casts.
	if (get_execution_model() != ExecutionModelMeshEXT ||
	    !has_decoration(target_id, DecorationBuiltIn) ||
	    !is_array(expr_type))
	{
		CompilerGLSL::cast_to_variable_store(target_id, expr, expr_type);
		return;
	}

	auto builtin = BuiltIn(get_decoration(target_id, DecorationBuiltIn));
	if (builtin != BuiltInClipDistance && builtin != BuiltInCullDistance)
	{
		CompilerGLSL::cast_to_variable_store(target_id, expr, expr_type);
		return;
	}

	// Array of array means one thread is storing clip distance for all vertices. Nonsensical?
	if (is_array(get<SPIRType>(expr_type.parent_type)))
		SPIRV_CROSS_THROW("Attempting to store all mesh vertices in one go. This is not supported.");

	uint32_t num_clip = to_array_size_literal(expr_type);
	if (num_clip > 4)
		SPIRV_CROSS_THROW("Number of clip or cull distances exceeds 4, this will not work with mesh shaders.");

	if (num_clip == 1)
	{
		// We already emit array here.
		CompilerGLSL::cast_to_variable_store(target_id, expr, expr_type);
		return;
	}

	auto unrolled_expr = join("float", num_clip, "(");
	for (uint32_t i = 0; i < num_clip; i++)
	{
		unrolled_expr += join(expr, "[", i, "]");
		if (i + 1 < num_clip)
			unrolled_expr += ", ";
	}
	unrolled_expr += ")";

	expr = std::move(unrolled_expr);
}
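
// E.g. storing a float[2] ClipDistance array in a mesh shader becomes
//   float2(expr[0], expr[1])
// so the store can target the vector-typed clip/cull builtin directly.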