spirv_glsl.cpp (655 KB, 14,308 lines)

430914310143111431214313143141431514316143171431814319143201432114322143231432414325143261432714328143291433014331143321433314334143351433614337143381433914340143411434214343143441434514346143471434814349143501435114352143531435414355143561435714358143591436014361143621436314364143651436614367143681436914370143711437214373143741437514376143771437814379143801438114382143831438414385143861438714388143891439014391143921439314394143951439614397143981439914400144011440214403144041440514406144071440814409144101441114412144131441414415144161441714418144191442014421144221442314424144251442614427144281442914430144311443214433144341443514436144371443814439144401444114442144431444414445144461444714448144491445014451144521445314454144551445614457144581445914460144611446214463144641446514466144671446814469144701447114472144731447414475144761447714478144791448014481144821448314484144851448614487144881448914490144911449214493144941449514496144971449814499145001450114502145031450414505145061450714508145091451014511145121451314514145151451614517145181451914520145211452214523145241452514526145271452814529145301453114532145331453414535145361453714538145391454014541145421454314544145451454614547145481454914550145511455214553145541455514556145571455814559145601456114562145631456414565145661456714568145691457014571145721457314574145751457614577145781457914580145811458214583145841458514586145871458814589145901459114592145931459414595145961459714598145991460014601146021460314604146051460614607146081460914610146111461214613146141461514616146171461814619146201462114622146231462414625146261462714628146291463014631146321463314634146351463614637146381463914640146411464214643146441464514646146471464814649146501465114652146531465414655146561465714658146591466014661146621466314664146651466614667146681466914670146711467214673146741467514676146771467814679146801468114682146831468414685146861468714688146891469014691146921469314694146951469614697146981469914700147011470214703147041470514706147071470814709147101471114712147131471414715147161471714718147191472014721147221472314724147251472614727147281472914730147311473214733147341473514736147371473814739147401474114742147431474414745147461474714748147491475014751147521475314754147551475614757147581475914760147611476214763147641476514766147671476814769147701477114772147731477414775147761477714778147791478014781147821478314784147851478614787147881478914790147911479214793147941479514796147971479814799148001480114802148031480414805148061480714808148091481014811148121481314814148151481614817148181481914820148211482214823148241482514826148271482814829148301483114832148331483414835148361483714838148391484014841148421484314844148451484614847148481484914850148511485214853148541485514856148571485814859148601486114862148631486414865148661486714868148691487014871148721487314874148751487614877148781487914880148811488214883148841488514886148871488814889148901489114892148931489414895148961489714898148991490014901149021490314904149051490614907149081490914910149111491214913149141491514916149171491814919149201492114922149231492414925149261492714928149291493014931149321493314934149351493614937149381493914940149411494214943149441494514946149471494814949149501495114952149531495414955149561495714958149591496014961149621496314964149651496614967149681496914970149711497214973149741497514976149771497814979149801498114982149831498414985149861498714988149891499014991149921499314994149951499614997149981499915000150011500215003150041500515006150071500815009150101501115012150131501415015150161501715018150191
502015021150221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211542215423154241542515426154271542815429154301543115432154331543415435154361543715438154391544015441154421544315444154451544615447154481544915450154511545215453154541545515456154571545815459154601546115462154631546415465154661546715468154691547015471154721547315474154751547615477154781547915480154811548215483154841548515486154871548815489154901549115492154931549415495154961549715498154991550015501155021550315504155051550615507155081550915510155111551215513155141551515516155171551815519155201552115522155231552415525155261552715528155291553015531155321553315534155351553615537155381553915540155411554215543155441554515546155471554815549155501555115552155531555415555155561555715558155591556015561155621556315564155651556615567155681556915570155711557215573155741557515576155771557815579155801558115582155831558415585155861558715588155891559015591155921559315594155951559615597155981559915600156011560215603156041560515606156071560815609156101561115612156131561415615156161561715618156191562015621156221562315624156251562615627156281562915630156311563215633156341563515636156371563815639156401564115642156431564415645156461564715648156491565015651156521565315654156551565615657156581565915660156611566215663156641566515666156671566815669156701567115672156731567415675156761567715678156791568015681156821568315684156851568615687156881568915690156911569215693156941569515696156971569815699157001570115702157031570415705157061570715708157091571015711157121571315714157151571615717157181571915720157211572215723157241572515726157271572815729157301
573115732157331573415735157361573715738157391574015741157421574315744157451574615747157481574915750157511575215753157541575515756157571575815759157601576115762157631576415765157661576715768157691577015771157721577315774157751577615777157781577915780157811578215783157841578515786157871578815789157901579115792157931579415795157961579715798157991580015801158021580315804158051580615807158081580915810158111581215813158141581515816158171581815819158201582115822158231582415825158261582715828158291583015831158321583315834158351583615837158381583915840158411584215843158441584515846158471584815849158501585115852158531585415855158561585715858158591586015861158621586315864158651586615867158681586915870158711587215873158741587515876158771587815879158801588115882158831588415885158861588715888158891589015891158921589315894158951589615897158981589915900159011590215903159041590515906159071590815909159101591115912159131591415915159161591715918159191592015921159221592315924159251592615927159281592915930159311593215933159341593515936159371593815939159401594115942159431594415945159461594715948159491595015951159521595315954159551595615957159581595915960159611596215963159641596515966159671596815969159701597115972159731597415975159761597715978159791598015981159821598315984159851598615987159881598915990159911599215993159941599515996159971599815999160001600116002160031600416005160061600716008160091601016011160121601316014160151601616017160181601916020160211602216023160241602516026160271602816029160301603116032160331603416035160361603716038160391604016041160421604316044160451604616047160481604916050160511605216053160541605516056160571605816059160601606116062160631606416065160661606716068160691607016071160721607316074160751607616077160781607916080160811608216083160841608516086160871608816089160901609116092160931609416095160961609716098160991610016101161021610316104161051610616107161081610916110161111611216113161141611516116161171611816119161201612116122161231612416125161261612716128161291613016131161321613316134161351613616137161381613916140161411614216143161441614516146161471614816149161501615116152161531615416155161561615716158161591616016161161621616316164161651616616167161681616916170161711617216173161741617516176161771617816179161801618116182161831618416185161861618716188161891619016191161921619316194161951619616197161981619916200162011620216203162041620516206162071620816209162101621116212162131621416215162161621716218162191622016221162221622316224162251622616227162281622916230162311623216233162341623516236162371623816239162401624116242162431624416245162461624716248162491625016251162521625316254162551625616257162581625916260162611626216263162641626516266162671626816269162701627116272162731627416275162761627716278162791628016281162821628316284162851628616287162881628916290162911629216293162941629516296162971629816299163001630116302163031630416305163061630716308163091631016311163121631316314163151631616317163181631916320163211632216323163241632516326163271632816329163301633116332163331633416335163361633716338163391634016341163421634316344163451634616347163481634916350163511635216353163541635516356163571635816359163601636116362163631636416365163661636716368163691637016371163721637316374163751637616377163781637916380163811638216383163841638516386163871638816389163901639116392163931639416395163961639716398163991640016401164021640316404164051640616407164081640916410164111641216413164141641516416164171641816419164201642116422164231642416425164261642716428164291643016431164321643316434164351643616437164381643916440164411
644216443164441644516446164471644816449164501645116452164531645416455164561645716458164591646016461164621646316464164651646616467164681646916470164711647216473164741647516476164771647816479164801648116482164831648416485164861648716488164891649016491164921649316494164951649616497164981649916500165011650216503165041650516506165071650816509165101651116512165131651416515165161651716518165191652016521165221652316524165251652616527165281652916530165311653216533165341653516536165371653816539165401654116542165431654416545165461654716548165491655016551165521655316554165551655616557165581655916560165611656216563165641656516566165671656816569165701657116572165731657416575165761657716578165791658016581165821658316584165851658616587165881658916590165911659216593165941659516596165971659816599166001660116602166031660416605166061660716608166091661016611166121661316614166151661616617166181661916620166211662216623166241662516626166271662816629166301663116632166331663416635166361663716638166391664016641166421664316644166451664616647166481664916650166511665216653166541665516656166571665816659166601666116662166631666416665166661666716668166691667016671166721667316674166751667616677166781667916680166811668216683166841668516686166871668816689166901669116692166931669416695166961669716698166991670016701167021670316704167051670616707167081670916710167111671216713167141671516716167171671816719167201672116722167231672416725167261672716728167291673016731167321673316734167351673616737167381673916740167411674216743167441674516746167471674816749167501675116752167531675416755167561675716758167591676016761167621676316764167651676616767167681676916770167711677216773167741677516776167771677816779167801678116782167831678416785167861678716788167891679016791167921679316794167951679616797167981679916800168011680216803168041680516806168071680816809168101681116812168131681416815168161681716818168191682016821168221682316824168251682616827168281682916830168311683216833168341683516836168371683816839168401684116842168431684416845168461684716848168491685016851168521685316854168551685616857168581685916860168611686216863168641686516866168671686816869168701687116872168731687416875168761687716878168791688016881168821688316884168851688616887168881688916890168911689216893168941689516896168971689816899169001690116902169031690416905169061690716908169091691016911169121691316914169151691616917169181691916920169211692216923169241692516926169271692816929169301693116932169331693416935169361693716938169391694016941169421694316944169451694616947169481694916950169511695216953169541695516956169571695816959169601696116962169631696416965169661696716968169691697016971169721697316974169751697616977169781697916980169811698216983169841698516986169871698816989169901699116992169931699416995169961699716998169991700017001170021700317004170051700617007170081700917010170111701217013170141701517016170171701817019170201702117022170231702417025170261702717028170291703017031170321703317034170351703617037170381703917040170411704217043170441704517046170471704817049170501705117052170531705417055170561705717058170591706017061170621706317064170651706617067170681706917070170711707217073170741707517076170771707817079170801708117082170831708417085170861708717088170891709017091170921709317094170951709617097170981709917100171011710217103171041710517106171071710817109171101711117112171131711417115171161711717118171191712017121171221712317124171251712617127171281712917130171311713217133171341713517136171371713817139171401714117142171431714417145171461714717148171491715017151171521
715317154171551715617157171581715917160171611716217163171641716517166171671716817169171701717117172171731717417175171761717717178171791718017181171821718317184171851718617187171881718917190171911719217193171941719517196171971719817199172001720117202172031720417205172061720717208172091721017211172121721317214172151721617217172181721917220172211722217223172241722517226172271722817229172301723117232172331723417235172361723717238172391724017241172421724317244172451724617247172481724917250172511725217253172541725517256172571725817259172601726117262172631726417265172661726717268172691727017271172721727317274172751727617277172781727917280172811728217283172841728517286172871728817289172901729117292172931729417295172961729717298172991730017301173021730317304173051730617307173081730917310173111731217313173141731517316173171731817319173201732117322173231732417325173261732717328173291733017331173321733317334173351733617337173381733917340173411734217343173441734517346173471734817349173501735117352173531735417355173561735717358173591736017361173621736317364173651736617367173681736917370173711737217373173741737517376173771737817379173801738117382173831738417385173861738717388173891739017391173921739317394173951739617397173981739917400174011740217403174041740517406174071740817409174101741117412174131741417415174161741717418174191742017421174221742317424174251742617427174281742917430174311743217433174341743517436174371743817439174401744117442174431744417445174461744717448174491745017451174521745317454174551745617457174581745917460174611746217463174641746517466174671746817469174701747117472174731747417475174761747717478174791748017481174821748317484174851748617487174881748917490174911749217493174941749517496174971749817499175001750117502175031750417505175061750717508175091751017511175121751317514175151751617517175181751917520175211752217523175241752517526175271752817529175301753117532175331753417535175361753717538175391754017541175421754317544175451754617547175481754917550175511755217553175541755517556175571755817559175601756117562175631756417565175661756717568175691757017571175721757317574175751757617577175781757917580175811758217583175841758517586175871758817589175901759117592175931759417595175961759717598175991760017601176021760317604176051760617607176081760917610176111761217613176141761517616176171761817619176201762117622176231762417625176261762717628176291763017631176321763317634176351763617637176381763917640176411764217643176441764517646176471764817649176501765117652176531765417655176561765717658176591766017661176621766317664176651766617667176681766917670176711767217673176741767517676176771767817679176801768117682176831768417685176861768717688176891769017691176921769317694176951769617697176981769917700177011770217703177041770517706177071770817709177101771117712177131771417715177161771717718177191772017721177221772317724177251772617727177281772917730177311773217733177341773517736177371773817739177401774117742177431774417745177461774717748177491775017751177521775317754177551775617757177581775917760177611776217763177641776517766177671776817769177701777117772177731777417775177761777717778177791778017781177821778317784177851778617787177881778917790177911779217793177941779517796177971779817799178001780117802178031780417805178061780717808178091781017811178121781317814178151781617817178181781917820178211782217823178241782517826178271782817829178301783117832178331783417835178361783717838178391784017841178421784317844178451784617847178481784917850178511785217853178541785517856178571785817859178601786117862178631
786417865178661786717868178691787017871178721787317874178751787617877178781787917880178811788217883178841788517886178871788817889178901789117892178931789417895178961789717898178991790017901179021790317904179051790617907179081790917910179111791217913179141791517916179171791817919179201792117922179231792417925179261792717928179291793017931179321793317934179351793617937179381793917940179411794217943179441794517946179471794817949179501795117952179531795417955179561795717958179591796017961179621796317964179651796617967179681796917970179711797217973179741797517976179771797817979179801798117982179831798417985179861798717988179891799017991179921799317994179951799617997179981799918000180011800218003180041800518006180071800818009180101801118012180131801418015180161801718018180191802018021180221802318024180251802618027180281802918030180311803218033180341803518036180371803818039180401804118042180431804418045180461804718048180491805018051180521805318054180551805618057180581805918060180611806218063180641806518066180671806818069180701807118072180731807418075180761807718078180791808018081180821808318084180851808618087180881808918090180911809218093180941809518096180971809818099181001810118102181031810418105181061810718108181091811018111181121811318114181151811618117181181811918120181211812218123181241812518126181271812818129181301813118132181331813418135181361813718138181391814018141181421814318144181451814618147181481814918150181511815218153181541815518156181571815818159181601816118162181631816418165181661816718168181691817018171181721817318174181751817618177181781817918180181811818218183181841818518186181871818818189181901819118192181931819418195181961819718198181991820018201182021820318204182051820618207182081820918210182111821218213182141821518216182171821818219182201822118222182231822418225182261822718228182291823018231182321823318234182351823618237182381823918240182411824218243182441824518246182471824818249182501825118252182531825418255182561825718258182591826018261182621826318264182651826618267182681826918270182711827218273182741827518276182771827818279182801828118282182831828418285182861828718288182891829018291182921829318294182951829618297182981829918300183011830218303183041830518306183071830818309183101831118312183131831418315183161831718318183191832018321183221832318324183251832618327183281832918330183311833218333183341833518336183371833818339183401834118342183431834418345183461834718348183491835018351183521835318354183551835618357183581835918360183611836218363183641836518366183671836818369183701837118372183731837418375183761837718378183791838018381183821838318384183851838618387183881838918390183911839218393183941839518396183971839818399184001840118402184031840418405184061840718408184091841018411184121841318414184151841618417184181841918420184211842218423184241842518426184271842818429184301843118432184331843418435184361843718438184391844018441184421844318444184451844618447184481844918450184511845218453184541845518456184571845818459184601846118462184631846418465184661846718468184691847018471184721847318474184751847618477184781847918480184811848218483184841848518486184871848818489184901849118492184931849418495184961849718498184991850018501185021850318504185051850618507185081850918510185111851218513185141851518516185171851818519185201852118522185231852418525185261852718528185291853018531185321853318534185351853618537185381853918540185411854218543185441854518546185471854818549185501855118552185531855418555185561855718558185591856018561185621856318564185651856618567185681856918570185711857218573185741
857518576185771857818579185801858118582185831858418585185861858718588185891859018591185921859318594185951859618597185981859918600186011860218603186041860518606186071860818609186101861118612186131861418615186161861718618186191862018621186221862318624186251862618627186281862918630186311863218633186341863518636186371863818639186401864118642186431864418645186461864718648186491865018651186521865318654186551865618657186581865918660186611866218663186641866518666186671866818669186701867118672186731867418675186761867718678186791868018681186821868318684186851868618687186881868918690186911869218693186941869518696186971869818699187001870118702187031870418705187061870718708187091871018711187121871318714187151871618717187181871918720187211872218723187241872518726187271872818729187301873118732187331873418735187361873718738187391874018741187421874318744187451874618747187481874918750187511875218753187541875518756187571875818759187601876118762187631876418765187661876718768187691877018771187721877318774187751877618777187781877918780187811878218783187841878518786187871878818789187901879118792187931879418795187961879718798187991880018801188021880318804188051880618807188081880918810188111881218813188141881518816188171881818819188201882118822188231882418825188261882718828188291883018831188321883318834188351883618837188381883918840188411884218843188441884518846188471884818849188501885118852188531885418855188561885718858188591886018861188621886318864188651886618867188681886918870188711887218873188741887518876188771887818879188801888118882188831888418885188861888718888188891889018891188921889318894188951889618897188981889918900189011890218903189041890518906189071890818909189101891118912189131891418915189161891718918189191892018921189221892318924189251892618927189281892918930189311893218933189341893518936189371893818939189401894118942189431894418945189461894718948189491895018951189521895318954189551895618957189581895918960189611896218963189641896518966189671896818969189701897118972189731897418975189761897718978189791898018981189821898318984189851898618987189881898918990189911899218993189941899518996189971899818999190001900119002190031900419005190061900719008190091901019011190121901319014190151901619017190181901919020190211902219023190241902519026190271902819029190301903119032190331903419035190361903719038190391904019041190421904319044190451904619047190481904919050190511905219053190541905519056190571905819059190601906119062190631906419065190661906719068190691907019071190721907319074190751907619077190781907919080190811908219083190841908519086190871908819089190901909119092190931909419095190961909719098190991910019101191021910319104191051910619107191081910919110191111911219113191141911519116191171911819119191201912119122191231912419125191261912719128191291913019131191321913319134191351913619137191381913919140191411914219143191441914519146191471914819149191501915119152191531915419155191561915719158191591916019161191621916319164191651916619167191681916919170191711917219173191741917519176191771917819179191801918119182191831918419185191861918719188191891919019191191921919319194191951919619197191981919919200192011920219203192041920519206192071920819209192101921119212192131921419215192161921719218192191922019221192221922319224192251922619227192281922919230192311923219233192341923519236192371923819239192401924119242192431924419245192461924719248192491925019251192521925319254192551925619257192581925919260192611926219263192641926519266192671926819269192701927119272192731927419275192761927719278192791928019281192821928319284192851
928619287192881928919290192911929219293192941929519296192971929819299193001930119302193031930419305193061930719308193091931019311193121931319314193151931619317193181931919320193211932219323193241932519326193271932819329193301933119332193331933419335193361933719338193391934019341193421934319344193451934619347193481934919350193511935219353193541935519356193571935819359193601936119362193631936419365193661936719368193691937019371193721937319374193751937619377193781937919380193811938219383193841938519386193871938819389193901939119392193931939419395193961939719398193991940019401194021940319404194051940619407194081940919410194111941219413194141941519416194171941819419194201942119422194231942419425194261942719428194291943019431194321943319434194351943619437194381943919440194411944219443194441944519446194471944819449194501945119452194531945419455194561945719458194591946019461194621946319464194651946619467194681946919470194711947219473194741947519476194771947819479194801948119482194831948419485194861948719488194891949019491194921949319494194951949619497194981949919500195011950219503195041950519506195071950819509195101951119512195131951419515195161951719518195191952019521195221952319524195251952619527195281952919530195311953219533195341953519536195371953819539195401954119542195431954419545195461954719548195491955019551195521955319554195551955619557195581955919560195611956219563195641956519566195671956819569195701957119572195731957419575195761957719578195791958019581195821958319584195851958619587195881958919590195911959219593195941959519596195971959819599196001960119602196031960419605196061960719608196091961019611196121961319614196151961619617196181961919620196211962219623196241962519626196271962819629196301963119632196331963419635196361963719638196391964019641196421964319644196451964619647196481964919650196511965219653196541965519656196571965819659196601966119662196631966419665196661966719668196691967019671196721967319674196751967619677196781967919680196811968219683196841968519686196871968819689196901969119692196931969419695196961969719698196991970019701197021970319704197051970619707197081970919710197111971219713197141971519716197171971819719197201972119722197231972419725197261972719728197291973019731197321973319734197351973619737197381973919740197411974219743197441974519746197471974819749197501975119752197531975419755197561975719758197591976019761197621976319764197651976619767197681976919770197711977219773197741977519776197771977819779197801978119782197831978419785197861978719788197891979019791197921979319794197951979619797197981979919800198011980219803198041980519806198071980819809198101981119812198131981419815198161981719818198191982019821198221982319824198251982619827198281982919830198311983219833198341983519836198371983819839198401984119842198431984419845198461984719848198491985019851198521985319854198551985619857198581985919860198611986219863198641986519866198671986819869198701987119872198731987419875198761987719878198791988019881198821988319884198851988619887198881988919890198911989219893198941989519896198971989819899199001990119902199031990419905199061990719908199091991019911199121991319914199151991619917199181991919920199211992219923199241992519926199271992819929199301993119932199331993419935199361993719938199391994019941199421994319944199451994619947199481994919950199511995219953199541995519956199571995819959199601996119962199631996419965199661996719968199691997019971199721997319974199751997619977199781997919980199811998219983199841998519986199871998819989199901999119992199931999419995199961
9997199981999920000200012000220003200042000520006200072000820009200102001120012200132001420015200162001720018200192002020021200222002320024200252002620027200282002920030200312003220033200342003520036200372003820039200402004120042200432004420045200462004720048200492005020051200522005320054200552005620057200582005920060200612006220063200642006520066200672006820069200702007120072200732007420075200762007720078200792008020081200822008320084200852008620087200882008920090200912009220093200942009520096200972009820099201002010120102201032010420105201062010720108201092011020111201122011320114201152011620117201182011920120201212012220123201242012520126201272012820129201302013120132201332013420135201362013720138201392014020141201422014320144201452014620147201482014920150201512015220153201542015520156201572015820159201602016120162201632016420165201662016720168201692017020171201722017320174201752017620177201782017920180201812018220183201842018520186201872018820189201902019120192201932019420195201962019720198201992020020201202022020320204202052020620207202082020920210202112021220213202142021520216202172021820219202202022120222202232022420225202262022720228202292023020231202322023320234202352023620237202382023920240202412024220243202442024520246202472024820249202502025120252202532025420255202562025720258202592026020261202622026320264202652026620267202682026920270202712027220273202742027520276202772027820279202802028120282202832028420285202862028720288202892029020291202922029320294202952029620297202982029920300203012030220303203042030520306203072030820309203102031120312203132031420315203162031720318203192032020321203222032320324203252032620327203282032920330
/*
 * Copyright 2015-2021 Arm Limited
 * SPDX-License-Identifier: Apache-2.0 OR MIT
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * At your option, you may choose to accept this material under either:
 * 1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or
 * 2. The MIT License, found at <http://opensource.org/licenses/MIT>.
 */

#include "spirv_glsl.hpp"
#include "GLSL.std.450.h"
#include "spirv_common.hpp"
#include <algorithm>
#include <assert.h>
#include <cmath>
#include <limits>
#include <locale.h>
#include <utility>
#include <array>

#ifndef _WIN32
#include <langinfo.h>
#endif
#include <locale.h>

using namespace SPIRV_CROSS_SPV_HEADER_NAMESPACE;
using namespace SPIRV_CROSS_NAMESPACE;
using namespace std;

namespace SPIRV_CROSS_NAMESPACE
{
enum ExtraSubExpressionType
{
	// Create masks above any legal ID range to allow multiple address spaces into the extra_sub_expressions map.
	EXTRA_SUB_EXPRESSION_TYPE_STREAM_OFFSET = 0x10000000,
	EXTRA_SUB_EXPRESSION_TYPE_AUX = 0x20000000
};

struct GlslConstantNameMapping
{
	uint32_t value;
	const char *alias;
};

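// The tables below map SPIR-V enum values to their GLSL constant names. DEF_GLSL_MAPPING(x)
// expands to { x, "gl_" #x }, e.g. DEF_GLSL_MAPPING(ComponentTypeFloat32NV) yields
// { ComponentTypeFloat32NV, "gl_ComponentTypeFloat32NV" }; the _EXT variant appends the KHR
// suffix to the enumerant while keeping the plain gl_ name.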
#define DEF_GLSL_MAPPING(x) { x, "gl_" #x }
#define DEF_GLSL_MAPPING_EXT(x) { x##KHR, "gl_" #x }

static const GlslConstantNameMapping CoopVecComponentTypeNames[] = {
	DEF_GLSL_MAPPING(ComponentTypeFloat16NV),
	DEF_GLSL_MAPPING(ComponentTypeFloat32NV),
	DEF_GLSL_MAPPING(ComponentTypeFloat64NV),
	DEF_GLSL_MAPPING(ComponentTypeSignedInt8NV),
	DEF_GLSL_MAPPING(ComponentTypeSignedInt16NV),
	DEF_GLSL_MAPPING(ComponentTypeSignedInt32NV),
	DEF_GLSL_MAPPING(ComponentTypeSignedInt64NV),
	DEF_GLSL_MAPPING(ComponentTypeUnsignedInt8NV),
	DEF_GLSL_MAPPING(ComponentTypeUnsignedInt16NV),
	DEF_GLSL_MAPPING(ComponentTypeUnsignedInt32NV),
	DEF_GLSL_MAPPING(ComponentTypeUnsignedInt64NV),
	DEF_GLSL_MAPPING(ComponentTypeSignedInt8PackedNV),
	DEF_GLSL_MAPPING(ComponentTypeUnsignedInt8PackedNV),
	DEF_GLSL_MAPPING(ComponentTypeFloatE4M3NV),
	DEF_GLSL_MAPPING(ComponentTypeFloatE5M2NV),
};

static const GlslConstantNameMapping CoopVecMatrixLayoutNames[] = {
	DEF_GLSL_MAPPING(CooperativeVectorMatrixLayoutRowMajorNV),
	DEF_GLSL_MAPPING(CooperativeVectorMatrixLayoutColumnMajorNV),
	DEF_GLSL_MAPPING(CooperativeVectorMatrixLayoutInferencingOptimalNV),
	DEF_GLSL_MAPPING(CooperativeVectorMatrixLayoutTrainingOptimalNV),
};

static const GlslConstantNameMapping CoopMatMatrixLayoutNames[] = {
	DEF_GLSL_MAPPING_EXT(CooperativeMatrixLayoutRowMajor),
	DEF_GLSL_MAPPING_EXT(CooperativeMatrixLayoutColumnMajor),
};

#undef DEF_GLSL_MAPPING
#undef DEF_GLSL_MAPPING_EXT

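// The two predicates below flag operations with unsigned semantics. As their inline comments
// note, they only need to cover what the legacy-target checks care about: older GLSL/ESSL
// versions lack native unsigned integer types, so such operations may need extra casts or
// workarounds when targeting them.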
static bool is_unsigned_opcode(Op op)
{
	// Don't have to be exhaustive, only relevant for legacy target checking ...
	switch (op)
	{
	case OpShiftRightLogical:
	case OpUGreaterThan:
	case OpUGreaterThanEqual:
	case OpULessThan:
	case OpULessThanEqual:
	case OpUConvert:
	case OpUDiv:
	case OpUMod:
	case OpUMulExtended:
	case OpConvertUToF:
	case OpConvertFToU:
		return true;
	default:
		return false;
	}
}

static bool is_unsigned_glsl_opcode(GLSLstd450 op)
{
	// Don't have to be exhaustive, only relevant for legacy target checking ...
	switch (op)
	{
	case GLSLstd450UClamp:
	case GLSLstd450UMin:
	case GLSLstd450UMax:
	case GLSLstd450FindUMsb:
		return true;
	default:
		return false;
	}
}

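// Buffer packing helpers. std140 and HLSL cbuffer layouts pad arrays and structs out to vec4
// (16-byte) boundaries, while std430 and scalar packing allow tighter strides. The *EnhancedLayout
// and *PackOffset variants follow the same base rules but additionally allow explicit member
// offsets, which is what packing_has_flexible_offset() reports.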
static bool packing_is_vec4_padded(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingHLSLCbuffer:
	case BufferPackingHLSLCbufferPackOffset:
	case BufferPackingStd140:
	case BufferPackingStd140EnhancedLayout:
		return true;
	default:
		return false;
	}
}

static bool packing_is_hlsl(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingHLSLCbuffer:
	case BufferPackingHLSLCbufferPackOffset:
		return true;
	default:
		return false;
	}
}

static bool packing_has_flexible_offset(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingStd140:
	case BufferPackingStd430:
	case BufferPackingScalar:
	case BufferPackingHLSLCbuffer:
		return false;
	default:
		return true;
	}
}

static bool packing_is_scalar(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingScalar:
	case BufferPackingScalarEnhancedLayout:
		return true;
	default:
		return false;
	}
}

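// When recursing into a nested struct, explicit-offset layouts collapse back to their base
// packing, since explicit offsets are expressed on the outer block's members only:
// e.g. Std140EnhancedLayout -> Std140, HLSLCbufferPackOffset -> HLSLCbuffer.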
static BufferPackingStandard packing_to_substruct_packing(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingStd140EnhancedLayout:
		return BufferPackingStd140;
	case BufferPackingStd430EnhancedLayout:
		return BufferPackingStd430;
	case BufferPackingHLSLCbufferPackOffset:
		return BufferPackingHLSLCbuffer;
	case BufferPackingScalarEnhancedLayout:
		return BufferPackingScalar;
	default:
		return packing;
	}
}
}

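// init() records the locale's decimal separator so float formatting can be fixed up later:
// in a comma-as-decimal locale a naive conversion would print 0.5 as "0,5", which is not valid
// GLSL. The radix character is captured once here rather than by switching the process locale,
// since changing locales is not thread-safe.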
void CompilerGLSL::init()
{
	if (!ir.sources.empty() && ir.sources.front().known)
	{
		options.es = ir.sources.front().es;
		options.version = ir.sources.front().version;
	}

	// Query the locale to see what the decimal point is.
	// We'll rely on fixing it up ourselves in the rare case we have a comma-as-decimal locale
	// rather than setting locales ourselves. Setting locales in a safe and isolated way is rather
	// tricky.
#ifdef _WIN32
	// On Windows, localeconv uses thread-local storage, so it should be fine.
	const struct lconv *conv = localeconv();
	if (conv && conv->decimal_point)
		current_locale_radix_character = *conv->decimal_point;
#elif defined(__ANDROID__) && __ANDROID_API__ < 26
	// nl_langinfo is not supported on this platform; fall back to the worse alternative.
	const struct lconv *conv = localeconv();
	if (conv && conv->decimal_point)
		current_locale_radix_character = *conv->decimal_point;
#else
	// localeconv, the portable alternative, is not MT safe ...
	const char *decimal_point = nl_langinfo(RADIXCHAR);
	if (decimal_point && *decimal_point != '\0')
		current_locale_radix_character = *decimal_point;
#endif
}

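// Maps a pixel local storage format to its GLSL layout qualifier string. Each returned literal
// carries a trailing space so it can be concatenated directly with the following declaration.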
static const char *to_pls_layout(PlsFormat format)
{
	switch (format)
	{
	case PlsR11FG11FB10F:
		return "layout(r11f_g11f_b10f) ";
	case PlsR32F:
		return "layout(r32f) ";
	case PlsRG16F:
		return "layout(rg16f) ";
	case PlsRGB10A2:
		return "layout(rgb10_a2) ";
	case PlsRGBA8:
		return "layout(rgba8) ";
	case PlsRG16:
		return "layout(rg16) ";
	case PlsRGBA8I:
		return "layout(rgba8i) ";
	case PlsRG16I:
		return "layout(rg16i) ";
	case PlsRGB10A2UI:
		return "layout(rgb10_a2ui) ";
	case PlsRGBA8UI:
		return "layout(rgba8ui) ";
	case PlsRG16UI:
		return "layout(rg16ui) ";
	case PlsR32UI:
		return "layout(r32ui) ";
	default:
		return "";
	}
}

static std::pair<Op, SPIRType::BaseType> pls_format_to_basetype(PlsFormat format)
{
	switch (format)
	{
	default:
	case PlsR11FG11FB10F:
	case PlsR32F:
	case PlsRG16F:
	case PlsRGB10A2:
	case PlsRGBA8:
	case PlsRG16:
		return std::make_pair(OpTypeFloat, SPIRType::Float);
	case PlsRGBA8I:
	case PlsRG16I:
		return std::make_pair(OpTypeInt, SPIRType::Int);
	case PlsRGB10A2UI:
	case PlsRGBA8UI:
	case PlsRG16UI:
	case PlsR32UI:
		return std::make_pair(OpTypeInt, SPIRType::UInt);
	}
}

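// Number of components each PLS format expands to, e.g. PlsR11FG11FB10F unpacks to three floats
// and the RGBA formats to four.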
static uint32_t pls_format_to_components(PlsFormat format)
{
	switch (format)
	{
	default:
	case PlsR32F:
	case PlsR32UI:
		return 1;
	case PlsRG16F:
	case PlsRG16:
	case PlsRG16UI:
	case PlsRG16I:
		return 2;
	case PlsR11FG11FB10F:
		return 3;
	case PlsRGB10A2:
	case PlsRGBA8:
	case PlsRGBA8I:
	case PlsRGB10A2UI:
	case PlsRGBA8UI:
		return 4;
	}
}

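// Swizzle string for extracting `vecsize` consecutive components starting at `index` from a
// vec4: vector_swizzle(1, 2) == ".z", vector_swizzle(3, 0) == ".xyz", and extracting all four
// components returns "" since no swizzle is needed.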
const char *CompilerGLSL::vector_swizzle(int vecsize, int index)
{
	static const char *const swizzle[4][4] = {
		{ ".x", ".y", ".z", ".w" },
		{ ".xy", ".yz", ".zw", nullptr },
		{ ".xyz", ".yzw", nullptr, nullptr },
#if defined(__GNUC__) && (__GNUC__ == 9)
		// This works around a GCC 9 bug, see details in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90947.
		// This array ends up being compiled as all nullptrs, tripping the assertions below.
		{ "", nullptr, nullptr, "$" },
#else
		{ "", nullptr, nullptr, nullptr },
#endif
	};

	assert(vecsize >= 1 && vecsize <= 4);
	assert(index >= 0 && index < 4);
	assert(swizzle[vecsize - 1][index]);
	return swizzle[vecsize - 1][index];
}

void CompilerGLSL::reset(uint32_t iteration_count)
{
	// Sanity check the iteration count to be robust against a certain class of bugs where
	// we keep forcing recompilations without making clear forward progress.
	// In buggy situations we will loop forever, or loop for an unbounded number of iterations.
	// Certain types of recompilations are considered to make forward progress,
	// but in almost all situations, we'll never see more than 3 iterations.
	// It is highly context-sensitive when we need to force recompilation,
	// and it is not practical with the current architecture
	// to resolve everything up front.
	if (iteration_count >= options.force_recompile_max_debug_iterations && !is_force_recompile_forward_progress)
		SPIRV_CROSS_THROW("Maximum compilation loops detected and no forward progress was made. Must be a SPIRV-Cross bug!");

	// We do some speculative optimizations which should pretty much always work out,
	// but just in case the SPIR-V is rather weird, recompile until it's happy.
	// This typically only means one extra pass.
	clear_force_recompile();

	// Clear invalid expression tracking.
	invalid_expressions.clear();
	composite_insert_overwritten.clear();
	current_function = nullptr;

	// Clear temporary usage tracking.
	expression_usage_counts.clear();
	forwarded_temporaries.clear();
	suppressed_usage_tracking.clear();

	// Ensure that we declare phi-variable copies even if the original declaration isn't deferred
	flushed_phi_variables.clear();

	current_emitting_switch_stack.clear();

	reset_name_caches();

	ir.for_each_typed_id<SPIRFunction>([&](uint32_t, SPIRFunction &func) {
		func.active = false;
		func.flush_undeclared = true;
	});

	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { var.dependees.clear(); });

	ir.reset_all_of_type<SPIRExpression>();
	ir.reset_all_of_type<SPIRAccessChain>();

	statement_count = 0;
	indent = 0;
	current_loop_level = 0;
}

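// Validates variables registered as pixel local storage inputs/outputs (inputs must be plain
// "in" variables or subpass inputs, outputs must be "out" variables) and marks them as remapped
// so they are treated specially during output emission rather than as ordinary in/out variables.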
void CompilerGLSL::remap_pls_variables()
{
	for (auto &input : pls_inputs)
	{
		auto &var = get<SPIRVariable>(input.id);

		bool input_is_target = false;
		if (var.storage == StorageClassUniformConstant)
		{
			auto &type = get<SPIRType>(var.basetype);
			input_is_target = type.image.dim == DimSubpassData;
		}

		if (var.storage != StorageClassInput && !input_is_target)
			SPIRV_CROSS_THROW("Can only use in and target variables for PLS inputs.");
		var.remapped_variable = true;
	}

	for (auto &output : pls_outputs)
	{
		auto &var = get<SPIRVariable>(output.id);
		if (var.storage != StorageClassOutput)
			SPIRV_CROSS_THROW("Can only use out variables for PLS outputs.");
		var.remapped_variable = true;
	}
}

void CompilerGLSL::remap_ext_framebuffer_fetch(uint32_t input_attachment_index, uint32_t color_location, bool coherent)
{
	subpass_to_framebuffer_fetch_attachment.push_back({ input_attachment_index, color_location });
	inout_color_attachments.push_back({ color_location, coherent });
}

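// inout_color_attachments stores (color location, coherent) pairs registered through
// remap_ext_framebuffer_fetch(). The two predicates below report whether a given location uses
// framebuffer fetch at all, and whether it was registered as non-coherent.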
bool CompilerGLSL::location_is_framebuffer_fetch(uint32_t location) const
{
	return std::find_if(begin(inout_color_attachments), end(inout_color_attachments),
	                    [&](const std::pair<uint32_t, bool> &elem) {
		                    return elem.first == location;
	                    }) != end(inout_color_attachments);
}

bool CompilerGLSL::location_is_non_coherent_framebuffer_fetch(uint32_t location) const
{
	return std::find_if(begin(inout_color_attachments), end(inout_color_attachments),
	                    [&](const std::pair<uint32_t, bool> &elem) {
		                    return elem.first == location && !elem.second;
	                    }) != end(inout_color_attachments);
}

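// Walks the IR once up front (types, entry-point execution model, declared capabilities and
// SPIR-V extensions) to enable the GLSL extensions the output will need, or to throw when the
// requested profile/version cannot express the shader.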
  388. void CompilerGLSL::find_static_extensions()
  389. {
  390. ir.for_each_typed_id<SPIRType>([&](uint32_t, const SPIRType &type) {
  391. if (type.basetype == SPIRType::Double)
  392. {
  393. if (options.es)
  394. SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
  395. if (!options.es && options.version < 400)
  396. require_extension_internal("GL_ARB_gpu_shader_fp64");
  397. }
  398. else if (type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64)
  399. {
  400. if (options.es && options.version < 310) // GL_NV_gpu_shader5 fallback requires 310.
  401. SPIRV_CROSS_THROW("64-bit integers not supported in ES profile before version 310.");
  402. require_extension_internal("GL_ARB_gpu_shader_int64");
  403. }
  404. else if (type.basetype == SPIRType::Half)
  405. {
  406. require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_float16");
  407. if (options.vulkan_semantics)
  408. require_extension_internal("GL_EXT_shader_16bit_storage");
  409. }
  410. else if (type.basetype == SPIRType::SByte || type.basetype == SPIRType::UByte)
  411. {
  412. require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int8");
  413. if (options.vulkan_semantics)
  414. require_extension_internal("GL_EXT_shader_8bit_storage");
  415. }
  416. else if (type.basetype == SPIRType::Short || type.basetype == SPIRType::UShort)
  417. {
  418. require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int16");
  419. if (options.vulkan_semantics)
  420. require_extension_internal("GL_EXT_shader_16bit_storage");
  421. }
  422. });
  423. auto &execution = get_entry_point();
  424. switch (execution.model)
  425. {
  426. case ExecutionModelGLCompute:
  427. if (!options.es && options.version < 430)
  428. require_extension_internal("GL_ARB_compute_shader");
  429. if (options.es && options.version < 310)
  430. SPIRV_CROSS_THROW("At least ESSL 3.10 required for compute shaders.");
  431. break;
  432. case ExecutionModelGeometry:
  433. if (options.es && options.version < 320)
  434. require_extension_internal("GL_EXT_geometry_shader");
  435. if (!options.es && options.version < 150)
  436. require_extension_internal("GL_ARB_geometry_shader4");
  437. if (execution.flags.get(ExecutionModeInvocations) && execution.invocations != 1)
  438. {
  439. // Instanced GS is part of 400 core or this extension.
  440. if (!options.es && options.version < 400)
  441. require_extension_internal("GL_ARB_gpu_shader5");
  442. }
  443. break;
  444. case ExecutionModelTessellationEvaluation:
  445. case ExecutionModelTessellationControl:
  446. if (options.es && options.version < 320)
  447. require_extension_internal("GL_EXT_tessellation_shader");
  448. if (!options.es && options.version < 400)
  449. require_extension_internal("GL_ARB_tessellation_shader");
  450. break;
  451. case ExecutionModelRayGenerationKHR:
  452. case ExecutionModelIntersectionKHR:
  453. case ExecutionModelAnyHitKHR:
  454. case ExecutionModelClosestHitKHR:
  455. case ExecutionModelMissKHR:
  456. case ExecutionModelCallableKHR:
  457. // NV enums are aliases.
  458. if (options.es || options.version < 460)
  459. SPIRV_CROSS_THROW("Ray tracing shaders require non-es profile with version 460 or above.");
  460. if (!options.vulkan_semantics)
  461. SPIRV_CROSS_THROW("Ray tracing requires Vulkan semantics.");
  462. // Need to figure out if we should target KHR or NV extension based on capabilities.
  463. for (auto &cap : ir.declared_capabilities)
  464. {
  465. if (cap == CapabilityRayTracingKHR || cap == CapabilityRayQueryKHR ||
  466. cap == CapabilityRayTraversalPrimitiveCullingKHR)
  467. {
  468. ray_tracing_is_khr = true;
  469. break;
  470. }
  471. }
  472. if (ray_tracing_is_khr)
  473. {
  474. // In KHR ray tracing we pass payloads by pointer instead of location,
  475. // so make sure we assign locations properly.
  476. ray_tracing_khr_fixup_locations();
  477. require_extension_internal("GL_EXT_ray_tracing");
  478. }
  479. else
  480. require_extension_internal("GL_NV_ray_tracing");
  481. break;
  482. case ExecutionModelMeshEXT:
  483. case ExecutionModelTaskEXT:
  484. if (options.es || options.version < 450)
  485. SPIRV_CROSS_THROW("Mesh shaders require GLSL 450 or above.");
  486. if (!options.vulkan_semantics)
  487. SPIRV_CROSS_THROW("Mesh shaders require Vulkan semantics.");
  488. require_extension_internal("GL_EXT_mesh_shader");
  489. break;
  490. default:
  491. break;
  492. }
  493. if (!pls_inputs.empty() || !pls_outputs.empty())
  494. {
  495. if (execution.model != ExecutionModelFragment)
  496. SPIRV_CROSS_THROW("Can only use GL_EXT_shader_pixel_local_storage in fragment shaders.");
  497. require_extension_internal("GL_EXT_shader_pixel_local_storage");
  498. }
  499. if (!inout_color_attachments.empty())
  500. {
  501. if (execution.model != ExecutionModelFragment)
  502. SPIRV_CROSS_THROW("Can only use GL_EXT_shader_framebuffer_fetch in fragment shaders.");
  503. if (options.vulkan_semantics)
  504. SPIRV_CROSS_THROW("Cannot use EXT_shader_framebuffer_fetch in Vulkan GLSL.");
  505. bool has_coherent = false;
  506. bool has_incoherent = false;
  507. for (auto &att : inout_color_attachments)
  508. {
  509. if (att.second)
  510. has_coherent = true;
  511. else
  512. has_incoherent = true;
  513. }
  514. if (has_coherent)
  515. require_extension_internal("GL_EXT_shader_framebuffer_fetch");
  516. if (has_incoherent)
  517. require_extension_internal("GL_EXT_shader_framebuffer_fetch_non_coherent");
  518. }
  519. if (options.separate_shader_objects && !options.es && options.version < 410)
  520. require_extension_internal("GL_ARB_separate_shader_objects");
  521. if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64)
  522. {
  523. if (!options.vulkan_semantics)
  524. SPIRV_CROSS_THROW("GL_EXT_buffer_reference is only supported in Vulkan GLSL.");
  525. if (options.es && options.version < 320)
  526. SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires ESSL 320.");
  527. else if (!options.es && options.version < 450)
  528. SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires GLSL 450.");
  529. require_extension_internal("GL_EXT_buffer_reference2");
  530. }
  531. else if (ir.addressing_model != AddressingModelLogical)
  532. {
  533. SPIRV_CROSS_THROW("Only Logical and PhysicalStorageBuffer64 addressing models are supported.");
  534. }
  535. // Check for nonuniform qualifier and passthrough.
  536. // Instead of looping over all decorations to find this, just look at capabilities.
  537. for (auto &cap : ir.declared_capabilities)
  538. {
  539. switch (cap)
  540. {
  541. case CapabilityShaderNonUniformEXT:
  542. if (!options.vulkan_semantics)
  543. require_extension_internal("GL_NV_gpu_shader5");
  544. else
  545. require_extension_internal("GL_EXT_nonuniform_qualifier");
  546. break;
  547. case CapabilityRuntimeDescriptorArrayEXT:
  548. if (!options.vulkan_semantics)
  549. SPIRV_CROSS_THROW("GL_EXT_nonuniform_qualifier is only supported in Vulkan GLSL.");
  550. require_extension_internal("GL_EXT_nonuniform_qualifier");
  551. break;
  552. case CapabilityGeometryShaderPassthroughNV:
  553. if (execution.model == ExecutionModelGeometry)
  554. {
  555. require_extension_internal("GL_NV_geometry_shader_passthrough");
  556. execution.geometry_passthrough = true;
  557. }
  558. break;
  559. case CapabilityVariablePointers:
  560. case CapabilityVariablePointersStorageBuffer:
  561. SPIRV_CROSS_THROW("VariablePointers capability is not supported in GLSL.");
  562. case CapabilityMultiView:
  563. if (options.vulkan_semantics)
  564. require_extension_internal("GL_EXT_multiview");
  565. else
  566. {
  567. require_extension_internal("GL_OVR_multiview2");
  568. if (options.ovr_multiview_view_count == 0)
  569. SPIRV_CROSS_THROW("ovr_multiview_view_count must be non-zero when using GL_OVR_multiview2.");
  570. if (get_execution_model() != ExecutionModelVertex)
  571. SPIRV_CROSS_THROW("OVR_multiview2 can only be used with Vertex shaders.");
  572. }
  573. break;
  574. case CapabilityRayQueryKHR:
  575. if (options.es || options.version < 460 || !options.vulkan_semantics)
  576. SPIRV_CROSS_THROW("RayQuery requires Vulkan GLSL 460.");
  577. require_extension_internal("GL_EXT_ray_query");
  578. ray_tracing_is_khr = true;
  579. break;
  580. case CapabilityRayQueryPositionFetchKHR:
  581. if (options.es || options.version < 460 || !options.vulkan_semantics)
  582. SPIRV_CROSS_THROW("RayQuery Position Fetch requires Vulkan GLSL 460.");
  583. require_extension_internal("GL_EXT_ray_tracing_position_fetch");
  584. ray_tracing_is_khr = true;
  585. break;
  586. case CapabilityRayTracingPositionFetchKHR:
  587. if (options.es || options.version < 460 || !options.vulkan_semantics)
  588. SPIRV_CROSS_THROW("Ray Tracing Position Fetch requires Vulkan GLSL 460.");
  589. require_extension_internal("GL_EXT_ray_tracing_position_fetch");
  590. ray_tracing_is_khr = true;
  591. break;
  592. case CapabilityRayTraversalPrimitiveCullingKHR:
  593. if (options.es || options.version < 460 || !options.vulkan_semantics)
SPIRV_CROSS_THROW("RayTraversalPrimitiveCulling requires Vulkan GLSL 460.");
  595. require_extension_internal("GL_EXT_ray_flags_primitive_culling");
  596. ray_tracing_is_khr = true;
  597. break;
  598. case CapabilityRayTracingClusterAccelerationStructureNV:
  599. if (options.es || options.version < 460 || !options.vulkan_semantics)
  600. SPIRV_CROSS_THROW("Cluster AS requires Vulkan GLSL 460.");
  601. require_extension_internal("GL_NV_cluster_acceleration_structure");
  602. ray_tracing_is_khr = true;
  603. break;
  604. case CapabilityTensorsARM:
  605. if (options.es || options.version < 460 || !options.vulkan_semantics)
  606. SPIRV_CROSS_THROW("Tensor requires Vulkan GLSL 460.");
  607. require_extension_internal("GL_ARM_tensors");
  608. break;
  609. default:
  610. break;
  611. }
  612. }
  613. if (options.ovr_multiview_view_count)
  614. {
  615. if (options.vulkan_semantics)
  616. SPIRV_CROSS_THROW("OVR_multiview2 cannot be used with Vulkan semantics.");
  617. if (get_execution_model() != ExecutionModelVertex)
  618. SPIRV_CROSS_THROW("OVR_multiview2 can only be used with Vertex shaders.");
  619. require_extension_internal("GL_OVR_multiview2");
  620. }
  621. if (execution.flags.get(ExecutionModeQuadDerivativesKHR) ||
  622. (execution.flags.get(ExecutionModeRequireFullQuadsKHR) && get_execution_model() == ExecutionModelFragment))
  623. {
  624. require_extension_internal("GL_EXT_shader_quad_control");
  625. }
// The KHR extension is likely to get promoted at some point, so if we don't see an explicit SPIR-V extension, assume KHR.
  627. for (auto &ext : ir.declared_extensions)
  628. if (ext == "SPV_NV_fragment_shader_barycentric")
  629. barycentric_is_nv = true;
  630. }
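// Marks a polyfill helper as required. Newly requested polyfills force another compile pass so the
// helper can be emitted before its first use. The "relaxed" set is tracked separately where
// relaxed-precision analysis applies (ES or Vulkan GLSL).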
  631. void CompilerGLSL::require_polyfill(Polyfill polyfill, bool relaxed)
  632. {
  633. uint32_t &polyfills = (relaxed && (options.es || options.vulkan_semantics)) ?
  634. required_polyfills_relaxed : required_polyfills;
  635. if ((polyfills & polyfill) == 0)
  636. {
  637. polyfills |= polyfill;
  638. force_recompile();
  639. }
  640. }
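// GL_EXT_ray_tracing expects location qualifiers on payload and callable data variables,
// so assign sequential locations to every such variable that is actually used.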
  641. void CompilerGLSL::ray_tracing_khr_fixup_locations()
  642. {
  643. uint32_t location = 0;
  644. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  645. // Incoming payload storage can also be used for tracing.
  646. if (var.storage != StorageClassRayPayloadKHR && var.storage != StorageClassCallableDataKHR &&
  647. var.storage != StorageClassIncomingRayPayloadKHR && var.storage != StorageClassIncomingCallableDataKHR)
  648. return;
  649. if (is_hidden_variable(var))
  650. return;
  651. set_decoration(var.self, DecorationLocation, location++);
  652. });
  653. }
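// Main GLSL codegen entry point. Emission can run multiple passes: whenever a pass discovers it
// needs an extension, polyfill or workaround that must be declared earlier in the output, it
// forces a recompile and the output buffer is rebuilt from scratch.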
  654. string CompilerGLSL::compile()
  655. {
  656. ir.fixup_reserved_names();
  657. if (!options.vulkan_semantics)
  658. {
// Only NV_gpu_shader5 supports divergent indexing on OpenGL, and it does so without extra qualifiers.
  660. backend.nonuniform_qualifier = "";
  661. backend.needs_row_major_load_workaround = options.enable_row_major_load_workaround;
  662. }
  663. backend.allow_precision_qualifiers = options.vulkan_semantics || options.es;
  664. backend.force_gl_in_out_block = true;
  665. backend.supports_extensions = true;
  666. backend.use_array_constructor = true;
  667. backend.workgroup_size_is_hidden = true;
  668. backend.requires_relaxed_precision_analysis = options.es || options.vulkan_semantics;
  669. backend.support_precise_qualifier =
  670. (!options.es && options.version >= 400) || (options.es && options.version >= 320);
  671. backend.constant_null_initializer = "{ }";
  672. backend.requires_matching_array_initializer = true;
  673. if (is_legacy_es())
  674. backend.support_case_fallthrough = false;
  675. // Scan the SPIR-V to find trivial uses of extensions.
  676. fixup_anonymous_struct_names();
  677. fixup_type_alias();
  678. reorder_type_alias();
  679. build_function_control_flow_graphs_and_analyze();
  680. find_static_extensions();
  681. fixup_image_load_store_access();
  682. update_active_builtins();
  683. analyze_image_and_sampler_usage();
  684. analyze_interlocked_resource_usage();
  685. if (!inout_color_attachments.empty())
  686. emit_inout_fragment_outputs_copy_to_subpass_inputs();
  687. // Shaders might cast unrelated data to pointers of non-block types.
  688. // Find all such instances and make sure we can cast the pointers to a synthesized block type.
  689. if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64)
  690. analyze_non_block_pointer_types();
  691. uint32_t pass_count = 0;
  692. do
  693. {
  694. reset(pass_count);
  695. buffer.reset();
  696. emit_header();
  697. emit_resources();
  698. emit_extension_workarounds(get_execution_model());
  699. if (required_polyfills != 0)
  700. emit_polyfills(required_polyfills, false);
  701. if ((options.es || options.vulkan_semantics) && required_polyfills_relaxed != 0)
  702. emit_polyfills(required_polyfills_relaxed, true);
  703. emit_function(get<SPIRFunction>(ir.default_entry_point), Bitset());
  704. pass_count++;
  705. } while (is_forcing_recompilation());
  706. // Implement the interlocked wrapper function at the end.
  707. // The body was implemented in lieu of main().
  708. if (interlocked_is_complex)
  709. {
  710. statement("void main()");
  711. begin_scope();
  712. statement("// Interlocks were used in a way not compatible with GLSL, this is very slow.");
  713. statement("SPIRV_Cross_beginInvocationInterlock();");
  714. statement("spvMainInterlockedBody();");
  715. statement("SPIRV_Cross_endInvocationInterlock();");
  716. end_scope();
  717. }
  718. // Entry point in GLSL is always main().
  719. get_entry_point().name = "main";
  720. return buffer.str();
  721. }
  722. std::string CompilerGLSL::get_partial_source()
  723. {
  724. return buffer.str();
  725. }
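// Builds the local_size_* layout arguments for compute-like stages.
// Illustrative result once merged into the layout declaration:
//   layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
// With specialization constants, Vulkan GLSL uses local_size_x_id = <constant_id> instead, while
// plain GLSL substitutes the generated specialization constant macro name.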
  726. void CompilerGLSL::build_workgroup_size(SmallVector<string> &arguments, const SpecializationConstant &wg_x,
  727. const SpecializationConstant &wg_y, const SpecializationConstant &wg_z)
  728. {
  729. auto &execution = get_entry_point();
  730. bool builtin_workgroup = execution.workgroup_size.constant != 0;
  731. bool use_local_size_id = !builtin_workgroup && execution.flags.get(ExecutionModeLocalSizeId);
  732. if (wg_x.id)
  733. {
  734. if (options.vulkan_semantics)
  735. arguments.push_back(join("local_size_x_id = ", wg_x.constant_id));
  736. else
  737. arguments.push_back(join("local_size_x = ", get<SPIRConstant>(wg_x.id).specialization_constant_macro_name));
  738. }
  739. else if (use_local_size_id && execution.workgroup_size.id_x)
  740. arguments.push_back(join("local_size_x = ", get<SPIRConstant>(execution.workgroup_size.id_x).scalar()));
  741. else
  742. arguments.push_back(join("local_size_x = ", execution.workgroup_size.x));
  743. if (wg_y.id)
  744. {
  745. if (options.vulkan_semantics)
  746. arguments.push_back(join("local_size_y_id = ", wg_y.constant_id));
  747. else
  748. arguments.push_back(join("local_size_y = ", get<SPIRConstant>(wg_y.id).specialization_constant_macro_name));
  749. }
  750. else if (use_local_size_id && execution.workgroup_size.id_y)
  751. arguments.push_back(join("local_size_y = ", get<SPIRConstant>(execution.workgroup_size.id_y).scalar()));
  752. else
  753. arguments.push_back(join("local_size_y = ", execution.workgroup_size.y));
  754. if (wg_z.id)
  755. {
  756. if (options.vulkan_semantics)
  757. arguments.push_back(join("local_size_z_id = ", wg_z.constant_id));
  758. else
  759. arguments.push_back(join("local_size_z = ", get<SPIRConstant>(wg_z.id).specialization_constant_macro_name));
  760. }
  761. else if (use_local_size_id && execution.workgroup_size.id_z)
  762. arguments.push_back(join("local_size_z = ", get<SPIRConstant>(execution.workgroup_size.id_z).scalar()));
  763. else
  764. arguments.push_back(join("local_size_z = ", execution.workgroup_size.z));
  765. }
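// In Vulkan GLSL a subgroup feature maps directly to its GL_KHR_shader_subgroup_* extension.
// Plain GLSL has no single equivalent, so the request is recorded and later resolved against a set
// of candidate vendor extensions; the first request for a feature forces a recompile.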
  766. void CompilerGLSL::request_subgroup_feature(ShaderSubgroupSupportHelper::Feature feature)
  767. {
  768. if (options.vulkan_semantics)
  769. {
  770. auto khr_extension = ShaderSubgroupSupportHelper::get_KHR_extension_for_feature(feature);
  771. require_extension_internal(ShaderSubgroupSupportHelper::get_extension_name(khr_extension));
  772. }
  773. else
  774. {
  775. if (!shader_subgroup_supporter.is_feature_requested(feature))
  776. force_recompile();
  777. shader_subgroup_supporter.request_feature(feature);
  778. }
  779. }
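// Emits the #version directive, #extension blocks (including the #if/#elif fallback chains for
// extensions with vendor alternatives), default precision statements for ES fragment shaders, and
// the layout(...) in/out declarations that encode the execution modes of the current stage.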
  780. void CompilerGLSL::emit_header()
  781. {
  782. auto &execution = get_entry_point();
  783. statement("#version ", options.version, options.es && options.version > 100 ? " es" : "");
  784. if (!options.es && options.version < 420)
  785. {
  786. // Needed for binding = # on UBOs, etc.
  787. if (options.enable_420pack_extension)
  788. {
  789. statement("#ifdef GL_ARB_shading_language_420pack");
  790. statement("#extension GL_ARB_shading_language_420pack : require");
  791. statement("#endif");
  792. }
  793. // Needed for: layout(early_fragment_tests) in;
  794. if (execution.flags.get(ExecutionModeEarlyFragmentTests))
  795. require_extension_internal("GL_ARB_shader_image_load_store");
  796. }
  797. // Needed for: layout(post_depth_coverage) in;
  798. if (execution.flags.get(ExecutionModePostDepthCoverage))
  799. require_extension_internal("GL_ARB_post_depth_coverage");
  800. // Needed for: layout({pixel,sample}_interlock_[un]ordered) in;
  801. bool interlock_used = execution.flags.get(ExecutionModePixelInterlockOrderedEXT) ||
  802. execution.flags.get(ExecutionModePixelInterlockUnorderedEXT) ||
  803. execution.flags.get(ExecutionModeSampleInterlockOrderedEXT) ||
  804. execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT);
  805. if (interlock_used)
  806. {
  807. if (options.es)
  808. {
  809. if (options.version < 310)
  810. SPIRV_CROSS_THROW("At least ESSL 3.10 required for fragment shader interlock.");
  811. require_extension_internal("GL_NV_fragment_shader_interlock");
  812. }
  813. else
  814. {
  815. if (options.version < 420)
  816. require_extension_internal("GL_ARB_shader_image_load_store");
  817. require_extension_internal("GL_ARB_fragment_shader_interlock");
  818. }
  819. }
  820. for (auto &ext : forced_extensions)
  821. {
  822. if (ext == "GL_ARB_gpu_shader_int64")
  823. {
  824. statement("#if defined(GL_ARB_gpu_shader_int64)");
  825. statement("#extension GL_ARB_gpu_shader_int64 : require");
  826. if (!options.vulkan_semantics || options.es)
  827. {
  828. statement("#elif defined(GL_NV_gpu_shader5)");
  829. statement("#extension GL_NV_gpu_shader5 : require");
  830. }
  831. statement("#else");
  832. statement("#error No extension available for 64-bit integers.");
  833. statement("#endif");
  834. }
  835. else if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16")
  836. {
  837. // Special case, this extension has a potential fallback to another vendor extension in normal GLSL.
  838. // GL_AMD_gpu_shader_half_float is a superset, so try that first.
  839. statement("#if defined(GL_AMD_gpu_shader_half_float)");
  840. statement("#extension GL_AMD_gpu_shader_half_float : require");
  841. if (!options.vulkan_semantics)
  842. {
  843. statement("#elif defined(GL_NV_gpu_shader5)");
  844. statement("#extension GL_NV_gpu_shader5 : require");
  845. }
  846. else
  847. {
  848. statement("#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)");
  849. statement("#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require");
  850. }
  851. statement("#else");
  852. statement("#error No extension available for FP16.");
  853. statement("#endif");
  854. }
  855. else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int8")
  856. {
  857. if (options.vulkan_semantics)
  858. statement("#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require");
  859. else
  860. {
  861. statement("#if defined(GL_EXT_shader_explicit_arithmetic_types_int8)");
  862. statement("#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require");
  863. statement("#elif defined(GL_NV_gpu_shader5)");
  864. statement("#extension GL_NV_gpu_shader5 : require");
  865. statement("#else");
  866. statement("#error No extension available for Int8.");
  867. statement("#endif");
  868. }
  869. }
  870. else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int16")
  871. {
  872. if (options.vulkan_semantics)
  873. statement("#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require");
  874. else
  875. {
  876. statement("#if defined(GL_EXT_shader_explicit_arithmetic_types_int16)");
  877. statement("#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require");
  878. statement("#elif defined(GL_AMD_gpu_shader_int16)");
  879. statement("#extension GL_AMD_gpu_shader_int16 : require");
  880. statement("#elif defined(GL_NV_gpu_shader5)");
  881. statement("#extension GL_NV_gpu_shader5 : require");
  882. statement("#else");
  883. statement("#error No extension available for Int16.");
  884. statement("#endif");
  885. }
  886. }
  887. else if (ext == "GL_ARB_post_depth_coverage")
  888. {
  889. if (options.es)
  890. statement("#extension GL_EXT_post_depth_coverage : require");
  891. else
  892. {
statement("#if defined(GL_ARB_post_depth_coverage)");
  894. statement("#extension GL_ARB_post_depth_coverage : require");
  895. statement("#else");
  896. statement("#extension GL_EXT_post_depth_coverage : require");
  897. statement("#endif");
  898. }
  899. }
  900. else if (!options.vulkan_semantics && ext == "GL_ARB_shader_draw_parameters")
  901. {
  902. // Soft-enable this extension on plain GLSL.
  903. statement("#ifdef ", ext);
  904. statement("#extension ", ext, " : enable");
  905. statement("#endif");
  906. }
  907. else if (ext == "GL_EXT_control_flow_attributes")
  908. {
// These are just hints, so we can conditionally enable the extension and fall back in the shader.
  910. statement("#if defined(GL_EXT_control_flow_attributes)");
  911. statement("#extension GL_EXT_control_flow_attributes : require");
  912. statement("#define SPIRV_CROSS_FLATTEN [[flatten]]");
  913. statement("#define SPIRV_CROSS_BRANCH [[dont_flatten]]");
  914. statement("#define SPIRV_CROSS_UNROLL [[unroll]]");
  915. statement("#define SPIRV_CROSS_LOOP [[dont_unroll]]");
  916. statement("#else");
  917. statement("#define SPIRV_CROSS_FLATTEN");
  918. statement("#define SPIRV_CROSS_BRANCH");
  919. statement("#define SPIRV_CROSS_UNROLL");
  920. statement("#define SPIRV_CROSS_LOOP");
  921. statement("#endif");
  922. }
  923. else if (ext == "GL_NV_fragment_shader_interlock")
  924. {
  925. statement("#extension GL_NV_fragment_shader_interlock : require");
  926. statement("#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockNV()");
  927. statement("#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockNV()");
  928. }
  929. else if (ext == "GL_ARB_fragment_shader_interlock")
  930. {
  931. statement("#ifdef GL_ARB_fragment_shader_interlock");
  932. statement("#extension GL_ARB_fragment_shader_interlock : enable");
  933. statement("#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB()");
  934. statement("#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB()");
  935. statement("#elif defined(GL_INTEL_fragment_shader_ordering)");
  936. statement("#extension GL_INTEL_fragment_shader_ordering : enable");
  937. statement("#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL()");
  938. statement("#define SPIRV_Cross_endInvocationInterlock()");
  939. statement("#endif");
  940. }
  941. else
  942. statement("#extension ", ext, " : require");
  943. }
  944. if (!options.vulkan_semantics)
  945. {
  946. using Supp = ShaderSubgroupSupportHelper;
  947. auto result = shader_subgroup_supporter.resolve();
  948. for (uint32_t feature_index = 0; feature_index < Supp::FeatureCount; feature_index++)
  949. {
  950. auto feature = static_cast<Supp::Feature>(feature_index);
  951. if (!shader_subgroup_supporter.is_feature_requested(feature))
  952. continue;
  953. auto exts = Supp::get_candidates_for_feature(feature, result);
  954. if (exts.empty())
  955. continue;
  956. statement("");
  957. for (auto &ext : exts)
  958. {
  959. const char *name = Supp::get_extension_name(ext);
  960. const char *extra_predicate = Supp::get_extra_required_extension_predicate(ext);
  961. auto extra_names = Supp::get_extra_required_extension_names(ext);
  962. statement(&ext != &exts.front() ? "#elif" : "#if", " defined(", name, ")",
  963. (*extra_predicate != '\0' ? " && " : ""), extra_predicate);
  964. for (const auto &e : extra_names)
  965. statement("#extension ", e, " : enable");
  966. statement("#extension ", name, " : require");
  967. }
  968. if (!Supp::can_feature_be_implemented_without_extensions(feature))
  969. {
  970. statement("#else");
  971. statement("#error No extensions available to emulate requested subgroup feature.");
  972. }
  973. statement("#endif");
  974. }
  975. }
  976. for (auto &header : header_lines)
  977. statement(header);
  978. SmallVector<string> inputs;
  979. SmallVector<string> outputs;
  980. switch (execution.model)
  981. {
  982. case ExecutionModelVertex:
  983. if (options.ovr_multiview_view_count)
  984. inputs.push_back(join("num_views = ", options.ovr_multiview_view_count));
  985. break;
  986. case ExecutionModelGeometry:
  987. if ((execution.flags.get(ExecutionModeInvocations)) && execution.invocations != 1)
  988. inputs.push_back(join("invocations = ", execution.invocations));
  989. if (execution.flags.get(ExecutionModeInputPoints))
  990. inputs.push_back("points");
  991. if (execution.flags.get(ExecutionModeInputLines))
  992. inputs.push_back("lines");
  993. if (execution.flags.get(ExecutionModeInputLinesAdjacency))
  994. inputs.push_back("lines_adjacency");
  995. if (execution.flags.get(ExecutionModeTriangles))
  996. inputs.push_back("triangles");
  997. if (execution.flags.get(ExecutionModeInputTrianglesAdjacency))
  998. inputs.push_back("triangles_adjacency");
  999. if (!execution.geometry_passthrough)
  1000. {
// For passthrough, these are implied and cannot be declared in the shader.
  1002. outputs.push_back(join("max_vertices = ", execution.output_vertices));
  1003. if (execution.flags.get(ExecutionModeOutputTriangleStrip))
  1004. outputs.push_back("triangle_strip");
  1005. if (execution.flags.get(ExecutionModeOutputPoints))
  1006. outputs.push_back("points");
  1007. if (execution.flags.get(ExecutionModeOutputLineStrip))
  1008. outputs.push_back("line_strip");
  1009. }
  1010. break;
  1011. case ExecutionModelTessellationControl:
  1012. if (execution.flags.get(ExecutionModeOutputVertices))
  1013. outputs.push_back(join("vertices = ", execution.output_vertices));
  1014. break;
  1015. case ExecutionModelTessellationEvaluation:
  1016. if (execution.flags.get(ExecutionModeQuads))
  1017. inputs.push_back("quads");
  1018. if (execution.flags.get(ExecutionModeTriangles))
  1019. inputs.push_back("triangles");
  1020. if (execution.flags.get(ExecutionModeIsolines))
  1021. inputs.push_back("isolines");
  1022. if (execution.flags.get(ExecutionModePointMode))
  1023. inputs.push_back("point_mode");
  1024. if (!execution.flags.get(ExecutionModeIsolines))
  1025. {
  1026. if (execution.flags.get(ExecutionModeVertexOrderCw))
  1027. inputs.push_back("cw");
  1028. if (execution.flags.get(ExecutionModeVertexOrderCcw))
  1029. inputs.push_back("ccw");
  1030. }
  1031. if (execution.flags.get(ExecutionModeSpacingFractionalEven))
  1032. inputs.push_back("fractional_even_spacing");
  1033. if (execution.flags.get(ExecutionModeSpacingFractionalOdd))
  1034. inputs.push_back("fractional_odd_spacing");
  1035. if (execution.flags.get(ExecutionModeSpacingEqual))
  1036. inputs.push_back("equal_spacing");
  1037. break;
  1038. case ExecutionModelGLCompute:
  1039. case ExecutionModelTaskEXT:
  1040. case ExecutionModelMeshEXT:
  1041. {
  1042. if (execution.workgroup_size.constant != 0 || execution.flags.get(ExecutionModeLocalSizeId))
  1043. {
  1044. SpecializationConstant wg_x, wg_y, wg_z;
  1045. get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
// If there are any spec constants on legacy GLSL, defer the declaration; we need to set up macro
  1047. // declarations before we can emit the work group size.
  1048. if (options.vulkan_semantics ||
  1049. ((wg_x.id == ConstantID(0)) && (wg_y.id == ConstantID(0)) && (wg_z.id == ConstantID(0))))
  1050. build_workgroup_size(inputs, wg_x, wg_y, wg_z);
  1051. }
  1052. else
  1053. {
  1054. inputs.push_back(join("local_size_x = ", execution.workgroup_size.x));
  1055. inputs.push_back(join("local_size_y = ", execution.workgroup_size.y));
  1056. inputs.push_back(join("local_size_z = ", execution.workgroup_size.z));
  1057. }
  1058. if (execution.model == ExecutionModelMeshEXT)
  1059. {
  1060. outputs.push_back(join("max_vertices = ", execution.output_vertices));
  1061. outputs.push_back(join("max_primitives = ", execution.output_primitives));
  1062. if (execution.flags.get(ExecutionModeOutputTrianglesEXT))
  1063. outputs.push_back("triangles");
  1064. else if (execution.flags.get(ExecutionModeOutputLinesEXT))
  1065. outputs.push_back("lines");
  1066. else if (execution.flags.get(ExecutionModeOutputPoints))
  1067. outputs.push_back("points");
  1068. }
  1069. break;
  1070. }
  1071. case ExecutionModelFragment:
  1072. if (options.es)
  1073. {
  1074. switch (options.fragment.default_float_precision)
  1075. {
  1076. case Options::Lowp:
  1077. statement("precision lowp float;");
  1078. break;
  1079. case Options::Mediump:
  1080. statement("precision mediump float;");
  1081. break;
  1082. case Options::Highp:
  1083. statement("precision highp float;");
  1084. break;
  1085. default:
  1086. break;
  1087. }
  1088. switch (options.fragment.default_int_precision)
  1089. {
  1090. case Options::Lowp:
  1091. statement("precision lowp int;");
  1092. break;
  1093. case Options::Mediump:
  1094. statement("precision mediump int;");
  1095. break;
  1096. case Options::Highp:
  1097. statement("precision highp int;");
  1098. break;
  1099. default:
  1100. break;
  1101. }
  1102. }
  1103. if (execution.flags.get(ExecutionModeEarlyFragmentTests))
  1104. inputs.push_back("early_fragment_tests");
  1105. if (execution.flags.get(ExecutionModePostDepthCoverage))
  1106. inputs.push_back("post_depth_coverage");
  1107. if (interlock_used)
  1108. statement("#if defined(GL_ARB_fragment_shader_interlock)");
  1109. if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT))
  1110. statement("layout(pixel_interlock_ordered) in;");
  1111. else if (execution.flags.get(ExecutionModePixelInterlockUnorderedEXT))
  1112. statement("layout(pixel_interlock_unordered) in;");
  1113. else if (execution.flags.get(ExecutionModeSampleInterlockOrderedEXT))
  1114. statement("layout(sample_interlock_ordered) in;");
  1115. else if (execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT))
  1116. statement("layout(sample_interlock_unordered) in;");
  1117. if (interlock_used)
  1118. {
  1119. statement("#elif !defined(GL_INTEL_fragment_shader_ordering)");
  1120. statement("#error Fragment Shader Interlock/Ordering extension missing!");
  1121. statement("#endif");
  1122. }
  1123. if (!options.es && execution.flags.get(ExecutionModeDepthGreater))
  1124. statement("layout(depth_greater) out float gl_FragDepth;");
  1125. else if (!options.es && execution.flags.get(ExecutionModeDepthLess))
  1126. statement("layout(depth_less) out float gl_FragDepth;");
  1127. if (execution.flags.get(ExecutionModeRequireFullQuadsKHR))
  1128. statement("layout(full_quads) in;");
  1129. break;
  1130. default:
  1131. break;
  1132. }
  1133. for (auto &cap : ir.declared_capabilities)
  1134. if (cap == CapabilityRayTraversalPrimitiveCullingKHR)
  1135. statement("layout(primitive_culling);");
  1136. if (execution.flags.get(ExecutionModeQuadDerivativesKHR))
  1137. statement("layout(quad_derivatives) in;");
  1138. if (!inputs.empty())
  1139. statement("layout(", merge(inputs), ") in;");
  1140. if (!outputs.empty())
  1141. statement("layout(", merge(outputs), ") out;");
  1142. statement("");
  1143. }
  1144. bool CompilerGLSL::type_is_empty(const SPIRType &type)
  1145. {
  1146. return type.basetype == SPIRType::Struct && type.member_types.empty();
  1147. }
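// Emits a plain struct declaration. Pure type aliases are skipped since the canonical declaration
// covers them, and empty structs get a dummy member because GLSL does not allow empty struct
// declarations.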
  1148. void CompilerGLSL::emit_struct(SPIRType &type)
  1149. {
  1150. // Struct types can be stamped out multiple times
  1151. // with just different offsets, matrix layouts, etc ...
  1152. // Type-punning with these types is legal, which complicates things
  1153. // when we are storing struct and array types in an SSBO for example.
  1154. // If the type master is packed however, we can no longer assume that the struct declaration will be redundant.
  1155. if (type.type_alias != TypeID(0) &&
  1156. !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
  1157. return;
  1158. add_resource_name(type.self);
  1159. auto name = type_to_glsl(type);
  1160. statement(!backend.explicit_struct_type ? "struct " : "", name);
  1161. begin_scope();
  1162. type.member_name_cache.clear();
  1163. uint32_t i = 0;
  1164. bool emitted = false;
  1165. for (auto &member : type.member_types)
  1166. {
  1167. add_member_name(type, i);
  1168. emit_struct_member(type, member, i);
  1169. i++;
  1170. emitted = true;
  1171. }
// Don't declare empty structs in GLSL; this is not allowed.
  1173. if (type_is_empty(type) && !backend.supports_empty_struct)
  1174. {
  1175. statement("int empty_struct_member;");
  1176. emitted = true;
  1177. }
  1178. if (has_extended_decoration(type.self, SPIRVCrossDecorationPaddingTarget))
  1179. emit_struct_padding_target(type);
  1180. end_scope_decl();
  1181. if (emitted)
  1182. statement("");
  1183. }
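// Translates interpolation and auxiliary decorations into GLSL qualifiers,
// e.g. Flat + Centroid becomes "flat centroid ".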
  1184. string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags)
  1185. {
  1186. string res;
  1187. //if (flags & (1ull << DecorationSmooth))
  1188. // res += "smooth ";
  1189. if (flags.get(DecorationFlat))
  1190. res += "flat ";
  1191. if (flags.get(DecorationNoPerspective))
  1192. {
  1193. if (options.es)
  1194. {
  1195. if (options.version < 300)
  1196. SPIRV_CROSS_THROW("noperspective requires ESSL 300.");
  1197. require_extension_internal("GL_NV_shader_noperspective_interpolation");
  1198. }
  1199. else if (is_legacy_desktop())
  1200. require_extension_internal("GL_EXT_gpu_shader4");
  1201. res += "noperspective ";
  1202. }
  1203. if (flags.get(DecorationCentroid))
  1204. res += "centroid ";
  1205. if (flags.get(DecorationPatch))
  1206. res += "patch ";
  1207. if (flags.get(DecorationSample))
  1208. {
  1209. if (options.es)
  1210. {
  1211. if (options.version < 300)
  1212. SPIRV_CROSS_THROW("sample requires ESSL 300.");
  1213. else if (options.version < 320)
  1214. require_extension_internal("GL_OES_shader_multisample_interpolation");
  1215. }
  1216. res += "sample ";
  1217. }
  1218. if (flags.get(DecorationInvariant) && (options.es || options.version >= 120))
  1219. res += "invariant ";
  1220. if (flags.get(DecorationPerPrimitiveEXT))
  1221. {
  1222. res += "perprimitiveEXT ";
  1223. require_extension_internal("GL_EXT_mesh_shader");
  1224. }
  1225. if (flags.get(DecorationExplicitInterpAMD))
  1226. {
  1227. require_extension_internal("GL_AMD_shader_explicit_vertex_parameter");
  1228. res += "__explicitInterpAMD ";
  1229. }
  1230. if (flags.get(DecorationPerVertexKHR))
  1231. {
  1232. if (options.es && options.version < 320)
  1233. SPIRV_CROSS_THROW("pervertexEXT requires ESSL 320.");
  1234. else if (!options.es && options.version < 450)
  1235. SPIRV_CROSS_THROW("pervertexEXT requires GLSL 450.");
  1236. if (barycentric_is_nv)
  1237. {
  1238. require_extension_internal("GL_NV_fragment_shader_barycentric");
  1239. res += "pervertexNV ";
  1240. }
  1241. else
  1242. {
  1243. require_extension_internal("GL_EXT_fragment_shader_barycentric");
  1244. res += "pervertexEXT ";
  1245. }
  1246. }
  1247. return res;
  1248. }
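// Builds the layout(...) string for a single member of a block interface,
// e.g. "layout(row_major) " or "layout(location = 1, component = 2) ".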
  1249. string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index)
  1250. {
  1251. if (is_legacy())
  1252. return "";
  1253. bool is_block = has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);
  1254. if (!is_block)
  1255. return "";
  1256. auto &memb = ir.meta[type.self].members;
  1257. if (index >= memb.size())
  1258. return "";
  1259. auto &dec = memb[index];
  1260. SmallVector<string> attr;
  1261. if (has_member_decoration(type.self, index, DecorationPassthroughNV))
  1262. attr.push_back("passthrough");
  1263. // We can only apply layouts on members in block interfaces.
  1264. // This is a bit problematic because in SPIR-V decorations are applied on the struct types directly.
// This is not supported in GLSL, so we have to make the assumption that if a struct within our buffer block struct
  1266. // has a decoration, it was originally caused by a top-level layout() qualifier in GLSL.
  1267. //
  1268. // We would like to go from (SPIR-V style):
  1269. //
  1270. // struct Foo { layout(row_major) mat4 matrix; };
  1271. // buffer UBO { Foo foo; };
  1272. //
  1273. // to
  1274. //
  1275. // struct Foo { mat4 matrix; }; // GLSL doesn't support any layout shenanigans in raw struct declarations.
  1276. // buffer UBO { layout(row_major) Foo foo; }; // Apply the layout on top-level.
  1277. auto flags = combined_decoration_for_member(type, index);
  1278. if (flags.get(DecorationRowMajor))
  1279. attr.push_back("row_major");
  1280. // We don't emit any global layouts, so column_major is default.
  1281. //if (flags & (1ull << DecorationColMajor))
  1282. // attr.push_back("column_major");
  1283. if (dec.decoration_flags.get(DecorationLocation) && can_use_io_location(type.storage, true))
  1284. attr.push_back(join("location = ", dec.location));
  1285. // Can only declare component if we can declare location.
  1286. if (dec.decoration_flags.get(DecorationComponent) && can_use_io_location(type.storage, true))
  1287. {
  1288. if (!options.es)
  1289. {
  1290. if (options.version < 440 && options.version >= 140)
  1291. require_extension_internal("GL_ARB_enhanced_layouts");
  1292. else if (options.version < 140)
  1293. SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40.");
  1294. attr.push_back(join("component = ", dec.component));
  1295. }
  1296. else
  1297. SPIRV_CROSS_THROW("Component decoration is not supported in ES targets.");
  1298. }
// SPIRVCrossDecorationExplicitOffset is set by layout_for_variable earlier to mark that we need to emit offset qualifiers.
  1300. // This is only done selectively in GLSL as needed.
  1301. if (has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) &&
  1302. dec.decoration_flags.get(DecorationOffset))
  1303. attr.push_back(join("offset = ", dec.offset));
  1304. else if (type.storage == StorageClassOutput && dec.decoration_flags.get(DecorationOffset))
  1305. attr.push_back(join("xfb_offset = ", dec.offset));
  1306. if (attr.empty())
  1307. return "";
  1308. string res = "layout(";
  1309. res += merge(attr);
  1310. res += ") ";
  1311. return res;
  1312. }
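// Maps a SPIR-V ImageFormat to the corresponding GLSL format layout qualifier,
// or nullptr when the format is Unknown or unhandled.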
  1313. const char *CompilerGLSL::format_to_glsl(ImageFormat format)
  1314. {
  1315. if (options.es && is_desktop_only_format(format))
  1316. SPIRV_CROSS_THROW("Attempting to use image format not supported in ES profile.");
  1317. switch (format)
  1318. {
  1319. case ImageFormatRgba32f:
  1320. return "rgba32f";
  1321. case ImageFormatRgba16f:
  1322. return "rgba16f";
  1323. case ImageFormatR32f:
  1324. return "r32f";
  1325. case ImageFormatRgba8:
  1326. return "rgba8";
  1327. case ImageFormatRgba8Snorm:
  1328. return "rgba8_snorm";
  1329. case ImageFormatRg32f:
  1330. return "rg32f";
  1331. case ImageFormatRg16f:
  1332. return "rg16f";
  1333. case ImageFormatRgba32i:
  1334. return "rgba32i";
  1335. case ImageFormatRgba16i:
  1336. return "rgba16i";
  1337. case ImageFormatR32i:
  1338. return "r32i";
  1339. case ImageFormatRgba8i:
  1340. return "rgba8i";
  1341. case ImageFormatRg32i:
  1342. return "rg32i";
  1343. case ImageFormatRg16i:
  1344. return "rg16i";
  1345. case ImageFormatRgba32ui:
  1346. return "rgba32ui";
  1347. case ImageFormatRgba16ui:
  1348. return "rgba16ui";
  1349. case ImageFormatR32ui:
  1350. return "r32ui";
  1351. case ImageFormatRgba8ui:
  1352. return "rgba8ui";
  1353. case ImageFormatRg32ui:
  1354. return "rg32ui";
  1355. case ImageFormatRg16ui:
  1356. return "rg16ui";
  1357. case ImageFormatR11fG11fB10f:
  1358. return "r11f_g11f_b10f";
  1359. case ImageFormatR16f:
  1360. return "r16f";
  1361. case ImageFormatRgb10A2:
  1362. return "rgb10_a2";
  1363. case ImageFormatR8:
  1364. return "r8";
  1365. case ImageFormatRg8:
  1366. return "rg8";
  1367. case ImageFormatR16:
  1368. return "r16";
  1369. case ImageFormatRg16:
  1370. return "rg16";
  1371. case ImageFormatRgba16:
  1372. return "rgba16";
  1373. case ImageFormatR16Snorm:
  1374. return "r16_snorm";
  1375. case ImageFormatRg16Snorm:
  1376. return "rg16_snorm";
  1377. case ImageFormatRgba16Snorm:
  1378. return "rgba16_snorm";
  1379. case ImageFormatR8Snorm:
  1380. return "r8_snorm";
  1381. case ImageFormatRg8Snorm:
  1382. return "rg8_snorm";
  1383. case ImageFormatR8ui:
  1384. return "r8ui";
  1385. case ImageFormatRg8ui:
  1386. return "rg8ui";
  1387. case ImageFormatR16ui:
  1388. return "r16ui";
  1389. case ImageFormatRgb10a2ui:
  1390. return "rgb10_a2ui";
  1391. case ImageFormatR8i:
  1392. return "r8i";
  1393. case ImageFormatRg8i:
  1394. return "rg8i";
  1395. case ImageFormatR16i:
  1396. return "r16i";
  1397. case ImageFormatR64i:
  1398. return "r64i";
  1399. case ImageFormatR64ui:
  1400. return "r64ui";
  1401. default:
  1402. case ImageFormatUnknown:
  1403. return nullptr;
  1404. }
  1405. }
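// Returns the byte size of one scalar component for buffer packing purposes:
// 8 for 64-bit types, 4 for 32-bit, 2 for 16-bit and 1 for 8-bit types.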
  1406. uint32_t CompilerGLSL::type_to_packed_base_size(const SPIRType &type, BufferPackingStandard)
  1407. {
  1408. switch (type.basetype)
  1409. {
  1410. case SPIRType::Double:
  1411. case SPIRType::Int64:
  1412. case SPIRType::UInt64:
  1413. return 8;
  1414. case SPIRType::Float:
  1415. case SPIRType::Int:
  1416. case SPIRType::UInt:
  1417. return 4;
  1418. case SPIRType::Half:
  1419. case SPIRType::Short:
  1420. case SPIRType::UShort:
  1421. case SPIRType::BFloat16:
  1422. return 2;
  1423. case SPIRType::SByte:
  1424. case SPIRType::UByte:
  1425. case SPIRType::FloatE4M3:
  1426. case SPIRType::FloatE5M2:
  1427. return 1;
  1428. default:
  1429. SPIRV_CROSS_THROW("Unrecognized type in type_to_packed_base_size.");
  1430. }
  1431. }
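// Computes the base alignment of a type under the given packing standard, following the
// std140/std430 rules of the GL 4.5 spec (section 7.6.2.2). For example, a float aligns to 4 and a
// vec3 to 16 under both std140 and std430, while struct and array members are additionally rounded
// up to 16-byte alignment in std140.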
  1432. uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, const Bitset &flags,
  1433. BufferPackingStandard packing)
  1434. {
  1435. // If using PhysicalStorageBuffer storage class, this is a pointer,
  1436. // and is 64-bit.
  1437. if (is_physical_pointer(type))
  1438. {
  1439. if (!type.pointer)
  1440. SPIRV_CROSS_THROW("Types in PhysicalStorageBuffer must be pointers.");
  1441. if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64)
  1442. {
  1443. if (packing_is_vec4_padded(packing) && type_is_array_of_pointers(type))
  1444. return 16;
  1445. else
  1446. return 8;
  1447. }
  1448. else
  1449. SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64 must be used for PhysicalStorageBuffer.");
  1450. }
  1451. else if (is_array(type))
  1452. {
  1453. uint32_t minimum_alignment = 1;
  1454. if (packing_is_vec4_padded(packing))
  1455. minimum_alignment = 16;
  1456. auto *tmp = &get<SPIRType>(type.parent_type);
  1457. while (!tmp->array.empty())
  1458. tmp = &get<SPIRType>(tmp->parent_type);
  1459. // Get the alignment of the base type, then maybe round up.
  1460. return max(minimum_alignment, type_to_packed_alignment(*tmp, flags, packing));
  1461. }
  1462. if (type.basetype == SPIRType::Struct)
  1463. {
// Rule 9. A struct's alignment is the maximum alignment of its members.
  1465. uint32_t alignment = 1;
  1466. for (uint32_t i = 0; i < type.member_types.size(); i++)
  1467. {
  1468. auto member_flags = ir.meta[type.self].members[i].decoration_flags;
  1469. alignment =
  1470. max(alignment, type_to_packed_alignment(get<SPIRType>(type.member_types[i]), member_flags, packing));
  1471. }
  1472. // In std140, struct alignment is rounded up to 16.
  1473. if (packing_is_vec4_padded(packing))
  1474. alignment = max<uint32_t>(alignment, 16u);
  1475. return alignment;
  1476. }
  1477. else
  1478. {
  1479. const uint32_t base_alignment = type_to_packed_base_size(type, packing);
  1480. // Alignment requirement for scalar block layout is always the alignment for the most basic component.
  1481. if (packing_is_scalar(packing))
  1482. return base_alignment;
  1483. // Vectors are *not* aligned in HLSL, but there's an extra rule where vectors cannot straddle
// a vec4 boundary; this is handled outside, since that part knows our current offset.
  1485. if (type.columns == 1 && packing_is_hlsl(packing))
  1486. return base_alignment;
  1487. // From 7.6.2.2 in GL 4.5 core spec.
  1488. // Rule 1
  1489. if (type.vecsize == 1 && type.columns == 1)
  1490. return base_alignment;
  1491. // Rule 2
  1492. if ((type.vecsize == 2 || type.vecsize == 4) && type.columns == 1)
  1493. return type.vecsize * base_alignment;
  1494. // Rule 3
  1495. if (type.vecsize == 3 && type.columns == 1)
  1496. return 4 * base_alignment;
  1497. // Rule 4 implied. Alignment does not change in std430.
  1498. // Rule 5. Column-major matrices are stored as arrays of
  1499. // vectors.
  1500. if (flags.get(DecorationColMajor) && type.columns > 1)
  1501. {
  1502. if (packing_is_vec4_padded(packing))
  1503. return 4 * base_alignment;
  1504. else if (type.vecsize == 3)
  1505. return 4 * base_alignment;
  1506. else
  1507. return type.vecsize * base_alignment;
  1508. }
  1509. // Rule 6 implied.
  1510. // Rule 7.
  1511. if (flags.get(DecorationRowMajor) && type.vecsize > 1)
  1512. {
  1513. if (packing_is_vec4_padded(packing))
  1514. return 4 * base_alignment;
  1515. else if (type.columns == 3)
  1516. return 4 * base_alignment;
  1517. else
  1518. return type.columns * base_alignment;
  1519. }
  1520. // Rule 8 implied.
  1521. }
  1522. SPIRV_CROSS_THROW("Did not find suitable rule for type. Bogus decorations?");
  1523. }
  1524. uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, const Bitset &flags,
  1525. BufferPackingStandard packing)
  1526. {
// Array stride is equal to the aligned size of the underlying type.
  1528. uint32_t parent = type.parent_type;
  1529. assert(parent);
  1530. auto &tmp = get<SPIRType>(parent);
  1531. uint32_t size = type_to_packed_size(tmp, flags, packing);
  1532. uint32_t alignment = type_to_packed_alignment(type, flags, packing);
  1533. return (size + alignment - 1) & ~(alignment - 1);
  1534. }
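// Computes the packed byte size of a type under the given packing standard. For HLSL packing the
// trailing element of arrays and matrices is shrunk to its actual row/vector size, so that
// subsequent members can be packed into the remainder of the last 16-byte register.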
  1535. uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing)
  1536. {
  1537. // If using PhysicalStorageBuffer storage class, this is a pointer,
  1538. // and is 64-bit.
  1539. if (is_physical_pointer(type))
  1540. {
  1541. if (!type.pointer)
  1542. SPIRV_CROSS_THROW("Types in PhysicalStorageBuffer must be pointers.");
  1543. if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64)
  1544. return 8;
  1545. else
  1546. SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64 must be used for PhysicalStorageBuffer.");
  1547. }
  1548. else if (is_array(type))
  1549. {
  1550. uint32_t packed_size = to_array_size_literal(type) * type_to_packed_array_stride(type, flags, packing);
  1551. if (packing_is_hlsl(packing))
  1552. {
  1553. // For arrays of vectors and matrices in HLSL, the last element has a size which depends on its vector size,
  1554. // so that it is possible to pack other vectors into the last element.
  1555. if (type.basetype != SPIRType::Struct)
  1556. {
  1557. if (flags.get(DecorationRowMajor) && type.columns > 1)
  1558. packed_size -= (4 - type.columns) * (type.width / 8);
  1559. else
  1560. packed_size -= (4 - type.vecsize) * (type.width / 8);
  1561. }
  1562. else
  1563. {
  1564. const auto *base_type = &type;
  1565. while (is_array(*base_type))
  1566. {
  1567. auto &new_type = get<SPIRType>(base_type->parent_type);
  1568. if (!is_array(new_type))
  1569. break;
  1570. base_type = &new_type;
  1571. }
  1572. packed_size -= type_to_packed_array_stride(*base_type, flags, packing);
  1573. packed_size += type_to_packed_size(get<SPIRType>(base_type->parent_type), flags, packing);
  1574. }
  1575. }
  1576. return packed_size;
  1577. }
  1578. uint32_t size = 0;
  1579. if (type.basetype == SPIRType::Struct)
  1580. {
  1581. uint32_t pad_alignment = 1;
  1582. for (uint32_t i = 0; i < type.member_types.size(); i++)
  1583. {
  1584. auto member_flags = ir.meta[type.self].members[i].decoration_flags;
  1585. auto &member_type = get<SPIRType>(type.member_types[i]);
  1586. uint32_t packed_alignment = type_to_packed_alignment(member_type, member_flags, packing);
  1587. uint32_t alignment = max(packed_alignment, pad_alignment);
  1588. uint32_t element_size = type_to_packed_size(member_type, member_flags, packing);
  1589. pad_alignment = 1;
  1590. if (packing_is_hlsl(packing))
  1591. {
  1592. // HLSL is primarily a "cannot-straddle-vec4" language.
  1593. uint32_t begin_word = size / 16;
  1594. uint32_t end_word = (size + element_size - 1) / 16;
  1595. if (begin_word != end_word)
  1596. alignment = max<uint32_t>(alignment, 16u);
  1597. }
  1598. else
  1599. {
  1600. // The next member following a struct member is aligned to the base alignment of the struct that came before.
  1601. // GL 4.5 spec, 7.6.2.2.
  1602. if (member_type.basetype == SPIRType::Struct)
  1603. pad_alignment = packed_alignment;
  1604. }
  1605. size = (size + alignment - 1) & ~(alignment - 1);
  1606. size += element_size;
  1607. }
  1608. }
  1609. else
  1610. {
  1611. const uint32_t base_alignment = type_to_packed_base_size(type, packing);
  1612. if (packing_is_scalar(packing))
  1613. {
  1614. size = type.vecsize * type.columns * base_alignment;
  1615. }
  1616. else
  1617. {
  1618. if (type.columns == 1)
  1619. size = type.vecsize * base_alignment;
  1620. if (flags.get(DecorationColMajor) && type.columns > 1)
  1621. {
  1622. if (packing_is_vec4_padded(packing) || type.vecsize == 3)
  1623. size = type.columns * 4 * base_alignment;
  1624. else
  1625. size = type.columns * type.vecsize * base_alignment;
  1626. }
  1627. if (flags.get(DecorationRowMajor) && type.vecsize > 1)
  1628. {
  1629. if (packing_is_vec4_padded(packing) || type.columns == 3)
  1630. size = type.vecsize * 4 * base_alignment;
  1631. else
  1632. size = type.vecsize * type.columns * base_alignment;
  1633. }
  1634. // For matrices in HLSL, the last element has a size which depends on its vector size,
  1635. // so that it is possible to pack other vectors into the last element.
  1636. if (packing_is_hlsl(packing) && type.columns > 1)
  1637. {
  1638. if (flags.get(DecorationRowMajor))
  1639. size -= (4 - type.columns) * (type.width / 8);
  1640. else
  1641. size -= (4 - type.vecsize) * (type.width / 8);
  1642. }
  1643. }
  1644. }
  1645. return size;
  1646. }
  1647. bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing,
  1648. uint32_t *failed_validation_index, uint32_t start_offset,
  1649. uint32_t end_offset)
  1650. {
  1651. // This is very tricky and error prone, but try to be exhaustive and correct here.
  1652. // SPIR-V doesn't directly say if we're using std430 or std140.
  1653. // SPIR-V communicates this using Offset and ArrayStride decorations (which is what really matters),
  1654. // so we have to try to infer whether or not the original GLSL source was std140 or std430 based on this information.
// We do not have to consider shared or packed since these layouts are not allowed in Vulkan SPIR-V (they are useless anyway, and custom offsets would do the same thing).
  1656. //
  1657. // It is almost certain that we're using std430, but it gets tricky with arrays in particular.
  1658. // We will assume std430, but infer std140 if we can prove the struct is not compliant with std430.
  1659. //
  1660. // The only two differences between std140 and std430 are related to padding alignment/array stride
  1661. // in arrays and structs. In std140 they take minimum vec4 alignment.
  1662. // std430 only removes the vec4 requirement.
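// Illustrative example: for "buffer SSBO { float a; float b[4]; };", std430 gives b an Offset of 4
// and an ArrayStride of 4, while std140 rounds both up to 16. Decorations of 16 therefore fail the
// std430 check below, so the caller can fall back to std140.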
  1663. uint32_t offset = 0;
  1664. uint32_t pad_alignment = 1;
  1665. bool is_top_level_block =
  1666. has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);
  1667. for (uint32_t i = 0; i < type.member_types.size(); i++)
  1668. {
  1669. auto &memb_type = get<SPIRType>(type.member_types[i]);
  1670. auto *type_meta = ir.find_meta(type.self);
  1671. auto member_flags = type_meta ? type_meta->members[i].decoration_flags : Bitset{};
  1672. // Verify alignment rules.
  1673. uint32_t packed_alignment = type_to_packed_alignment(memb_type, member_flags, packing);
  1674. // This is a rather dirty workaround to deal with some cases of OpSpecConstantOp used as array size, e.g:
  1675. // layout(constant_id = 0) const int s = 10;
  1676. // const int S = s + 5; // SpecConstantOp
  1677. // buffer Foo { int data[S]; }; // <-- Very hard for us to deduce a fixed value here,
  1678. // we would need full implementation of compile-time constant folding. :(
  1679. // If we are the last member of a struct, there might be cases where the actual size of that member is irrelevant
  1680. // for our analysis (e.g. unsized arrays).
  1681. // This lets us simply ignore that there are spec constant op sized arrays in our buffers.
  1682. // Querying size of this member will fail, so just don't call it unless we have to.
  1683. //
// This is likely the best effort we can support without going into unacceptably complicated workarounds.
  1685. bool member_can_be_unsized =
  1686. is_top_level_block && size_t(i + 1) == type.member_types.size() && !memb_type.array.empty();
  1687. uint32_t packed_size = 0;
  1688. if (!member_can_be_unsized || packing_is_hlsl(packing))
  1689. packed_size = type_to_packed_size(memb_type, member_flags, packing);
  1690. // We only need to care about this if we have non-array types which can straddle the vec4 boundary.
  1691. uint32_t actual_offset = type_struct_member_offset(type, i);
  1692. if (packing_is_hlsl(packing))
  1693. {
  1694. // If a member straddles across a vec4 boundary, alignment is actually vec4.
  1695. uint32_t target_offset;
  1696. // If we intend to use explicit packing, we must check for improper straddle with that offset.
  1697. // In implicit packing, we must check with implicit offset, since the explicit offset
  1698. // might have already accounted for the straddle, and we'd miss the alignment promotion to vec4.
  1699. // This is important when packing sub-structs that don't support packoffset().
  1700. if (packing_has_flexible_offset(packing))
  1701. target_offset = actual_offset;
  1702. else
  1703. target_offset = offset;
  1704. uint32_t begin_word = target_offset / 16;
  1705. uint32_t end_word = (target_offset + packed_size - 1) / 16;
  1706. if (begin_word != end_word)
  1707. packed_alignment = max<uint32_t>(packed_alignment, 16u);
  1708. }
  1709. // Field is not in the specified range anymore and we can ignore any further fields.
  1710. if (actual_offset >= end_offset)
  1711. break;
  1712. uint32_t alignment = max(packed_alignment, pad_alignment);
  1713. offset = (offset + alignment - 1) & ~(alignment - 1);
  1714. // The next member following a struct member is aligned to the base alignment of the struct that came before.
  1715. // GL 4.5 spec, 7.6.2.2.
  1716. if (!packing_is_hlsl(packing) && memb_type.basetype == SPIRType::Struct && !memb_type.pointer)
  1717. pad_alignment = packed_alignment;
  1718. else
  1719. pad_alignment = 1;
  1720. // Only care about packing if we are in the given range
  1721. if (actual_offset >= start_offset)
  1722. {
  1723. // We only care about offsets in std140, std430, etc ...
  1724. // For EnhancedLayout variants, we have the flexibility to choose our own offsets.
  1725. if (!packing_has_flexible_offset(packing))
  1726. {
  1727. if (actual_offset != offset) // This cannot be the packing we're looking for.
  1728. {
  1729. if (failed_validation_index)
  1730. *failed_validation_index = i;
  1731. return false;
  1732. }
  1733. }
  1734. else if ((actual_offset & (alignment - 1)) != 0)
  1735. {
  1736. // We still need to verify that alignment rules are observed, even if we have explicit offset.
  1737. if (failed_validation_index)
  1738. *failed_validation_index = i;
  1739. return false;
  1740. }
  1741. // Verify array stride rules.
  1742. if (is_array(memb_type))
  1743. {
  1744. auto packed_array_stride = type_to_packed_array_stride(memb_type, member_flags, packing);
  1745. auto member_array_stride = type_struct_member_array_stride(type, i);
  1746. if (packed_array_stride != member_array_stride)
  1747. {
  1748. if (failed_validation_index)
  1749. *failed_validation_index = i;
  1750. return false;
  1751. }
  1752. }
  1753. // Verify that sub-structs also follow packing rules.
  1754. // We cannot use enhanced layouts on substructs, so they better be up to spec.
  1755. auto substruct_packing = packing_to_substruct_packing(packing);
  1756. if (!memb_type.pointer && !memb_type.member_types.empty() &&
  1757. !buffer_is_packing_standard(memb_type, substruct_packing))
  1758. {
  1759. if (failed_validation_index)
  1760. *failed_validation_index = i;
  1761. return false;
  1762. }
  1763. }
  1764. // Bump size.
  1765. offset = actual_offset + packed_size;
  1766. }
  1767. return true;
  1768. }
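// Determines whether layout(location = ...) may legally be emitted for the given storage class and
// target version; callers simply omit the qualifier when this returns false.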
  1769. bool CompilerGLSL::can_use_io_location(StorageClass storage, bool block)
  1770. {
// Location specifiers are mandatory in SPIR-V, but they aren't really supported in earlier versions of GLSL.
  1772. // Be very explicit here about how to solve the issue.
  1773. if ((get_execution_model() != ExecutionModelVertex && storage == StorageClassInput) ||
  1774. (get_execution_model() != ExecutionModelFragment && storage == StorageClassOutput))
  1775. {
  1776. uint32_t minimum_desktop_version = block ? 440 : 410;
  1777. // ARB_enhanced_layouts vs ARB_separate_shader_objects ...
  1778. if (!options.es && options.version < minimum_desktop_version && !options.separate_shader_objects)
  1779. return false;
  1780. else if (options.es && options.version < 310)
  1781. return false;
  1782. }
  1783. if ((get_execution_model() == ExecutionModelVertex && storage == StorageClassInput) ||
  1784. (get_execution_model() == ExecutionModelFragment && storage == StorageClassOutput))
  1785. {
  1786. if (options.es && options.version < 300)
  1787. return false;
  1788. else if (!options.es && options.version < 330)
  1789. return false;
  1790. }
  1791. if (storage == StorageClassUniform || storage == StorageClassUniformConstant || storage == StorageClassPushConstant)
  1792. {
  1793. if (options.es && options.version < 310)
  1794. return false;
  1795. else if (!options.es && options.version < 430)
  1796. return false;
  1797. }
  1798. return true;
  1799. }
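// Builds the layout(...) qualifier string for a global variable declaration, e.g.
// "layout(location = 0) " for an ordinary varying, or "layout(push_constant) " in Vulkan GLSL.
// Also handles transform feedback (xfb_*), geometry streams and descriptor set decorations.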
  1800. string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
  1801. {
  1802. // FIXME: Come up with a better solution for when to disable layouts.
// Whether layouts can be used depends on extensions as well as which types
  1804. // of layouts are used. For now, the simple solution is to just disable
  1805. // layouts for legacy versions.
  1806. if (is_legacy())
  1807. return "";
  1808. if (subpass_input_is_framebuffer_fetch(var.self))
  1809. return "";
  1810. SmallVector<string> attr;
  1811. auto &type = get<SPIRType>(var.basetype);
  1812. auto &flags = get_decoration_bitset(var.self);
  1813. auto &typeflags = get_decoration_bitset(type.self);
  1814. if (flags.get(DecorationPassthroughNV))
  1815. attr.push_back("passthrough");
  1816. if (options.vulkan_semantics && var.storage == StorageClassPushConstant)
  1817. attr.push_back("push_constant");
  1818. else if (var.storage == StorageClassShaderRecordBufferKHR)
  1819. attr.push_back(ray_tracing_is_khr ? "shaderRecordEXT" : "shaderRecordNV");
  1820. if (flags.get(DecorationRowMajor))
  1821. attr.push_back("row_major");
  1822. if (flags.get(DecorationColMajor))
  1823. attr.push_back("column_major");
  1824. if (options.vulkan_semantics)
  1825. {
  1826. if (flags.get(DecorationInputAttachmentIndex))
  1827. attr.push_back(join("input_attachment_index = ", get_decoration(var.self, DecorationInputAttachmentIndex)));
  1828. }
  1829. bool is_block = has_decoration(type.self, DecorationBlock);
  1830. if (flags.get(DecorationLocation) && can_use_io_location(var.storage, is_block))
  1831. {
  1832. Bitset combined_decoration;
  1833. for (uint32_t i = 0; i < ir.meta[type.self].members.size(); i++)
  1834. combined_decoration.merge_or(combined_decoration_for_member(type, i));
  1835. // If our members have location decorations, we don't need to
  1836. // emit location decorations at the top as well (looks weird).
  1837. if (!combined_decoration.get(DecorationLocation))
  1838. attr.push_back(join("location = ", get_decoration(var.self, DecorationLocation)));
  1839. }
  1840. if (get_execution_model() == ExecutionModelFragment && var.storage == StorageClassOutput &&
  1841. location_is_non_coherent_framebuffer_fetch(get_decoration(var.self, DecorationLocation)))
  1842. {
  1843. attr.push_back("noncoherent");
  1844. }
  1845. // Transform feedback
  1846. bool uses_enhanced_layouts = false;
  1847. if (is_block && var.storage == StorageClassOutput)
  1848. {
  1849. // For blocks, there is a restriction where xfb_stride/xfb_buffer must only be declared on the block itself,
  1850. // since all members must match the same xfb_buffer. The only thing we will declare for members of the block
  1851. // is the xfb_offset.
  1852. uint32_t member_count = uint32_t(type.member_types.size());
  1853. bool have_xfb_buffer_stride = false;
  1854. bool have_any_xfb_offset = false;
  1855. bool have_geom_stream = false;
  1856. uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0;
  1857. if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride))
  1858. {
  1859. have_xfb_buffer_stride = true;
  1860. xfb_buffer = get_decoration(var.self, DecorationXfbBuffer);
  1861. xfb_stride = get_decoration(var.self, DecorationXfbStride);
  1862. }
  1863. if (flags.get(DecorationStream))
  1864. {
  1865. have_geom_stream = true;
  1866. geom_stream = get_decoration(var.self, DecorationStream);
  1867. }
  1868. // Verify that none of the members violate our assumption.
  1869. for (uint32_t i = 0; i < member_count; i++)
  1870. {
  1871. if (has_member_decoration(type.self, i, DecorationStream))
  1872. {
  1873. uint32_t member_geom_stream = get_member_decoration(type.self, i, DecorationStream);
  1874. if (have_geom_stream && member_geom_stream != geom_stream)
  1875. SPIRV_CROSS_THROW("IO block member Stream mismatch.");
  1876. have_geom_stream = true;
  1877. geom_stream = member_geom_stream;
  1878. }
  1879. // Only members with an Offset decoration participate in XFB.
  1880. if (!has_member_decoration(type.self, i, DecorationOffset))
  1881. continue;
  1882. have_any_xfb_offset = true;
  1883. if (has_member_decoration(type.self, i, DecorationXfbBuffer))
  1884. {
  1885. uint32_t buffer_index = get_member_decoration(type.self, i, DecorationXfbBuffer);
  1886. if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
  1887. SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
  1888. have_xfb_buffer_stride = true;
  1889. xfb_buffer = buffer_index;
  1890. }
  1891. if (has_member_decoration(type.self, i, DecorationXfbStride))
  1892. {
  1893. uint32_t stride = get_member_decoration(type.self, i, DecorationXfbStride);
  1894. if (have_xfb_buffer_stride && stride != xfb_stride)
  1895. SPIRV_CROSS_THROW("IO block member XfbStride mismatch.");
  1896. have_xfb_buffer_stride = true;
  1897. xfb_stride = stride;
  1898. }
  1899. }
  1900. if (have_xfb_buffer_stride && have_any_xfb_offset)
  1901. {
  1902. attr.push_back(join("xfb_buffer = ", xfb_buffer));
  1903. attr.push_back(join("xfb_stride = ", xfb_stride));
  1904. uses_enhanced_layouts = true;
  1905. }
  1906. if (have_geom_stream)
  1907. {
  1908. if (get_execution_model() != ExecutionModelGeometry)
  1909. SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
  1910. if (options.es)
  1911. SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
  1912. if (options.version < 400)
  1913. require_extension_internal("GL_ARB_transform_feedback3");
  1914. attr.push_back(join("stream = ", get_decoration(var.self, DecorationStream)));
  1915. }
  1916. }
  1917. else if (var.storage == StorageClassOutput)
  1918. {
  1919. if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride) && flags.get(DecorationOffset))
  1920. {
  1921. // XFB for standalone variables, we can emit all decorations.
  1922. attr.push_back(join("xfb_buffer = ", get_decoration(var.self, DecorationXfbBuffer)));
  1923. attr.push_back(join("xfb_stride = ", get_decoration(var.self, DecorationXfbStride)));
  1924. attr.push_back(join("xfb_offset = ", get_decoration(var.self, DecorationOffset)));
  1925. uses_enhanced_layouts = true;
  1926. }
  1927. if (flags.get(DecorationStream))
  1928. {
  1929. if (get_execution_model() != ExecutionModelGeometry)
  1930. SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
  1931. if (options.es)
  1932. SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
  1933. if (options.version < 400)
  1934. require_extension_internal("GL_ARB_transform_feedback3");
  1935. attr.push_back(join("stream = ", get_decoration(var.self, DecorationStream)));
  1936. }
  1937. }
  1938. // Can only declare Component if we can declare location.
  1939. if (flags.get(DecorationComponent) && can_use_io_location(var.storage, is_block))
  1940. {
  1941. uses_enhanced_layouts = true;
  1942. attr.push_back(join("component = ", get_decoration(var.self, DecorationComponent)));
  1943. }
  1944. if (uses_enhanced_layouts)
  1945. {
  1946. if (!options.es)
  1947. {
  1948. if (options.version < 440 && options.version >= 140)
  1949. require_extension_internal("GL_ARB_enhanced_layouts");
  1950. else if (options.version < 140)
  1951. SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in targets below GLSL 1.40.");
  1952. if (!options.es && options.version < 440)
  1953. require_extension_internal("GL_ARB_enhanced_layouts");
  1954. }
  1955. else if (options.es)
  1956. SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in ESSL.");
  1957. }
  1958. if (flags.get(DecorationIndex))
  1959. attr.push_back(join("index = ", get_decoration(var.self, DecorationIndex)));
  1960. // Do not emit set = decoration in regular GLSL output, but
  1961. // we need to preserve it in Vulkan GLSL mode.
  1962. if (var.storage != StorageClassPushConstant && var.storage != StorageClassShaderRecordBufferKHR)
  1963. {
  1964. if (flags.get(DecorationDescriptorSet) && options.vulkan_semantics)
  1965. attr.push_back(join("set = ", get_decoration(var.self, DecorationDescriptorSet)));
  1966. }
  1967. bool push_constant_block = options.vulkan_semantics && var.storage == StorageClassPushConstant;
  1968. bool ssbo_block = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR ||
  1969. (var.storage == StorageClassUniform && typeflags.get(DecorationBufferBlock));
  1970. bool emulated_ubo = var.storage == StorageClassPushConstant && options.emit_push_constant_as_uniform_buffer;
  1971. bool ubo_block = var.storage == StorageClassUniform && typeflags.get(DecorationBlock);
  1972. // GL 3.0/GLSL 1.30 is not considered legacy, but it doesn't have UBOs ...
  1973. bool can_use_buffer_blocks = (options.es && options.version >= 300) || (!options.es && options.version >= 140);
1974. // Pretend there are no UBOs when the options say so.
  1975. if (ubo_block && options.emit_uniform_buffer_as_plain_uniforms)
  1976. can_use_buffer_blocks = false;
  1977. bool can_use_binding;
  1978. if (options.es)
  1979. can_use_binding = options.version >= 310;
  1980. else
  1981. can_use_binding = options.enable_420pack_extension || (options.version >= 420);
  1982. // Make sure we don't emit binding layout for a classic uniform on GLSL 1.30.
  1983. if (!can_use_buffer_blocks && var.storage == StorageClassUniform)
  1984. can_use_binding = false;
  1985. if (var.storage == StorageClassShaderRecordBufferKHR)
  1986. can_use_binding = false;
  1987. if (can_use_binding && flags.get(DecorationBinding))
  1988. attr.push_back(join("binding = ", get_decoration(var.self, DecorationBinding)));
  1989. if (var.storage != StorageClassOutput && flags.get(DecorationOffset))
  1990. attr.push_back(join("offset = ", get_decoration(var.self, DecorationOffset)));
  1991. // Instead of adding explicit offsets for every element here, just assume we're using std140 or std430.
  1992. // If SPIR-V does not comply with either layout, we cannot really work around it.
  1993. if (can_use_buffer_blocks && (ubo_block || emulated_ubo))
  1994. {
  1995. attr.push_back(buffer_to_packing_standard(type, false, true));
  1996. }
  1997. else if (can_use_buffer_blocks && (push_constant_block || ssbo_block))
  1998. {
  1999. attr.push_back(buffer_to_packing_standard(type, true, true));
  2000. }
2001. // For images, the type itself adds a layout qualifier.
  2002. // Only emit the format for storage images.
  2003. if (type.basetype == SPIRType::Image && type.image.sampled == 2)
  2004. {
  2005. const char *fmt = format_to_glsl(type.image.format);
  2006. if (fmt)
  2007. attr.push_back(fmt);
  2008. }
  2009. if (attr.empty())
  2010. return "";
  2011. string res = "layout(";
  2012. res += merge(attr);
  2013. res += ") ";
  2014. return res;
  2015. }
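// Illustrative sketch only: the strings below show the kind of prefix layout_for_variable()
// above can return; the sets, bindings and locations are hypothetical, not taken from any shader.
//
//     Vulkan GLSL UBO, DescriptorSet 0 / Binding 1, std140-compatible offsets:
//         "layout(set = 0, binding = 1, std140) "
//
//     Fragment output at location 2, component 1 (needs GL_ARB_enhanced_layouts on pre-4.4 desktop GL):
//         "layout(location = 2, component = 1) "
//
// An empty attribute list yields an empty string, so callers can always prepend the result.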
  2016. string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type,
  2017. bool support_std430_without_scalar_layout,
  2018. bool support_enhanced_layouts)
  2019. {
  2020. if (support_std430_without_scalar_layout && buffer_is_packing_standard(type, BufferPackingStd430))
  2021. return "std430";
  2022. else if (buffer_is_packing_standard(type, BufferPackingStd140))
  2023. return "std140";
  2024. else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalar))
  2025. {
  2026. require_extension_internal("GL_EXT_scalar_block_layout");
  2027. return "scalar";
  2028. }
  2029. else if (support_std430_without_scalar_layout &&
  2030. support_enhanced_layouts &&
  2031. buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
  2032. {
  2033. if (options.es && !options.vulkan_semantics)
2034. SPIRV_CROSS_THROW("Push constant block cannot be expressed as either std430 or std140. ES targets do "
2035. "not support GL_ARB_enhanced_layouts.");
  2036. if (!options.es && !options.vulkan_semantics && options.version < 440)
  2037. require_extension_internal("GL_ARB_enhanced_layouts");
  2038. set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
  2039. return "std430";
  2040. }
  2041. else if (support_enhanced_layouts &&
  2042. buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout))
  2043. {
2044. // Fallback time. We might be able to use GL_ARB_enhanced_layouts to deal with this difference;
2045. // however, we can only use layout(offset) on the block itself, not on any substructs, so the substructs had better already use the appropriate layout.
  2046. // Enhanced layouts seem to always work in Vulkan GLSL, so no need for extensions there.
  2047. if (options.es && !options.vulkan_semantics)
2048. SPIRV_CROSS_THROW("Push constant block cannot be expressed as either std430 or std140. ES targets do "
2049. "not support GL_ARB_enhanced_layouts.");
  2050. if (!options.es && !options.vulkan_semantics && options.version < 440)
  2051. require_extension_internal("GL_ARB_enhanced_layouts");
  2052. set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
  2053. return "std140";
  2054. }
  2055. else if (options.vulkan_semantics &&
  2056. support_enhanced_layouts &&
  2057. buffer_is_packing_standard(type, BufferPackingScalarEnhancedLayout))
  2058. {
  2059. set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
  2060. require_extension_internal("GL_EXT_scalar_block_layout");
  2061. return "scalar";
  2062. }
  2063. else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
  2064. buffer_is_packing_standard(type, BufferPackingStd430))
  2065. {
  2066. // UBOs can support std430 with GL_EXT_scalar_block_layout.
  2067. require_extension_internal("GL_EXT_scalar_block_layout");
  2068. return "std430";
  2069. }
  2070. else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
  2071. support_enhanced_layouts &&
  2072. buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
  2073. {
  2074. // UBOs can support std430 with GL_EXT_scalar_block_layout.
  2075. set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
  2076. require_extension_internal("GL_EXT_scalar_block_layout");
  2077. return "std430";
  2078. }
  2079. else
  2080. {
2081. SPIRV_CROSS_THROW("Buffer block cannot be expressed as any of std430, std140 or scalar, even with enhanced "
2082. "layouts. You can try flattening this block to support a more flexible layout.");
  2083. }
  2084. }
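// Rough decision sketch for buffer_to_packing_standard() above (inputs are hypothetical): an
// SSBO whose member offsets already satisfy the std430 rules returns "std430"; a UBO that only
// satisfies std140 returns "std140"; a block that needs tighter packing under Vulkan semantics
// returns "scalar" after requiring GL_EXT_scalar_block_layout. Blocks that only fit once every
// member offset is spelled out fall into the *EnhancedLayout paths, which tag the type with
// SPIRVCrossDecorationExplicitOffset so explicit member offsets can be emitted later.
//
//     e.g. a UBO containing a float array with a 4-byte stride cannot be std140, but it does
//     satisfy std430, so under Vulkan semantics it returns "std430" via GL_EXT_scalar_block_layout.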
  2085. void CompilerGLSL::emit_push_constant_block(const SPIRVariable &var)
  2086. {
  2087. if (flattened_buffer_blocks.count(var.self))
  2088. emit_buffer_block_flattened(var);
  2089. else if (options.vulkan_semantics)
  2090. emit_push_constant_block_vulkan(var);
  2091. else if (options.emit_push_constant_as_uniform_buffer)
  2092. emit_buffer_block_native(var);
  2093. else
  2094. emit_push_constant_block_glsl(var);
  2095. }
  2096. void CompilerGLSL::emit_push_constant_block_vulkan(const SPIRVariable &var)
  2097. {
  2098. emit_buffer_block(var);
  2099. }
  2100. void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var)
  2101. {
  2102. // OpenGL has no concept of push constant blocks, implement it as a uniform struct.
  2103. auto &type = get<SPIRType>(var.basetype);
  2104. unset_decoration(var.self, DecorationBinding);
  2105. unset_decoration(var.self, DecorationDescriptorSet);
  2106. #if 0
  2107. if (flags & ((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet)))
  2108. SPIRV_CROSS_THROW("Push constant blocks cannot be compiled to GLSL with Binding or Set syntax. "
  2109. "Remap to location with reflection API first or disable these decorations.");
  2110. #endif
  2111. // We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily.
2112. // Otherwise, we will end up emitting layout() qualifiers on naked structs, which is not allowed.
  2113. bool block_flag = has_decoration(type.self, DecorationBlock);
  2114. unset_decoration(type.self, DecorationBlock);
  2115. emit_struct(type);
  2116. if (block_flag)
  2117. set_decoration(type.self, DecorationBlock);
  2118. emit_uniform(var);
  2119. statement("");
  2120. }
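// Illustrative output sketch for emit_push_constant_block_glsl() above, assuming a push constant
// block type named PushConstants holding a single mat4 and an instance named pc (all names are
// hypothetical):
//
//     struct PushConstants
//     {
//         mat4 mvp;
//     };
//
//     uniform PushConstants pc;
//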
  2121. void CompilerGLSL::emit_buffer_block(const SPIRVariable &var)
  2122. {
  2123. auto &type = get<SPIRType>(var.basetype);
  2124. bool ubo_block = var.storage == StorageClassUniform && has_decoration(type.self, DecorationBlock);
  2125. if (flattened_buffer_blocks.count(var.self))
  2126. emit_buffer_block_flattened(var);
  2127. else if (is_legacy() || (!options.es && options.version == 130) ||
  2128. (ubo_block && options.emit_uniform_buffer_as_plain_uniforms))
  2129. emit_buffer_block_legacy(var);
  2130. else
  2131. emit_buffer_block_native(var);
  2132. }
  2133. void CompilerGLSL::emit_buffer_block_legacy(const SPIRVariable &var)
  2134. {
  2135. auto &type = get<SPIRType>(var.basetype);
  2136. bool ssbo = var.storage == StorageClassStorageBuffer ||
  2137. ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
  2138. if (ssbo)
  2139. SPIRV_CROSS_THROW("SSBOs not supported in legacy targets.");
2140. // We're emitting the buffer block as a regular struct, so disable the block qualifier temporarily.
2141. // Otherwise, we will end up emitting layout() qualifiers on naked structs, which is not allowed.
  2142. auto &block_flags = ir.meta[type.self].decoration.decoration_flags;
  2143. bool block_flag = block_flags.get(DecorationBlock);
  2144. block_flags.clear(DecorationBlock);
  2145. emit_struct(type);
  2146. if (block_flag)
  2147. block_flags.set(DecorationBlock);
  2148. emit_uniform(var);
  2149. statement("");
  2150. }
  2151. void CompilerGLSL::emit_buffer_reference_block(uint32_t type_id, bool forward_declaration)
  2152. {
  2153. auto &type = get<SPIRType>(type_id);
  2154. string buffer_name;
  2155. if (forward_declaration && is_physical_pointer_to_buffer_block(type))
  2156. {
  2157. // Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ...
2158. // Allow an aliased name here, since we might be declaring the block twice: once as a forward-declared buffer reference and once as the proper declaration. The names must match up.
  2159. // The names must match up.
  2160. buffer_name = to_name(type.self, false);
  2161. // Shaders never use the block by interface name, so we don't
  2162. // have to track this other than updating name caches.
  2163. // If we have a collision for any reason, just fallback immediately.
  2164. if (ir.meta[type.self].decoration.alias.empty() ||
  2165. block_ssbo_names.find(buffer_name) != end(block_ssbo_names) ||
  2166. resource_names.find(buffer_name) != end(resource_names))
  2167. {
  2168. buffer_name = join("_", type.self);
  2169. }
  2170. // Make sure we get something unique for both global name scope and block name scope.
  2171. // See GLSL 4.5 spec: section 4.3.9 for details.
  2172. add_variable(block_ssbo_names, resource_names, buffer_name);
  2173. // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
  2174. // This cannot conflict with anything else, so we're safe now.
2175. // We cannot reuse this fallback name in either global scope (blocked by block_names) or block name scope.
  2176. if (buffer_name.empty())
  2177. buffer_name = join("_", type.self);
  2178. block_names.insert(buffer_name);
  2179. block_ssbo_names.insert(buffer_name);
  2180. // Ensure we emit the correct name when emitting non-forward pointer type.
  2181. ir.meta[type.self].decoration.alias = buffer_name;
  2182. }
  2183. else
  2184. {
  2185. buffer_name = type_to_glsl(type);
  2186. }
  2187. if (!forward_declaration)
  2188. {
  2189. auto itr = physical_storage_type_to_alignment.find(type_id);
  2190. uint32_t alignment = 0;
  2191. if (itr != physical_storage_type_to_alignment.end())
  2192. alignment = itr->second.alignment;
  2193. if (is_physical_pointer_to_buffer_block(type))
  2194. {
  2195. SmallVector<std::string> attributes;
  2196. attributes.push_back("buffer_reference");
  2197. if (alignment)
  2198. attributes.push_back(join("buffer_reference_align = ", alignment));
  2199. attributes.push_back(buffer_to_packing_standard(type, true, true));
  2200. auto flags = ir.get_buffer_block_type_flags(type);
  2201. string decorations;
  2202. if (flags.get(DecorationRestrict))
  2203. decorations += " restrict";
  2204. if (flags.get(DecorationCoherent))
  2205. decorations += " coherent";
  2206. if (flags.get(DecorationNonReadable))
  2207. decorations += " writeonly";
  2208. if (flags.get(DecorationNonWritable))
  2209. decorations += " readonly";
  2210. statement("layout(", merge(attributes), ")", decorations, " buffer ", buffer_name);
  2211. }
  2212. else
  2213. {
  2214. string packing_standard;
  2215. if (type.basetype == SPIRType::Struct)
  2216. {
  2217. // The non-block type is embedded in a block, so we cannot use enhanced layouts :(
  2218. packing_standard = buffer_to_packing_standard(type, true, false) + ", ";
  2219. }
  2220. else if (is_array(get_pointee_type(type)))
  2221. {
  2222. SPIRType wrap_type{OpTypeStruct};
  2223. wrap_type.self = ir.increase_bound_by(1);
  2224. wrap_type.member_types.push_back(get_pointee_type_id(type_id));
  2225. ir.set_member_decoration(wrap_type.self, 0, DecorationOffset, 0);
  2226. packing_standard = buffer_to_packing_standard(wrap_type, true, false) + ", ";
  2227. }
  2228. if (alignment)
  2229. statement("layout(", packing_standard, "buffer_reference, buffer_reference_align = ", alignment, ") buffer ", buffer_name);
  2230. else
  2231. statement("layout(", packing_standard, "buffer_reference) buffer ", buffer_name);
  2232. }
  2233. begin_scope();
  2234. if (is_physical_pointer_to_buffer_block(type))
  2235. {
  2236. type.member_name_cache.clear();
  2237. uint32_t i = 0;
  2238. for (auto &member : type.member_types)
  2239. {
  2240. add_member_name(type, i);
  2241. emit_struct_member(type, member, i);
  2242. i++;
  2243. }
  2244. }
  2245. else
  2246. {
  2247. auto &pointee_type = get_pointee_type(type);
  2248. statement(type_to_glsl(pointee_type), " value", type_to_array_glsl(pointee_type, 0), ";");
  2249. }
  2250. end_scope_decl();
  2251. statement("");
  2252. }
  2253. else
  2254. {
  2255. statement("layout(buffer_reference) buffer ", buffer_name, ";");
  2256. }
  2257. }
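// Illustrative sketch of the GLSL emitted by emit_buffer_reference_block() above for a
// physical-storage-buffer pointer to a block (type and member names are hypothetical):
//
//     Forward declaration pass:
//         layout(buffer_reference) buffer DataRef;
//
//     Full declaration pass, assuming a 16-byte alignment hint and std430-compatible offsets:
//         layout(buffer_reference, buffer_reference_align = 16, std430) buffer DataRef
//         {
//             vec4 values[];
//         };
//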
  2258. void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var)
  2259. {
  2260. auto &type = get<SPIRType>(var.basetype);
  2261. Bitset flags = ir.get_buffer_block_flags(var);
  2262. bool ssbo = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR ||
  2263. ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
  2264. bool is_restrict = ssbo && flags.get(DecorationRestrict);
  2265. bool is_writeonly = ssbo && flags.get(DecorationNonReadable);
  2266. bool is_readonly = ssbo && flags.get(DecorationNonWritable);
  2267. bool is_coherent = ssbo && flags.get(DecorationCoherent);
  2268. // Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ...
  2269. auto buffer_name = to_name(type.self, false);
  2270. auto &block_namespace = ssbo ? block_ssbo_names : block_ubo_names;
  2271. // Shaders never use the block by interface name, so we don't
  2272. // have to track this other than updating name caches.
  2273. // If we have a collision for any reason, just fallback immediately.
  2274. if (ir.meta[type.self].decoration.alias.empty() || block_namespace.find(buffer_name) != end(block_namespace) ||
  2275. resource_names.find(buffer_name) != end(resource_names))
  2276. {
  2277. buffer_name = get_block_fallback_name(var.self);
  2278. }
  2279. // Make sure we get something unique for both global name scope and block name scope.
  2280. // See GLSL 4.5 spec: section 4.3.9 for details.
  2281. add_variable(block_namespace, resource_names, buffer_name);
  2282. // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
  2283. // This cannot conflict with anything else, so we're safe now.
2284. // We cannot reuse this fallback name in either global scope (blocked by block_names) or block name scope.
  2285. if (buffer_name.empty())
  2286. buffer_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self);
  2287. block_names.insert(buffer_name);
  2288. block_namespace.insert(buffer_name);
  2289. // Save for post-reflection later.
  2290. declared_block_names[var.self] = buffer_name;
  2291. statement(layout_for_variable(var), is_coherent ? "coherent " : "", is_restrict ? "restrict " : "",
  2292. is_writeonly ? "writeonly " : "", is_readonly ? "readonly " : "", ssbo ? "buffer " : "uniform ",
  2293. buffer_name);
  2294. begin_scope();
  2295. type.member_name_cache.clear();
  2296. uint32_t i = 0;
  2297. for (auto &member : type.member_types)
  2298. {
  2299. add_member_name(type, i);
  2300. emit_struct_member(type, member, i);
  2301. i++;
  2302. }
2303. // Don't declare empty blocks in GLSL; this is not allowed.
  2304. if (type_is_empty(type) && !backend.supports_empty_struct)
  2305. statement("int empty_struct_member;");
  2306. // var.self can be used as a backup name for the block name,
  2307. // so we need to make sure we don't disturb the name here on a recompile.
  2308. // It will need to be reset if we have to recompile.
  2309. preserve_alias_on_reset(var.self);
  2310. add_resource_name(var.self);
  2311. end_scope_decl(to_name(var.self) + type_to_array_glsl(type, var.self));
  2312. statement("");
  2313. }
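// Illustrative sketch of emit_buffer_block_native() output for a readonly std430 SSBO at
// binding 0 (block, member and instance names are hypothetical):
//
//     layout(binding = 0, std430) readonly buffer Lights
//     {
//         vec4 position[];
//     } lights;
//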
  2314. void CompilerGLSL::emit_buffer_block_flattened(const SPIRVariable &var)
  2315. {
  2316. auto &type = get<SPIRType>(var.basetype);
  2317. // Block names should never alias.
  2318. auto buffer_name = to_name(type.self, false);
  2319. size_t buffer_size = (get_declared_struct_size(type) + 15) / 16;
  2320. SPIRType::BaseType basic_type;
  2321. if (get_common_basic_type(type, basic_type))
  2322. {
  2323. SPIRType tmp { OpTypeVector };
  2324. tmp.basetype = basic_type;
  2325. tmp.vecsize = 4;
  2326. if (basic_type != SPIRType::Float && basic_type != SPIRType::Int && basic_type != SPIRType::UInt)
  2327. SPIRV_CROSS_THROW("Basic types in a flattened UBO must be float, int or uint.");
  2328. auto flags = ir.get_buffer_block_flags(var);
  2329. statement("uniform ", flags_to_qualifiers_glsl(tmp, 0, flags), type_to_glsl(tmp), " ", buffer_name, "[",
  2330. buffer_size, "];");
  2331. }
  2332. else
  2333. SPIRV_CROSS_THROW("All basic types in a flattened block must be the same.");
  2334. }
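// Illustrative sketch for emit_buffer_block_flattened() above: a float-based UBO whose declared
// size is 64 bytes gets flattened into a single vec4 array (the name is hypothetical; ES targets
// may also pick up a precision qualifier from flags_to_qualifiers_glsl):
//
//     uniform vec4 UBO[4];
//
// Member accesses are then rewritten elsewhere to index and swizzle into this array.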
  2335. const char *CompilerGLSL::to_storage_qualifiers_glsl(const SPIRVariable &var)
  2336. {
  2337. auto &execution = get_entry_point();
  2338. if (subpass_input_is_framebuffer_fetch(var.self))
  2339. return "";
  2340. if (var.storage == StorageClassInput || var.storage == StorageClassOutput)
  2341. {
  2342. if (is_legacy() && execution.model == ExecutionModelVertex)
  2343. return var.storage == StorageClassInput ? "attribute " : "varying ";
  2344. else if (is_legacy() && execution.model == ExecutionModelFragment)
  2345. return "varying "; // Fragment outputs are renamed so they never hit this case.
  2346. else if (execution.model == ExecutionModelFragment && var.storage == StorageClassOutput)
  2347. {
  2348. uint32_t loc = get_decoration(var.self, DecorationLocation);
  2349. bool is_inout = location_is_framebuffer_fetch(loc);
  2350. if (is_inout)
  2351. return "inout ";
  2352. else
  2353. return "out ";
  2354. }
  2355. else
  2356. return var.storage == StorageClassInput ? "in " : "out ";
  2357. }
  2358. else if (var.storage == StorageClassUniformConstant || var.storage == StorageClassUniform ||
  2359. var.storage == StorageClassPushConstant || var.storage == StorageClassAtomicCounter)
  2360. {
  2361. return "uniform ";
  2362. }
  2363. else if (var.storage == StorageClassRayPayloadKHR)
  2364. {
  2365. return ray_tracing_is_khr ? "rayPayloadEXT " : "rayPayloadNV ";
  2366. }
  2367. else if (var.storage == StorageClassIncomingRayPayloadKHR)
  2368. {
  2369. return ray_tracing_is_khr ? "rayPayloadInEXT " : "rayPayloadInNV ";
  2370. }
  2371. else if (var.storage == StorageClassHitAttributeKHR)
  2372. {
  2373. return ray_tracing_is_khr ? "hitAttributeEXT " : "hitAttributeNV ";
  2374. }
  2375. else if (var.storage == StorageClassCallableDataKHR)
  2376. {
  2377. return ray_tracing_is_khr ? "callableDataEXT " : "callableDataNV ";
  2378. }
  2379. else if (var.storage == StorageClassIncomingCallableDataKHR)
  2380. {
  2381. return ray_tracing_is_khr ? "callableDataInEXT " : "callableDataInNV ";
  2382. }
  2383. return "";
  2384. }
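// Quick illustrative mapping for to_storage_qualifiers_glsl() above:
//
//     StorageClassInput,  legacy vertex shader    -> "attribute "
//     StorageClassOutput, legacy vertex shader    -> "varying "
//     StorageClassOutput, modern fragment shader  -> "out "  (or "inout " for framebuffer fetch)
//     StorageClassUniform / PushConstant / ...    -> "uniform "
//     Ray-tracing storage classes                 -> EXT or NV spellings, depending on ray_tracing_is_khr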
  2385. void CompilerGLSL::emit_flattened_io_block_member(const std::string &basename, const SPIRType &type, const char *qual,
  2386. const SmallVector<uint32_t> &indices)
  2387. {
  2388. uint32_t member_type_id = type.self;
  2389. const SPIRType *member_type = &type;
  2390. const SPIRType *parent_type = nullptr;
  2391. auto flattened_name = basename;
  2392. for (auto &index : indices)
  2393. {
  2394. flattened_name += "_";
  2395. flattened_name += to_member_name(*member_type, index);
  2396. parent_type = member_type;
  2397. member_type_id = member_type->member_types[index];
  2398. member_type = &get<SPIRType>(member_type_id);
  2399. }
  2400. assert(member_type->basetype != SPIRType::Struct);
  2401. // We're overriding struct member names, so ensure we do so on the primary type.
  2402. if (parent_type->type_alias)
  2403. parent_type = &get<SPIRType>(parent_type->type_alias);
  2404. // Sanitize underscores because joining the two identifiers might create more than 1 underscore in a row,
  2405. // which is not allowed.
  2406. ParsedIR::sanitize_underscores(flattened_name);
  2407. uint32_t last_index = indices.back();
  2408. // Pass in the varying qualifier here so it will appear in the correct declaration order.
  2409. // Replace member name while emitting it so it encodes both struct name and member name.
  2410. auto backup_name = get_member_name(parent_type->self, last_index);
  2411. auto member_name = to_member_name(*parent_type, last_index);
  2412. set_member_name(parent_type->self, last_index, flattened_name);
  2413. emit_struct_member(*parent_type, member_type_id, last_index, qual);
  2414. // Restore member name.
  2415. set_member_name(parent_type->self, last_index, member_name);
  2416. }
  2417. void CompilerGLSL::emit_flattened_io_block_struct(const std::string &basename, const SPIRType &type, const char *qual,
  2418. const SmallVector<uint32_t> &indices)
  2419. {
  2420. auto sub_indices = indices;
  2421. sub_indices.push_back(0);
  2422. const SPIRType *member_type = &type;
  2423. for (auto &index : indices)
  2424. member_type = &get<SPIRType>(member_type->member_types[index]);
  2425. assert(member_type->basetype == SPIRType::Struct);
  2426. if (!member_type->array.empty())
  2427. SPIRV_CROSS_THROW("Cannot flatten array of structs in I/O blocks.");
  2428. for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++)
  2429. {
  2430. sub_indices.back() = i;
  2431. if (get<SPIRType>(member_type->member_types[i]).basetype == SPIRType::Struct)
  2432. emit_flattened_io_block_struct(basename, type, qual, sub_indices);
  2433. else
  2434. emit_flattened_io_block_member(basename, type, qual, sub_indices);
  2435. }
  2436. }
  2437. void CompilerGLSL::emit_flattened_io_block(const SPIRVariable &var, const char *qual)
  2438. {
  2439. auto &var_type = get<SPIRType>(var.basetype);
  2440. if (!var_type.array.empty())
  2441. SPIRV_CROSS_THROW("Array of varying structs cannot be flattened to legacy-compatible varyings.");
  2442. // Emit flattened types based on the type alias. Normally, we are never supposed to emit
  2443. // struct declarations for aliased types.
  2444. auto &type = var_type.type_alias ? get<SPIRType>(var_type.type_alias) : var_type;
  2445. auto old_flags = ir.meta[type.self].decoration.decoration_flags;
  2446. // Emit the members as if they are part of a block to get all qualifiers.
  2447. ir.meta[type.self].decoration.decoration_flags.set(DecorationBlock);
  2448. type.member_name_cache.clear();
  2449. SmallVector<uint32_t> member_indices;
  2450. member_indices.push_back(0);
  2451. auto basename = to_name(var.self);
  2452. uint32_t i = 0;
  2453. for (auto &member : type.member_types)
  2454. {
  2455. add_member_name(type, i);
  2456. auto &membertype = get<SPIRType>(member);
  2457. member_indices.back() = i;
  2458. if (membertype.basetype == SPIRType::Struct)
  2459. emit_flattened_io_block_struct(basename, type, qual, member_indices);
  2460. else
  2461. emit_flattened_io_block_member(basename, type, qual, member_indices);
  2462. i++;
  2463. }
  2464. ir.meta[type.self].decoration.decoration_flags = old_flags;
  2465. // Treat this variable as fully flattened from now on.
  2466. flattened_structs[var.self] = true;
  2467. }
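// Illustrative sketch of the flattening performed above. For an output block instance named
// vout with members color (vec4) and fog (a struct holding float density), the emitted
// legacy-compatible varyings would look roughly like this (all names are hypothetical):
//
//     varying vec4 vout_color;
//     varying float vout_fog_density;
//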
  2468. void CompilerGLSL::emit_interface_block(const SPIRVariable &var)
  2469. {
  2470. auto &type = get<SPIRType>(var.basetype);
  2471. if (var.storage == StorageClassInput && type.basetype == SPIRType::Double &&
  2472. !options.es && options.version < 410)
  2473. {
  2474. require_extension_internal("GL_ARB_vertex_attrib_64bit");
  2475. }
  2476. // Either make it plain in/out or in/out blocks depending on what shader is doing ...
  2477. bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock);
  2478. const char *qual = to_storage_qualifiers_glsl(var);
  2479. if (block)
  2480. {
  2481. // ESSL earlier than 310 and GLSL earlier than 150 did not support
  2482. // I/O variables which are struct types.
  2483. // To support this, flatten the struct into separate varyings instead.
  2484. if (options.force_flattened_io_blocks || (options.es && options.version < 310) ||
  2485. (!options.es && options.version < 150))
  2486. {
  2487. // I/O blocks on ES require version 310 with Android Extension Pack extensions, or core version 320.
  2488. // On desktop, I/O blocks were introduced with geometry shaders in GL 3.2 (GLSL 150).
  2489. emit_flattened_io_block(var, qual);
  2490. }
  2491. else
  2492. {
  2493. if (options.es && options.version < 320)
  2494. {
  2495. // Geometry and tessellation extensions imply this extension.
  2496. if (!has_extension("GL_EXT_geometry_shader") && !has_extension("GL_EXT_tessellation_shader"))
  2497. require_extension_internal("GL_EXT_shader_io_blocks");
  2498. }
  2499. // Workaround to make sure we can emit "patch in/out" correctly.
  2500. fixup_io_block_patch_primitive_qualifiers(var);
  2501. // Block names should never alias.
  2502. auto block_name = to_name(type.self, false);
  2503. // The namespace for I/O blocks is separate from other variables in GLSL.
  2504. auto &block_namespace = type.storage == StorageClassInput ? block_input_names : block_output_names;
  2505. // Shaders never use the block by interface name, so we don't
  2506. // have to track this other than updating name caches.
  2507. if (block_name.empty() || block_namespace.find(block_name) != end(block_namespace))
  2508. block_name = get_fallback_name(type.self);
  2509. else
  2510. block_namespace.insert(block_name);
2511. // If for some reason block_name is an illegal name, make a final fallback to a workaround name.
  2512. // This cannot conflict with anything else, so we're safe now.
  2513. if (block_name.empty())
  2514. block_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self);
  2515. // Instance names cannot alias block names.
  2516. resource_names.insert(block_name);
  2517. const char *block_qualifier;
  2518. if (has_decoration(var.self, DecorationPatch))
  2519. block_qualifier = "patch ";
  2520. else if (has_decoration(var.self, DecorationPerPrimitiveEXT))
  2521. block_qualifier = "perprimitiveEXT ";
  2522. else if (has_decoration(var.self, DecorationPerVertexKHR))
  2523. block_qualifier = "pervertexEXT ";
  2524. else
  2525. block_qualifier = "";
  2526. statement(layout_for_variable(var), block_qualifier, qual, block_name);
  2527. begin_scope();
  2528. type.member_name_cache.clear();
  2529. uint32_t i = 0;
  2530. for (auto &member : type.member_types)
  2531. {
  2532. add_member_name(type, i);
  2533. emit_struct_member(type, member, i);
  2534. i++;
  2535. }
  2536. add_resource_name(var.self);
  2537. end_scope_decl(join(to_name(var.self), type_to_array_glsl(type, var.self)));
  2538. statement("");
  2539. }
  2540. }
  2541. else
  2542. {
  2543. // ESSL earlier than 310 and GLSL earlier than 150 did not support
  2544. // I/O variables which are struct types.
  2545. // To support this, flatten the struct into separate varyings instead.
  2546. if (type.basetype == SPIRType::Struct &&
  2547. (options.force_flattened_io_blocks || (options.es && options.version < 310) ||
  2548. (!options.es && options.version < 150)))
  2549. {
  2550. emit_flattened_io_block(var, qual);
  2551. }
  2552. else
  2553. {
  2554. add_resource_name(var.self);
2555. // Legacy GLSL did not support int attributes; we automatically
2556. // declare them as float and cast them on load/store.
  2557. SPIRType newtype = type;
  2558. if (is_legacy() && var.storage == StorageClassInput && type.basetype == SPIRType::Int)
  2559. newtype.basetype = SPIRType::Float;
  2560. // Tessellation control and evaluation shaders must have either
  2561. // gl_MaxPatchVertices or unsized arrays for input arrays.
  2562. // Opt for unsized as it's the more "correct" variant to use.
  2563. if (type.storage == StorageClassInput && !type.array.empty() &&
  2564. !has_decoration(var.self, DecorationPatch) &&
  2565. (get_entry_point().model == ExecutionModelTessellationControl ||
  2566. get_entry_point().model == ExecutionModelTessellationEvaluation))
  2567. {
  2568. newtype.array.back() = 0;
  2569. newtype.array_size_literal.back() = true;
  2570. }
  2571. statement(layout_for_variable(var), to_qualifiers_glsl(var.self),
  2572. variable_decl(newtype, to_name(var.self), var.self), ";");
  2573. }
  2574. }
  2575. }
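// Illustrative sketch of a non-flattened output block produced by emit_interface_block() above
// (block, instance and member names as well as the location are hypothetical):
//
//     layout(location = 0) out VertexData
//     {
//         vec3 normal;
//         vec2 uv;
//     } vout;
//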
  2576. void CompilerGLSL::emit_uniform(const SPIRVariable &var)
  2577. {
  2578. auto &type = get<SPIRType>(var.basetype);
  2579. if (type.basetype == SPIRType::Image && type.image.sampled == 2 && type.image.dim != DimSubpassData)
  2580. {
  2581. if (!options.es && options.version < 420)
  2582. require_extension_internal("GL_ARB_shader_image_load_store");
  2583. else if (options.es && options.version < 310)
  2584. SPIRV_CROSS_THROW("At least ESSL 3.10 required for shader image load store.");
  2585. }
  2586. add_resource_name(var.self);
  2587. statement(layout_for_variable(var), variable_decl(var), ";");
  2588. }
  2589. string CompilerGLSL::constant_value_macro_name(uint32_t id) const
  2590. {
  2591. return join("SPIRV_CROSS_CONSTANT_ID_", id);
  2592. }
  2593. void CompilerGLSL::emit_specialization_constant_op(const SPIRConstantOp &constant)
  2594. {
  2595. auto &type = get<SPIRType>(constant.basetype);
2596. // Emitting a constant of a Block-decorated type will break; such SPIR-V is bogus and should not be legal, so skip it.
  2597. if (type_is_top_level_block(type))
  2598. return;
  2599. add_resource_name(constant.self);
  2600. auto name = to_name(constant.self);
  2601. statement("const ", variable_decl(type, name), " = ", constant_op_expression(constant), ";");
  2602. }
  2603. int CompilerGLSL::get_constant_mapping_to_workgroup_component(const SPIRConstant &c) const
  2604. {
  2605. auto &entry_point = get_entry_point();
  2606. int index = -1;
  2607. // Need to redirect specialization constants which are used as WorkGroupSize to the builtin,
  2608. // since the spec constant declarations are never explicitly declared.
  2609. if (entry_point.workgroup_size.constant == 0 && entry_point.flags.get(ExecutionModeLocalSizeId))
  2610. {
  2611. if (c.self == entry_point.workgroup_size.id_x)
  2612. index = 0;
  2613. else if (c.self == entry_point.workgroup_size.id_y)
  2614. index = 1;
  2615. else if (c.self == entry_point.workgroup_size.id_z)
  2616. index = 2;
  2617. }
  2618. return index;
  2619. }
  2620. void CompilerGLSL::emit_constant(const SPIRConstant &constant)
  2621. {
  2622. auto &type = get<SPIRType>(constant.constant_type);
2623. // Emitting a constant of a Block-decorated type will break; such SPIR-V is bogus and should not be legal, so skip it.
  2624. if (type_is_top_level_block(type))
  2625. return;
  2626. SpecializationConstant wg_x, wg_y, wg_z;
  2627. ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
  2628. // This specialization constant is implicitly declared by emitting layout() in;
  2629. if (constant.self == workgroup_size_id)
  2630. return;
  2631. // These specialization constants are implicitly declared by emitting layout() in;
  2632. // In legacy GLSL, we will still need to emit macros for these, so a layout() in; declaration
  2633. // later can use macro overrides for work group size.
  2634. bool is_workgroup_size_constant = ConstantID(constant.self) == wg_x.id || ConstantID(constant.self) == wg_y.id ||
  2635. ConstantID(constant.self) == wg_z.id;
  2636. if (options.vulkan_semantics && is_workgroup_size_constant)
  2637. {
  2638. // Vulkan GLSL does not need to declare workgroup spec constants explicitly, it is handled in layout().
  2639. return;
  2640. }
  2641. else if (!options.vulkan_semantics && is_workgroup_size_constant &&
  2642. !has_decoration(constant.self, DecorationSpecId))
  2643. {
  2644. // Only bother declaring a workgroup size if it is actually a specialization constant, because we need macros.
  2645. return;
  2646. }
  2647. add_resource_name(constant.self);
  2648. auto name = to_name(constant.self);
  2649. // Only scalars have constant IDs.
  2650. if (has_decoration(constant.self, DecorationSpecId))
  2651. {
  2652. if (options.vulkan_semantics)
  2653. {
  2654. statement("layout(constant_id = ", get_decoration(constant.self, DecorationSpecId), ") const ",
  2655. variable_decl(type, name), " = ", constant_expression(constant), ";");
  2656. }
  2657. else
  2658. {
  2659. const string &macro_name = constant.specialization_constant_macro_name;
  2660. statement("#ifndef ", macro_name);
  2661. statement("#define ", macro_name, " ", constant_expression(constant));
  2662. statement("#endif");
  2663. // For workgroup size constants, only emit the macros.
  2664. if (!is_workgroup_size_constant)
  2665. statement("const ", variable_decl(type, name), " = ", macro_name, ";");
  2666. }
  2667. }
  2668. else
  2669. {
  2670. statement("const ", variable_decl(type, name), " = ", constant_expression(constant), ";");
  2671. }
  2672. }
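// Illustrative sketch of emit_constant() output for a scalar specialization constant with
// SpecId 3 and default value 42 (the constant name and value are hypothetical; the macro follows
// the SPIRV_CROSS_CONSTANT_ID_ naming convention used above):
//
//     Vulkan GLSL:
//         layout(constant_id = 3) const int MY_CONSTANT = 42;
//
//     Plain GLSL:
//         #ifndef SPIRV_CROSS_CONSTANT_ID_3
//         #define SPIRV_CROSS_CONSTANT_ID_3 42
//         #endif
//         const int MY_CONSTANT = SPIRV_CROSS_CONSTANT_ID_3;
//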
  2673. void CompilerGLSL::emit_entry_point_declarations()
  2674. {
  2675. }
  2676. void CompilerGLSL::replace_illegal_names(const unordered_set<string> &keywords)
  2677. {
  2678. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
  2679. if (is_hidden_variable(var))
  2680. return;
  2681. auto *meta = ir.find_meta(var.self);
  2682. if (!meta)
  2683. return;
  2684. auto &m = meta->decoration;
  2685. if (keywords.find(m.alias) != end(keywords))
  2686. m.alias = join("_", m.alias);
  2687. });
  2688. ir.for_each_typed_id<SPIRFunction>([&](uint32_t, const SPIRFunction &func) {
  2689. auto *meta = ir.find_meta(func.self);
  2690. if (!meta)
  2691. return;
  2692. auto &m = meta->decoration;
  2693. if (keywords.find(m.alias) != end(keywords))
  2694. m.alias = join("_", m.alias);
  2695. });
  2696. ir.for_each_typed_id<SPIRType>([&](uint32_t, const SPIRType &type) {
  2697. auto *meta = ir.find_meta(type.self);
  2698. if (!meta)
  2699. return;
  2700. auto &m = meta->decoration;
  2701. if (keywords.find(m.alias) != end(keywords))
  2702. m.alias = join("_", m.alias);
  2703. for (auto &memb : meta->members)
  2704. if (keywords.find(memb.alias) != end(keywords))
  2705. memb.alias = join("_", memb.alias);
  2706. });
  2707. }
  2708. void CompilerGLSL::replace_illegal_names()
  2709. {
  2710. // clang-format off
  2711. static const unordered_set<string> keywords = {
  2712. "abs", "acos", "acosh", "all", "any", "asin", "asinh", "atan", "atanh",
  2713. "atomicAdd", "atomicCompSwap", "atomicCounter", "atomicCounterDecrement", "atomicCounterIncrement",
  2714. "atomicExchange", "atomicMax", "atomicMin", "atomicOr", "atomicXor",
  2715. "bitCount", "bitfieldExtract", "bitfieldInsert", "bitfieldReverse",
  2716. "ceil", "cos", "cosh", "cross", "degrees",
  2717. "dFdx", "dFdxCoarse", "dFdxFine",
  2718. "dFdy", "dFdyCoarse", "dFdyFine",
  2719. "distance", "dot", "EmitStreamVertex", "EmitVertex", "EndPrimitive", "EndStreamPrimitive", "equal", "exp", "exp2",
  2720. "faceforward", "findLSB", "findMSB", "float16BitsToInt16", "float16BitsToUint16", "floatBitsToInt", "floatBitsToUint", "floor", "fma", "fract",
  2721. "frexp", "fwidth", "fwidthCoarse", "fwidthFine",
  2722. "greaterThan", "greaterThanEqual", "groupMemoryBarrier",
  2723. "imageAtomicAdd", "imageAtomicAnd", "imageAtomicCompSwap", "imageAtomicExchange", "imageAtomicMax", "imageAtomicMin", "imageAtomicOr", "imageAtomicXor",
  2724. "imageLoad", "imageSamples", "imageSize", "imageStore", "imulExtended", "int16BitsToFloat16", "intBitsToFloat", "interpolateAtOffset", "interpolateAtCentroid", "interpolateAtSample",
  2725. "inverse", "inversesqrt", "isinf", "isnan", "ldexp", "length", "lessThan", "lessThanEqual", "log", "log2",
  2726. "matrixCompMult", "max", "memoryBarrier", "memoryBarrierAtomicCounter", "memoryBarrierBuffer", "memoryBarrierImage", "memoryBarrierShared",
  2727. "min", "mix", "mod", "modf", "noise", "noise1", "noise2", "noise3", "noise4", "normalize", "not", "notEqual",
  2728. "outerProduct", "packDouble2x32", "packHalf2x16", "packInt2x16", "packInt4x16", "packSnorm2x16", "packSnorm4x8",
  2729. "packUint2x16", "packUint4x16", "packUnorm2x16", "packUnorm4x8", "pow",
  2730. "radians", "reflect", "refract", "round", "roundEven", "sign", "sin", "sinh", "smoothstep", "sqrt", "step",
  2731. "tan", "tanh", "texelFetch", "texelFetchOffset", "texture", "textureGather", "textureGatherOffset", "textureGatherOffsets",
  2732. "textureGrad", "textureGradOffset", "textureLod", "textureLodOffset", "textureOffset", "textureProj", "textureProjGrad",
  2733. "textureProjGradOffset", "textureProjLod", "textureProjLodOffset", "textureProjOffset", "textureQueryLevels", "textureQueryLod", "textureSamples", "textureSize",
  2734. "transpose", "trunc", "uaddCarry", "uint16BitsToFloat16", "uintBitsToFloat", "umulExtended", "unpackDouble2x32", "unpackHalf2x16", "unpackInt2x16", "unpackInt4x16",
  2735. "unpackSnorm2x16", "unpackSnorm4x8", "unpackUint2x16", "unpackUint4x16", "unpackUnorm2x16", "unpackUnorm4x8", "usubBorrow",
  2736. "active", "asm", "atomic_uint", "attribute", "bool", "break", "buffer",
  2737. "bvec2", "bvec3", "bvec4", "case", "cast", "centroid", "class", "coherent", "common", "const", "continue", "default", "discard",
  2738. "dmat2", "dmat2x2", "dmat2x3", "dmat2x4", "dmat3", "dmat3x2", "dmat3x3", "dmat3x4", "dmat4", "dmat4x2", "dmat4x3", "dmat4x4",
  2739. "do", "double", "dvec2", "dvec3", "dvec4", "else", "enum", "extern", "external", "false", "filter", "fixed", "flat", "float",
  2740. "for", "fvec2", "fvec3", "fvec4", "goto", "half", "highp", "hvec2", "hvec3", "hvec4", "if", "iimage1D", "iimage1DArray",
  2741. "iimage2D", "iimage2DArray", "iimage2DMS", "iimage2DMSArray", "iimage2DRect", "iimage3D", "iimageBuffer", "iimageCube",
  2742. "iimageCubeArray", "image1D", "image1DArray", "image2D", "image2DArray", "image2DMS", "image2DMSArray", "image2DRect",
  2743. "image3D", "imageBuffer", "imageCube", "imageCubeArray", "in", "inline", "inout", "input", "int", "interface", "invariant",
  2744. "isampler1D", "isampler1DArray", "isampler2D", "isampler2DArray", "isampler2DMS", "isampler2DMSArray", "isampler2DRect",
  2745. "isampler3D", "isamplerBuffer", "isamplerCube", "isamplerCubeArray", "ivec2", "ivec3", "ivec4", "layout", "long", "lowp",
  2746. "mat2", "mat2x2", "mat2x3", "mat2x4", "mat3", "mat3x2", "mat3x3", "mat3x4", "mat4", "mat4x2", "mat4x3", "mat4x4", "mediump",
  2747. "namespace", "noinline", "noperspective", "out", "output", "packed", "partition", "patch", "precise", "precision", "public", "readonly",
  2748. "resource", "restrict", "return", "sample", "sampler1D", "sampler1DArray", "sampler1DArrayShadow",
  2749. "sampler1DShadow", "sampler2D", "sampler2DArray", "sampler2DArrayShadow", "sampler2DMS", "sampler2DMSArray",
  2750. "sampler2DRect", "sampler2DRectShadow", "sampler2DShadow", "sampler3D", "sampler3DRect", "samplerBuffer",
  2751. "samplerCube", "samplerCubeArray", "samplerCubeArrayShadow", "samplerCubeShadow", "shared", "short", "sizeof", "smooth", "static",
  2752. "struct", "subroutine", "superp", "switch", "template", "this", "true", "typedef", "uimage1D", "uimage1DArray", "uimage2D",
  2753. "uimage2DArray", "uimage2DMS", "uimage2DMSArray", "uimage2DRect", "uimage3D", "uimageBuffer", "uimageCube",
  2754. "uimageCubeArray", "uint", "uniform", "union", "unsigned", "usampler1D", "usampler1DArray", "usampler2D", "usampler2DArray",
  2755. "usampler2DMS", "usampler2DMSArray", "usampler2DRect", "usampler3D", "usamplerBuffer", "usamplerCube",
  2756. "usamplerCubeArray", "using", "uvec2", "uvec3", "uvec4", "varying", "vec2", "vec3", "vec4", "void", "volatile",
  2757. "while", "writeonly",
  2758. };
  2759. // clang-format on
  2760. replace_illegal_names(keywords);
  2761. }
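// Illustrative effect of replace_illegal_names() above: a shader variable the author happened to
// call "texture" or "buffer" is renamed to "_texture" / "_buffer" so it no longer collides with a
// GLSL keyword or builtin function; struct member names are rewritten the same way.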
  2762. void CompilerGLSL::replace_fragment_output(SPIRVariable &var)
  2763. {
  2764. auto &m = ir.meta[var.self].decoration;
  2765. uint32_t location = 0;
  2766. if (m.decoration_flags.get(DecorationLocation))
  2767. location = m.location;
2768. // If our variable is arrayed, we must not bake the array index into this alias, since the
2769. // SPIR-V access chain will do the indexing for us.
  2770. auto &type = get<SPIRType>(var.basetype);
  2771. if (type.array.empty())
  2772. {
  2773. // Redirect the write to a specific render target in legacy GLSL.
  2774. m.alias = join("gl_FragData[", location, "]");
  2775. if (is_legacy_es() && location != 0)
  2776. require_extension_internal("GL_EXT_draw_buffers");
  2777. }
  2778. else if (type.array.size() == 1)
  2779. {
  2780. // If location is non-zero, we probably have to add an offset.
  2781. // This gets really tricky since we'd have to inject an offset in the access chain.
  2782. // FIXME: This seems like an extremely odd-ball case, so it's probably fine to leave it like this for now.
  2783. m.alias = "gl_FragData";
  2784. if (location != 0)
  2785. SPIRV_CROSS_THROW("Arrayed output variable used, but location is not 0. "
  2786. "This is unimplemented in SPIRV-Cross.");
  2787. if (is_legacy_es())
  2788. require_extension_internal("GL_EXT_draw_buffers");
  2789. }
  2790. else
  2791. SPIRV_CROSS_THROW("Array-of-array output variable used. This cannot be implemented in legacy GLSL.");
  2792. var.compat_builtin = true; // We don't want to declare this variable, but use the name as-is.
  2793. }
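// Illustrative effect of replace_fragment_output() above: a non-arrayed fragment output at
// location 1 is never declared in legacy GLSL; its alias simply becomes gl_FragData[1] (with
// GL_EXT_draw_buffers required on legacy ES), so every store targets the builtin directly.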
  2794. void CompilerGLSL::replace_fragment_outputs()
  2795. {
  2796. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  2797. auto &type = this->get<SPIRType>(var.basetype);
  2798. if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer && var.storage == StorageClassOutput)
  2799. replace_fragment_output(var);
  2800. });
  2801. }
  2802. string CompilerGLSL::remap_swizzle(const SPIRType &out_type, uint32_t input_components, const string &expr)
  2803. {
  2804. if (out_type.vecsize == input_components)
  2805. return expr;
  2806. else if (input_components == 1 && !backend.can_swizzle_scalar)
  2807. return join(type_to_glsl(out_type), "(", expr, ")");
  2808. else
  2809. {
  2810. // FIXME: This will not work with packed expressions.
  2811. auto e = enclose_expression(expr) + ".";
  2812. // Just clamp the swizzle index if we have more outputs than inputs.
  2813. for (uint32_t c = 0; c < out_type.vecsize; c++)
  2814. e += index_to_swizzle(min(c, input_components - 1));
  2815. if (backend.swizzle_is_function && out_type.vecsize > 1)
  2816. e += "()";
  2817. remove_duplicate_swizzle(e);
  2818. return e;
  2819. }
  2820. }
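// Illustrative examples for remap_swizzle() above (the expression "e" is hypothetical): widening
// a two-component expression to a vec4 yields roughly "e.xyyy" (the last input component is
// repeated), while widening a scalar on a backend that cannot swizzle scalars yields "vec4(e)".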
  2821. void CompilerGLSL::emit_pls()
  2822. {
  2823. auto &execution = get_entry_point();
  2824. if (execution.model != ExecutionModelFragment)
  2825. SPIRV_CROSS_THROW("Pixel local storage only supported in fragment shaders.");
  2826. if (!options.es)
  2827. SPIRV_CROSS_THROW("Pixel local storage only supported in OpenGL ES.");
  2828. if (options.version < 300)
  2829. SPIRV_CROSS_THROW("Pixel local storage only supported in ESSL 3.0 and above.");
  2830. if (!pls_inputs.empty())
  2831. {
  2832. statement("__pixel_local_inEXT _PLSIn");
  2833. begin_scope();
  2834. for (auto &input : pls_inputs)
  2835. statement(pls_decl(input), ";");
  2836. end_scope_decl();
  2837. statement("");
  2838. }
  2839. if (!pls_outputs.empty())
  2840. {
  2841. statement("__pixel_local_outEXT _PLSOut");
  2842. begin_scope();
  2843. for (auto &output : pls_outputs)
  2844. statement(pls_decl(output), ";");
  2845. end_scope_decl();
  2846. statement("");
  2847. }
  2848. }
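// Illustrative sketch of the pixel local storage blocks emitted above (member names and formats
// are hypothetical and depend on the registered PLS remaps):
//
//     __pixel_local_inEXT _PLSIn
//     {
//         layout(rgba8) mediump vec4 albedo;
//     };
//
//     __pixel_local_outEXT _PLSOut
//     {
//         layout(rgba8) mediump vec4 color;
//     };
//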
  2849. void CompilerGLSL::fixup_image_load_store_access()
  2850. {
  2851. if (!options.enable_storage_image_qualifier_deduction)
  2852. return;
  2853. ir.for_each_typed_id<SPIRVariable>([&](uint32_t var, const SPIRVariable &) {
  2854. auto &vartype = expression_type(var);
  2855. if (vartype.basetype == SPIRType::Image && vartype.image.sampled == 2)
  2856. {
  2857. // Very old glslangValidator and HLSL compilers do not emit required qualifiers here.
  2858. // Solve this by making the image access as restricted as possible and loosen up if we need to.
  2859. // If any no-read/no-write flags are actually set, assume that the compiler knows what it's doing.
  2860. if (!has_decoration(var, DecorationNonWritable) && !has_decoration(var, DecorationNonReadable))
  2861. {
  2862. set_decoration(var, DecorationNonWritable);
  2863. set_decoration(var, DecorationNonReadable);
  2864. }
  2865. }
  2866. });
  2867. }
  2868. static bool is_block_builtin(BuiltIn builtin)
  2869. {
  2870. return builtin == BuiltInPosition || builtin == BuiltInPointSize || builtin == BuiltInClipDistance ||
  2871. builtin == BuiltInCullDistance;
  2872. }
  2873. bool CompilerGLSL::should_force_emit_builtin_block(StorageClass storage)
  2874. {
  2875. // If the builtin block uses XFB, we need to force explicit redeclaration of the builtin block.
  2876. if (storage != StorageClassOutput)
  2877. return false;
  2878. bool should_force = false;
  2879. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  2880. if (should_force)
  2881. return;
  2882. auto &type = this->get<SPIRType>(var.basetype);
  2883. bool block = has_decoration(type.self, DecorationBlock);
  2884. if (var.storage == storage && block && is_builtin_variable(var))
  2885. {
  2886. uint32_t member_count = uint32_t(type.member_types.size());
  2887. for (uint32_t i = 0; i < member_count; i++)
  2888. {
  2889. if (has_member_decoration(type.self, i, DecorationBuiltIn) &&
  2890. is_block_builtin(BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn))) &&
  2891. has_member_decoration(type.self, i, DecorationOffset))
  2892. {
  2893. should_force = true;
  2894. }
  2895. }
  2896. }
  2897. else if (var.storage == storage && !block && is_builtin_variable(var))
  2898. {
  2899. if (is_block_builtin(BuiltIn(get_decoration(type.self, DecorationBuiltIn))) &&
  2900. has_decoration(var.self, DecorationOffset))
  2901. {
  2902. should_force = true;
  2903. }
  2904. }
  2905. });
2906. // If we're declaring clip/cull distances with control points, we need to force block declaration.
  2907. if ((get_execution_model() == ExecutionModelTessellationControl ||
  2908. get_execution_model() == ExecutionModelMeshEXT) &&
  2909. (clip_distance_count || cull_distance_count))
  2910. {
  2911. should_force = true;
  2912. }
2913. // Either a glslang bug or an oversight, but declaring gl_Position invariant at global scope does not work in mesh shaders.
  2914. if (get_execution_model() == ExecutionModelMeshEXT && position_invariant)
  2915. should_force = true;
  2916. return should_force;
  2917. }
  2918. void CompilerGLSL::fixup_implicit_builtin_block_names(ExecutionModel model)
  2919. {
  2920. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  2921. auto &type = this->get<SPIRType>(var.basetype);
  2922. bool block = has_decoration(type.self, DecorationBlock);
  2923. if ((var.storage == StorageClassOutput || var.storage == StorageClassInput) && block &&
  2924. is_builtin_variable(var))
  2925. {
  2926. if (model != ExecutionModelMeshEXT)
  2927. {
  2928. // Make sure the array has a supported name in the code.
  2929. if (var.storage == StorageClassOutput)
  2930. set_name(var.self, "gl_out");
  2931. else if (var.storage == StorageClassInput)
  2932. set_name(var.self, "gl_in");
  2933. }
  2934. else
  2935. {
  2936. auto flags = get_buffer_block_flags(var.self);
  2937. if (flags.get(DecorationPerPrimitiveEXT))
  2938. {
  2939. set_name(var.self, "gl_MeshPrimitivesEXT");
  2940. set_name(type.self, "gl_MeshPerPrimitiveEXT");
  2941. }
  2942. else
  2943. {
  2944. set_name(var.self, "gl_MeshVerticesEXT");
  2945. set_name(type.self, "gl_MeshPerVertexEXT");
  2946. }
  2947. }
  2948. }
  2949. if (model == ExecutionModelMeshEXT && var.storage == StorageClassOutput && !block)
  2950. {
  2951. auto *m = ir.find_meta(var.self);
  2952. if (m && m->decoration.builtin)
  2953. {
  2954. auto builtin_type = m->decoration.builtin_type;
  2955. if (builtin_type == BuiltInPrimitivePointIndicesEXT)
  2956. set_name(var.self, "gl_PrimitivePointIndicesEXT");
  2957. else if (builtin_type == BuiltInPrimitiveLineIndicesEXT)
  2958. set_name(var.self, "gl_PrimitiveLineIndicesEXT");
  2959. else if (builtin_type == BuiltInPrimitiveTriangleIndicesEXT)
  2960. set_name(var.self, "gl_PrimitiveTriangleIndicesEXT");
  2961. }
  2962. }
  2963. });
  2964. }
  2965. void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionModel model)
  2966. {
  2967. Bitset emitted_builtins;
  2968. Bitset global_builtins;
  2969. const SPIRVariable *block_var = nullptr;
  2970. bool emitted_block = false;
  2971. // Need to use declared size in the type.
  2972. // These variables might have been declared, but not statically used, so we haven't deduced their size yet.
  2973. uint32_t cull_distance_size = 0;
  2974. uint32_t clip_distance_size = 0;
  2975. bool have_xfb_buffer_stride = false;
  2976. bool have_geom_stream = false;
  2977. bool have_any_xfb_offset = false;
  2978. uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0;
  2979. std::unordered_map<uint32_t, uint32_t> builtin_xfb_offsets;
  2980. const auto builtin_is_per_vertex_set = [](BuiltIn builtin) -> bool {
  2981. return builtin == BuiltInPosition || builtin == BuiltInPointSize ||
  2982. builtin == BuiltInClipDistance || builtin == BuiltInCullDistance;
  2983. };
  2984. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  2985. auto &type = this->get<SPIRType>(var.basetype);
  2986. bool block = has_decoration(type.self, DecorationBlock);
  2987. Bitset builtins;
  2988. if (var.storage == storage && block && is_builtin_variable(var))
  2989. {
  2990. uint32_t index = 0;
  2991. for (auto &m : ir.meta[type.self].members)
  2992. {
  2993. if (m.builtin && builtin_is_per_vertex_set(m.builtin_type))
  2994. {
  2995. builtins.set(m.builtin_type);
  2996. if (m.builtin_type == BuiltInCullDistance)
  2997. cull_distance_size = to_array_size_literal(this->get<SPIRType>(type.member_types[index]));
  2998. else if (m.builtin_type == BuiltInClipDistance)
  2999. clip_distance_size = to_array_size_literal(this->get<SPIRType>(type.member_types[index]));
  3000. if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationOffset))
  3001. {
  3002. have_any_xfb_offset = true;
  3003. builtin_xfb_offsets[m.builtin_type] = m.offset;
  3004. }
  3005. if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream))
  3006. {
  3007. uint32_t stream = m.stream;
  3008. if (have_geom_stream && geom_stream != stream)
  3009. SPIRV_CROSS_THROW("IO block member Stream mismatch.");
  3010. have_geom_stream = true;
  3011. geom_stream = stream;
  3012. }
  3013. }
  3014. index++;
  3015. }
  3016. if (storage == StorageClassOutput && has_decoration(var.self, DecorationXfbBuffer) &&
  3017. has_decoration(var.self, DecorationXfbStride))
  3018. {
  3019. uint32_t buffer_index = get_decoration(var.self, DecorationXfbBuffer);
  3020. uint32_t stride = get_decoration(var.self, DecorationXfbStride);
  3021. if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
  3022. SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
  3023. if (have_xfb_buffer_stride && stride != xfb_stride)
3024. SPIRV_CROSS_THROW("IO block member XfbStride mismatch.");
  3025. have_xfb_buffer_stride = true;
  3026. xfb_buffer = buffer_index;
  3027. xfb_stride = stride;
  3028. }
  3029. if (storage == StorageClassOutput && has_decoration(var.self, DecorationStream))
  3030. {
  3031. uint32_t stream = get_decoration(var.self, DecorationStream);
  3032. if (have_geom_stream && geom_stream != stream)
  3033. SPIRV_CROSS_THROW("IO block member Stream mismatch.");
  3034. have_geom_stream = true;
  3035. geom_stream = stream;
  3036. }
  3037. }
  3038. else if (var.storage == storage && !block && is_builtin_variable(var))
  3039. {
  3040. // While we're at it, collect all declared global builtins (HLSL mostly ...).
  3041. auto &m = ir.meta[var.self].decoration;
  3042. if (m.builtin && builtin_is_per_vertex_set(m.builtin_type))
  3043. {
  3044. // For mesh/tesc output, Clip/Cull is an array-of-array. Look at innermost array type
  3045. // for correct result.
  3046. global_builtins.set(m.builtin_type);
  3047. if (m.builtin_type == BuiltInCullDistance)
  3048. cull_distance_size = to_array_size_literal(type, 0);
  3049. else if (m.builtin_type == BuiltInClipDistance)
  3050. clip_distance_size = to_array_size_literal(type, 0);
  3051. if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationXfbStride) &&
  3052. m.decoration_flags.get(DecorationXfbBuffer) && m.decoration_flags.get(DecorationOffset))
  3053. {
  3054. have_any_xfb_offset = true;
  3055. builtin_xfb_offsets[m.builtin_type] = m.offset;
  3056. uint32_t buffer_index = m.xfb_buffer;
  3057. uint32_t stride = m.xfb_stride;
  3058. if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
  3059. SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
  3060. if (have_xfb_buffer_stride && stride != xfb_stride)
3061. SPIRV_CROSS_THROW("IO block member XfbStride mismatch.");
  3062. have_xfb_buffer_stride = true;
  3063. xfb_buffer = buffer_index;
  3064. xfb_stride = stride;
  3065. }
  3066. if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream))
  3067. {
  3068. uint32_t stream = get_decoration(var.self, DecorationStream);
  3069. if (have_geom_stream && geom_stream != stream)
  3070. SPIRV_CROSS_THROW("IO block member Stream mismatch.");
  3071. have_geom_stream = true;
  3072. geom_stream = stream;
  3073. }
  3074. }
  3075. }
  3076. if (builtins.empty())
  3077. return;
  3078. if (emitted_block)
  3079. SPIRV_CROSS_THROW("Cannot use more than one builtin I/O block.");
  3080. emitted_builtins = builtins;
  3081. emitted_block = true;
  3082. block_var = &var;
  3083. });
  3084. global_builtins =
  3085. Bitset(global_builtins.get_lower() & ((1ull << BuiltInPosition) | (1ull << BuiltInPointSize) |
  3086. (1ull << BuiltInClipDistance) | (1ull << BuiltInCullDistance)));
  3087. // Try to collect all other declared builtins.
  3088. if (!emitted_block)
  3089. emitted_builtins = global_builtins;
  3090. // Can't declare an empty interface block.
  3091. if (emitted_builtins.empty())
  3092. return;
  3093. if (storage == StorageClassOutput)
  3094. {
  3095. SmallVector<string> attr;
  3096. if (have_xfb_buffer_stride && have_any_xfb_offset)
  3097. {
  3098. if (!options.es)
  3099. {
  3100. if (options.version < 440 && options.version >= 140)
  3101. require_extension_internal("GL_ARB_enhanced_layouts");
  3102. else if (options.version < 140)
3103. SPIRV_CROSS_THROW("xfb_stride or xfb_buffer is not supported in targets below GLSL 1.40.");
  3104. if (!options.es && options.version < 440)
  3105. require_extension_internal("GL_ARB_enhanced_layouts");
  3106. }
  3107. else if (options.es)
  3108. SPIRV_CROSS_THROW("Need GL_ARB_enhanced_layouts for xfb_stride or xfb_buffer.");
  3109. attr.push_back(join("xfb_buffer = ", xfb_buffer, ", xfb_stride = ", xfb_stride));
  3110. }
  3111. if (have_geom_stream)
  3112. {
  3113. if (get_execution_model() != ExecutionModelGeometry)
  3114. SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
  3115. if (options.es)
  3116. SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
  3117. if (options.version < 400)
  3118. require_extension_internal("GL_ARB_transform_feedback3");
  3119. attr.push_back(join("stream = ", geom_stream));
  3120. }
  3121. if (model == ExecutionModelMeshEXT)
  3122. statement("out gl_MeshPerVertexEXT");
  3123. else if (!attr.empty())
  3124. statement("layout(", merge(attr), ") out gl_PerVertex");
  3125. else
  3126. statement("out gl_PerVertex");
  3127. }
  3128. else
  3129. {
3130. // With geometry passthrough, the gl_PerVertex input block must itself be declared layout(passthrough).
  3131. if (get_entry_point().geometry_passthrough)
  3132. statement("layout(passthrough) in gl_PerVertex");
  3133. else
  3134. statement("in gl_PerVertex");
  3135. }
  3136. begin_scope();
  3137. if (emitted_builtins.get(BuiltInPosition))
  3138. {
  3139. auto itr = builtin_xfb_offsets.find(BuiltInPosition);
  3140. if (itr != end(builtin_xfb_offsets))
  3141. statement("layout(xfb_offset = ", itr->second, ") vec4 gl_Position;");
  3142. else if (position_invariant)
  3143. statement("invariant vec4 gl_Position;");
  3144. else
  3145. statement("vec4 gl_Position;");
  3146. }
  3147. if (emitted_builtins.get(BuiltInPointSize))
  3148. {
  3149. auto itr = builtin_xfb_offsets.find(BuiltInPointSize);
  3150. if (itr != end(builtin_xfb_offsets))
  3151. statement("layout(xfb_offset = ", itr->second, ") float gl_PointSize;");
  3152. else
  3153. statement("float gl_PointSize;");
  3154. }
  3155. if (emitted_builtins.get(BuiltInClipDistance))
  3156. {
  3157. auto itr = builtin_xfb_offsets.find(BuiltInClipDistance);
  3158. if (itr != end(builtin_xfb_offsets))
  3159. statement("layout(xfb_offset = ", itr->second, ") float gl_ClipDistance[", clip_distance_size, "];");
  3160. else
  3161. statement("float gl_ClipDistance[", clip_distance_size, "];");
  3162. }
  3163. if (emitted_builtins.get(BuiltInCullDistance))
  3164. {
  3165. auto itr = builtin_xfb_offsets.find(BuiltInCullDistance);
  3166. if (itr != end(builtin_xfb_offsets))
  3167. statement("layout(xfb_offset = ", itr->second, ") float gl_CullDistance[", cull_distance_size, "];");
  3168. else
  3169. statement("float gl_CullDistance[", cull_distance_size, "];");
  3170. }
  3171. bool builtin_array = model == ExecutionModelTessellationControl ||
  3172. (model == ExecutionModelMeshEXT && storage == StorageClassOutput) ||
  3173. (model == ExecutionModelGeometry && storage == StorageClassInput) ||
  3174. (model == ExecutionModelTessellationEvaluation && storage == StorageClassInput);
  3175. if (builtin_array)
  3176. {
  3177. const char *instance_name;
  3178. if (model == ExecutionModelMeshEXT)
  3179. instance_name = "gl_MeshVerticesEXT"; // Per primitive is never synthesized.
  3180. else
  3181. instance_name = storage == StorageClassInput ? "gl_in" : "gl_out";
  3182. if (model == ExecutionModelTessellationControl && storage == StorageClassOutput)
  3183. end_scope_decl(join(instance_name, "[", get_entry_point().output_vertices, "]"));
  3184. else
  3185. end_scope_decl(join(instance_name, "[]"));
  3186. }
  3187. else
  3188. end_scope_decl();
  3189. statement("");
  3190. }
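// A variable is treated as a lookup table (LUT) when it is a remapped, statically assigned
// variable whose static expression is a constant marked as used-as-LUT. Such variables are
// not emitted as regular globals in emit_resources(); their constant data is emitted instead.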
  3191. bool CompilerGLSL::variable_is_lut(const SPIRVariable &var) const
  3192. {
  3193. bool statically_assigned = var.statically_assigned && var.static_expression != ID(0) && var.remapped_variable;
  3194. if (statically_assigned)
  3195. {
  3196. auto *constant = maybe_get<SPIRConstant>(var.static_expression);
  3197. if (constant && constant->is_used_as_lut)
  3198. return true;
  3199. }
  3200. return false;
  3201. }
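// Emits all global declarations for the shader, in roughly this order: redeclared builtin
// blocks, buffer reference forward declarations, specialization constants / plain struct
// types / undefs, a late workgroup size layout if it depends on spec constants, buffer
// reference blocks, UBOs and SSBOs, push constant blocks, uniforms/samplers/images,
// stage input/output interfaces, and finally plain global variables.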
  3202. void CompilerGLSL::emit_resources()
  3203. {
  3204. auto &execution = get_entry_point();
  3205. replace_illegal_names();
  3206. // Legacy GL uses gl_FragData[], redeclare all fragment outputs
  3207. // with builtins.
  3208. if (execution.model == ExecutionModelFragment && is_legacy())
  3209. replace_fragment_outputs();
  3210. // Emit PLS blocks if we have such variables.
  3211. if (!pls_inputs.empty() || !pls_outputs.empty())
  3212. emit_pls();
  3213. switch (execution.model)
  3214. {
  3215. case ExecutionModelGeometry:
  3216. case ExecutionModelTessellationControl:
  3217. case ExecutionModelTessellationEvaluation:
  3218. case ExecutionModelMeshEXT:
  3219. fixup_implicit_builtin_block_names(execution.model);
  3220. break;
  3221. default:
  3222. break;
  3223. }
  3224. bool global_invariant_position = position_invariant && (options.es || options.version >= 120);
  3225. // Emit custom gl_PerVertex for SSO compatibility.
  3226. if (options.separate_shader_objects && !options.es && execution.model != ExecutionModelFragment)
  3227. {
  3228. switch (execution.model)
  3229. {
  3230. case ExecutionModelGeometry:
  3231. case ExecutionModelTessellationControl:
  3232. case ExecutionModelTessellationEvaluation:
  3233. emit_declared_builtin_block(StorageClassInput, execution.model);
  3234. emit_declared_builtin_block(StorageClassOutput, execution.model);
  3235. global_invariant_position = false;
  3236. break;
  3237. case ExecutionModelVertex:
  3238. case ExecutionModelMeshEXT:
  3239. emit_declared_builtin_block(StorageClassOutput, execution.model);
  3240. global_invariant_position = false;
  3241. break;
  3242. default:
  3243. break;
  3244. }
  3245. }
  3246. else if (should_force_emit_builtin_block(StorageClassOutput))
  3247. {
  3248. emit_declared_builtin_block(StorageClassOutput, execution.model);
  3249. global_invariant_position = false;
  3250. }
  3251. else if (execution.geometry_passthrough)
  3252. {
  3253. // Need to declare gl_in with Passthrough.
  3254. // If we're doing passthrough, we cannot emit an output block, so the output block test above will never pass.
  3255. emit_declared_builtin_block(StorageClassInput, execution.model);
  3256. }
  3257. else
  3258. {
  3259. // Need to redeclare clip/cull distance with explicit size to use them.
  3260. // SPIR-V mandates these builtins have a size declared.
  3261. const char *storage = execution.model == ExecutionModelFragment ? "in" : "out";
  3262. if (clip_distance_count != 0)
  3263. statement(storage, " float gl_ClipDistance[", clip_distance_count, "];");
  3264. if (cull_distance_count != 0)
  3265. statement(storage, " float gl_CullDistance[", cull_distance_count, "];");
  3266. if (clip_distance_count != 0 || cull_distance_count != 0)
  3267. statement("");
  3268. }
  3269. if (global_invariant_position)
  3270. {
  3271. statement("invariant gl_Position;");
  3272. statement("");
  3273. }
  3274. bool emitted = false;
  3275. if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64)
  3276. {
  3277. // Output buffer reference block forward declarations.
  3278. ir.for_each_typed_id<SPIRType>([&](uint32_t id, SPIRType &type)
  3279. {
  3280. if (is_physical_pointer(type))
  3281. {
  3282. bool emit_type = true;
  3283. if (!is_physical_pointer_to_buffer_block(type))
  3284. {
  3285. // Only forward-declare if we intend to emit it in the non_block_pointer types.
  3286. // Otherwise, these are just "benign" pointer types that exist as a result of access chains.
  3287. emit_type = std::find(physical_storage_non_block_pointer_types.begin(),
  3288. physical_storage_non_block_pointer_types.end(),
  3289. id) != physical_storage_non_block_pointer_types.end();
  3290. }
  3291. if (emit_type)
  3292. {
  3293. emit_buffer_reference_block(id, true);
  3294. emitted = true;
  3295. }
  3296. }
  3297. });
  3298. }
  3299. if (emitted)
  3300. statement("");
  3301. emitted = false;
3302. // If emitting Vulkan GLSL, emit specialization constants as true spec constants;
3303. // otherwise they become plain constants backed by a #define macro, and
3304. // spec op expressions will redirect to the constant name.
3305. //
  3306. {
  3307. auto loop_lock = ir.create_loop_hard_lock();
  3308. for (auto &id_ : ir.ids_for_constant_undef_or_type)
  3309. {
  3310. auto &id = ir.ids[id_];
  3311. // Skip declaring any bogus constants or undefs which use block types.
  3312. // We don't declare block types directly, so this will never work.
  3313. // Should not be legal SPIR-V, so this is considered a workaround.
  3314. if (id.get_type() == TypeConstant)
  3315. {
  3316. auto &c = id.get<SPIRConstant>();
  3317. bool needs_declaration = c.specialization || c.is_used_as_lut;
  3318. if (needs_declaration)
  3319. {
  3320. if (!options.vulkan_semantics && c.specialization)
  3321. {
  3322. c.specialization_constant_macro_name =
  3323. constant_value_macro_name(get_decoration(c.self, DecorationSpecId));
  3324. }
  3325. emit_constant(c);
  3326. emitted = true;
  3327. }
  3328. }
  3329. else if (id.get_type() == TypeConstantOp)
  3330. {
  3331. emit_specialization_constant_op(id.get<SPIRConstantOp>());
  3332. emitted = true;
  3333. }
  3334. else if (id.get_type() == TypeType)
  3335. {
  3336. auto *type = &id.get<SPIRType>();
  3337. bool is_natural_struct = type->basetype == SPIRType::Struct && type->array.empty() && !type->pointer &&
  3338. (!has_decoration(type->self, DecorationBlock) &&
  3339. !has_decoration(type->self, DecorationBufferBlock));
  3340. // Special case, ray payload and hit attribute blocks are not really blocks, just regular structs.
  3341. if (type->basetype == SPIRType::Struct && type->pointer &&
  3342. has_decoration(type->self, DecorationBlock) &&
  3343. (type->storage == StorageClassRayPayloadKHR || type->storage == StorageClassIncomingRayPayloadKHR ||
  3344. type->storage == StorageClassHitAttributeKHR))
  3345. {
  3346. type = &get<SPIRType>(type->parent_type);
  3347. is_natural_struct = true;
  3348. }
  3349. if (is_natural_struct)
  3350. {
  3351. if (emitted)
  3352. statement("");
  3353. emitted = false;
  3354. emit_struct(*type);
  3355. }
  3356. }
  3357. else if (id.get_type() == TypeUndef)
  3358. {
  3359. auto &undef = id.get<SPIRUndef>();
  3360. auto &type = this->get<SPIRType>(undef.basetype);
  3361. // OpUndef can be void for some reason ...
  3362. if (type.basetype == SPIRType::Void)
  3363. continue;
  3364. // This will break. It is bogus and should not be legal.
  3365. if (type_is_top_level_block(type))
  3366. continue;
  3367. string initializer;
  3368. if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
  3369. initializer = join(" = ", to_zero_initialized_expression(undef.basetype));
  3370. // FIXME: If used in a constant, we must declare it as one.
  3371. statement(variable_decl(type, to_name(undef.self), undef.self), initializer, ";");
  3372. emitted = true;
  3373. }
  3374. }
  3375. }
  3376. if (emitted)
  3377. statement("");
  3378. // If we needed to declare work group size late, check here.
  3379. // If the work group size depends on a specialization constant, we need to declare the layout() block
  3380. // after constants (and their macros) have been declared.
  3381. if (execution.model == ExecutionModelGLCompute && !options.vulkan_semantics &&
  3382. (execution.workgroup_size.constant != 0 || execution.flags.get(ExecutionModeLocalSizeId)))
  3383. {
  3384. SpecializationConstant wg_x, wg_y, wg_z;
  3385. get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
  3386. if ((wg_x.id != ConstantID(0)) || (wg_y.id != ConstantID(0)) || (wg_z.id != ConstantID(0)))
  3387. {
  3388. SmallVector<string> inputs;
  3389. build_workgroup_size(inputs, wg_x, wg_y, wg_z);
  3390. statement("layout(", merge(inputs), ") in;");
  3391. statement("");
  3392. }
  3393. }
  3394. emitted = false;
  3395. if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64)
  3396. {
  3397. // Output buffer reference blocks.
  3398. // Buffer reference blocks can reference themselves to support things like linked lists.
  3399. for (auto type : physical_storage_non_block_pointer_types)
  3400. emit_buffer_reference_block(type, false);
  3401. ir.for_each_typed_id<SPIRType>([&](uint32_t id, SPIRType &type) {
  3402. if (is_physical_pointer_to_buffer_block(type))
  3403. emit_buffer_reference_block(id, false);
  3404. });
  3405. }
  3406. // Output UBOs and SSBOs
  3407. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  3408. auto &type = this->get<SPIRType>(var.basetype);
  3409. bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform ||
  3410. type.storage == StorageClassShaderRecordBufferKHR;
  3411. bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
  3412. ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
  3413. if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) &&
  3414. has_block_flags)
  3415. {
  3416. emit_buffer_block(var);
  3417. }
  3418. });
  3419. // Output push constant blocks
  3420. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  3421. auto &type = this->get<SPIRType>(var.basetype);
  3422. if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant &&
  3423. !is_hidden_variable(var))
  3424. {
  3425. emit_push_constant_block(var);
  3426. }
  3427. });
  3428. bool skip_separate_image_sampler = !combined_image_samplers.empty() || !options.vulkan_semantics;
  3429. // Output Uniform Constants (values, samplers, images, etc).
  3430. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  3431. auto &type = this->get<SPIRType>(var.basetype);
  3432. // If we're remapping separate samplers and images, only emit the combined samplers.
  3433. if (skip_separate_image_sampler)
  3434. {
  3435. // Sampler buffers are always used without a sampler, and they will also work in regular GL.
  3436. bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer;
  3437. bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
  3438. bool separate_sampler = type.basetype == SPIRType::Sampler;
  3439. if (!sampler_buffer && (separate_image || separate_sampler))
  3440. return;
  3441. }
  3442. if (var.storage != StorageClassFunction && type.pointer &&
  3443. (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter ||
  3444. type.storage == StorageClassRayPayloadKHR || type.storage == StorageClassIncomingRayPayloadKHR ||
  3445. type.storage == StorageClassCallableDataKHR || type.storage == StorageClassIncomingCallableDataKHR ||
  3446. type.storage == StorageClassHitAttributeKHR) &&
  3447. !is_hidden_variable(var))
  3448. {
  3449. emit_uniform(var);
  3450. emitted = true;
  3451. }
  3452. });
  3453. if (emitted)
  3454. statement("");
  3455. emitted = false;
  3456. bool emitted_base_instance = false;
  3457. // Output in/out interfaces.
  3458. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  3459. auto &type = this->get<SPIRType>(var.basetype);
  3460. bool is_hidden = is_hidden_variable(var);
  3461. // Unused output I/O variables might still be required to implement framebuffer fetch.
  3462. if (var.storage == StorageClassOutput && !is_legacy() &&
  3463. location_is_framebuffer_fetch(get_decoration(var.self, DecorationLocation)) != 0)
  3464. {
  3465. is_hidden = false;
  3466. }
  3467. if (var.storage != StorageClassFunction && type.pointer &&
  3468. (var.storage == StorageClassInput || var.storage == StorageClassOutput) &&
  3469. interface_variable_exists_in_entry_point(var.self) && !is_hidden)
  3470. {
  3471. if (options.es && get_execution_model() == ExecutionModelVertex && var.storage == StorageClassInput &&
  3472. type.array.size() == 1)
  3473. {
  3474. SPIRV_CROSS_THROW("OpenGL ES doesn't support array input variables in vertex shader.");
  3475. }
  3476. emit_interface_block(var);
  3477. emitted = true;
  3478. }
  3479. else if (is_builtin_variable(var))
  3480. {
  3481. auto builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn));
  3482. // For gl_InstanceIndex emulation on GLES, the API user needs to
  3483. // supply this uniform.
  3484. // The draw parameter extension is soft-enabled on GL with some fallbacks.
  3485. if (!options.vulkan_semantics)
  3486. {
  3487. if (!emitted_base_instance &&
  3488. ((options.vertex.support_nonzero_base_instance && builtin == BuiltInInstanceIndex) ||
  3489. (builtin == BuiltInBaseInstance)))
  3490. {
  3491. statement("#ifdef GL_ARB_shader_draw_parameters");
  3492. statement("#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB");
  3493. statement("#else");
  3494. // A crude, but simple workaround which should be good enough for non-indirect draws.
  3495. statement("uniform int SPIRV_Cross_BaseInstance;");
  3496. statement("#endif");
  3497. emitted = true;
  3498. emitted_base_instance = true;
  3499. }
  3500. else if (builtin == BuiltInBaseVertex)
  3501. {
  3502. statement("#ifdef GL_ARB_shader_draw_parameters");
  3503. statement("#define SPIRV_Cross_BaseVertex gl_BaseVertexARB");
  3504. statement("#else");
  3505. // A crude, but simple workaround which should be good enough for non-indirect draws.
  3506. statement("uniform int SPIRV_Cross_BaseVertex;");
  3507. statement("#endif");
  3508. }
  3509. else if (builtin == BuiltInDrawIndex)
  3510. {
  3511. statement("#ifndef GL_ARB_shader_draw_parameters");
  3512. // Cannot really be worked around.
  3513. statement("#error GL_ARB_shader_draw_parameters is not supported.");
  3514. statement("#endif");
  3515. }
  3516. }
  3517. }
  3518. });
  3519. // Global variables.
  3520. for (auto global : global_variables)
  3521. {
  3522. auto &var = get<SPIRVariable>(global);
  3523. if (is_hidden_variable(var, true))
  3524. continue;
  3525. if (var.storage != StorageClassOutput)
  3526. {
  3527. if (!variable_is_lut(var))
  3528. {
  3529. add_resource_name(var.self);
  3530. string initializer;
  3531. if (options.force_zero_initialized_variables && var.storage == StorageClassPrivate &&
  3532. !var.initializer && !var.static_expression && type_can_zero_initialize(get_variable_data_type(var)))
  3533. {
  3534. initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(var)));
  3535. }
  3536. statement(variable_decl(var), initializer, ";");
  3537. emitted = true;
  3538. }
  3539. }
  3540. else if (var.initializer && maybe_get<SPIRConstant>(var.initializer) != nullptr)
  3541. {
  3542. emit_output_variable_initializer(var);
  3543. }
  3544. }
  3545. if (emitted)
  3546. statement("");
  3547. }
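// GLSL cannot always initialize output variables at their declaration (e.g. builtin blocks
// and per-control-point outputs), so the initializer is turned into assignments that run at
// the top of main() via fixup hooks. Illustrative sketch for a plain (non-block) output,
// assuming a hypothetical variable ID of 37 and a hypothetical output name (not verbatim):
//
//   const vec4 _37_init = vec4(0.0);
//   void main()
//   {
//       FragColorOut = _37_init;
//       ...
//   }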
  3548. void CompilerGLSL::emit_output_variable_initializer(const SPIRVariable &var)
  3549. {
  3550. // If a StorageClassOutput variable has an initializer, we need to initialize it in main().
  3551. auto &entry_func = this->get<SPIRFunction>(ir.default_entry_point);
  3552. auto &type = get<SPIRType>(var.basetype);
  3553. bool is_patch = has_decoration(var.self, DecorationPatch);
  3554. bool is_block = has_decoration(type.self, DecorationBlock);
  3555. bool is_control_point = get_execution_model() == ExecutionModelTessellationControl && !is_patch;
  3556. if (is_block)
  3557. {
  3558. uint32_t member_count = uint32_t(type.member_types.size());
  3559. bool type_is_array = type.array.size() == 1;
  3560. uint32_t array_size = 1;
  3561. if (type_is_array)
  3562. array_size = to_array_size_literal(type);
  3563. uint32_t iteration_count = is_control_point ? 1 : array_size;
  3564. // If the initializer is a block, we must initialize each block member one at a time.
  3565. for (uint32_t i = 0; i < member_count; i++)
  3566. {
  3567. // These outputs might not have been properly declared, so don't initialize them in that case.
  3568. if (has_member_decoration(type.self, i, DecorationBuiltIn))
  3569. {
  3570. if (get_member_decoration(type.self, i, DecorationBuiltIn) == BuiltInCullDistance &&
  3571. !cull_distance_count)
  3572. continue;
  3573. if (get_member_decoration(type.self, i, DecorationBuiltIn) == BuiltInClipDistance &&
  3574. !clip_distance_count)
  3575. continue;
  3576. }
  3577. // We need to build a per-member array first, essentially transposing from AoS to SoA.
  3578. // This code path hits when we have an array of blocks.
  3579. string lut_name;
  3580. if (type_is_array)
  3581. {
  3582. lut_name = join("_", var.self, "_", i, "_init");
  3583. uint32_t member_type_id = get<SPIRType>(var.basetype).member_types[i];
  3584. auto &member_type = get<SPIRType>(member_type_id);
  3585. auto array_type = member_type;
  3586. array_type.parent_type = member_type_id;
  3587. array_type.op = OpTypeArray;
  3588. array_type.array.push_back(array_size);
  3589. array_type.array_size_literal.push_back(true);
  3590. SmallVector<string> exprs;
  3591. exprs.reserve(array_size);
  3592. auto &c = get<SPIRConstant>(var.initializer);
  3593. for (uint32_t j = 0; j < array_size; j++)
  3594. exprs.push_back(to_expression(get<SPIRConstant>(c.subconstants[j]).subconstants[i]));
  3595. statement("const ", type_to_glsl(array_type), " ", lut_name, type_to_array_glsl(array_type, 0), " = ",
  3596. type_to_glsl_constructor(array_type), "(", merge(exprs, ", "), ");");
  3597. }
  3598. for (uint32_t j = 0; j < iteration_count; j++)
  3599. {
  3600. entry_func.fixup_hooks_in.push_back([=, &var]() {
  3601. AccessChainMeta meta;
  3602. auto &c = this->get<SPIRConstant>(var.initializer);
  3603. uint32_t invocation_id = 0;
  3604. uint32_t member_index_id = 0;
  3605. if (is_control_point)
  3606. {
  3607. uint32_t ids = ir.increase_bound_by(3);
  3608. auto &uint_type = set<SPIRType>(ids, OpTypeInt);
  3609. uint_type.basetype = SPIRType::UInt;
  3610. uint_type.width = 32;
  3611. set<SPIRExpression>(ids + 1, builtin_to_glsl(BuiltInInvocationId, StorageClassInput), ids, true);
  3612. set<SPIRConstant>(ids + 2, ids, i, false);
  3613. invocation_id = ids + 1;
  3614. member_index_id = ids + 2;
  3615. }
  3616. if (is_patch)
  3617. {
  3618. statement("if (gl_InvocationID == 0)");
  3619. begin_scope();
  3620. }
  3621. if (type_is_array && !is_control_point)
  3622. {
  3623. uint32_t indices[2] = { j, i };
  3624. auto chain = access_chain_internal(var.self, indices, 2, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta);
  3625. statement(chain, " = ", lut_name, "[", j, "];");
  3626. }
  3627. else if (is_control_point)
  3628. {
  3629. uint32_t indices[2] = { invocation_id, member_index_id };
  3630. auto chain = access_chain_internal(var.self, indices, 2, 0, &meta);
  3631. statement(chain, " = ", lut_name, "[", builtin_to_glsl(BuiltInInvocationId, StorageClassInput), "];");
  3632. }
  3633. else
  3634. {
  3635. auto chain =
  3636. access_chain_internal(var.self, &i, 1, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta);
  3637. statement(chain, " = ", to_expression(c.subconstants[i]), ";");
  3638. }
  3639. if (is_patch)
  3640. end_scope();
  3641. });
  3642. }
  3643. }
  3644. }
  3645. else if (is_control_point)
  3646. {
  3647. auto lut_name = join("_", var.self, "_init");
  3648. statement("const ", type_to_glsl(type), " ", lut_name, type_to_array_glsl(type, 0),
  3649. " = ", to_expression(var.initializer), ";");
  3650. entry_func.fixup_hooks_in.push_back([&, lut_name]() {
  3651. statement(to_expression(var.self), "[gl_InvocationID] = ", lut_name, "[gl_InvocationID];");
  3652. });
  3653. }
  3654. else if (has_decoration(var.self, DecorationBuiltIn) &&
  3655. BuiltIn(get_decoration(var.self, DecorationBuiltIn)) == BuiltInSampleMask)
  3656. {
  3657. // We cannot copy the array since gl_SampleMask is unsized in GLSL. Unroll time! <_<
  3658. entry_func.fixup_hooks_in.push_back([&] {
  3659. auto &c = this->get<SPIRConstant>(var.initializer);
  3660. uint32_t num_constants = uint32_t(c.subconstants.size());
  3661. for (uint32_t i = 0; i < num_constants; i++)
  3662. {
  3663. // Don't use to_expression on constant since it might be uint, just fish out the raw int.
  3664. statement(to_expression(var.self), "[", i, "] = ",
  3665. convert_to_string(this->get<SPIRConstant>(c.subconstants[i]).scalar_i32()), ";");
  3666. }
  3667. });
  3668. }
  3669. else
  3670. {
  3671. auto lut_name = join("_", var.self, "_init");
  3672. statement("const ", type_to_glsl(type), " ", lut_name,
  3673. type_to_array_glsl(type, var.self), " = ", to_expression(var.initializer), ";");
  3674. entry_func.fixup_hooks_in.push_back([&, lut_name, is_patch]() {
  3675. if (is_patch)
  3676. {
  3677. statement("if (gl_InvocationID == 0)");
  3678. begin_scope();
  3679. }
  3680. statement(to_expression(var.self), " = ", lut_name, ";");
  3681. if (is_patch)
  3682. end_scope();
  3683. });
  3684. }
  3685. }
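// Emits NV_shader_thread_shuffle based replacements for the KHR subgroup arithmetic
// built-ins (add/mul, reduce and scans). Illustrative sketch of one emitted overload
// (reduce variant for uint; the real output covers all scalar/vector int and float types):
//
//   uint subgroupAdd(uint v)
//   {
//       uint reduction = 0u;
//       uvec4 active_threads = subgroupBallot(true);
//       if (subgroupBallotBitCount(active_threads) == gl_SubgroupSize)
//       {
//           // Full subgroup: log2(N) butterfly steps using shuffleXorNV.
//       }
//       else
//       {
//           // Partially active subgroup: loop over all lanes, masking out inactive ones.
//       }
//       return reduction;
//   }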
  3686. void CompilerGLSL::emit_subgroup_arithmetic_workaround(const std::string &func, Op op, GroupOperation group_op)
  3687. {
  3688. std::string result;
  3689. switch (group_op)
  3690. {
  3691. case GroupOperationReduce:
  3692. result = "reduction";
  3693. break;
  3694. case GroupOperationExclusiveScan:
  3695. result = "excl_scan";
  3696. break;
  3697. case GroupOperationInclusiveScan:
  3698. result = "incl_scan";
  3699. break;
  3700. default:
  3701. SPIRV_CROSS_THROW("Unsupported workaround for arithmetic group operation");
  3702. }
  3703. struct TypeInfo
  3704. {
  3705. std::string type;
  3706. std::string identity;
  3707. };
  3708. std::vector<TypeInfo> type_infos;
  3709. switch (op)
  3710. {
  3711. case OpGroupNonUniformIAdd:
  3712. {
  3713. type_infos.emplace_back(TypeInfo{ "uint", "0u" });
  3714. type_infos.emplace_back(TypeInfo{ "uvec2", "uvec2(0u)" });
  3715. type_infos.emplace_back(TypeInfo{ "uvec3", "uvec3(0u)" });
  3716. type_infos.emplace_back(TypeInfo{ "uvec4", "uvec4(0u)" });
  3717. type_infos.emplace_back(TypeInfo{ "int", "0" });
  3718. type_infos.emplace_back(TypeInfo{ "ivec2", "ivec2(0)" });
  3719. type_infos.emplace_back(TypeInfo{ "ivec3", "ivec3(0)" });
  3720. type_infos.emplace_back(TypeInfo{ "ivec4", "ivec4(0)" });
  3721. break;
  3722. }
  3723. case OpGroupNonUniformFAdd:
  3724. {
  3725. type_infos.emplace_back(TypeInfo{ "float", "0.0f" });
  3726. type_infos.emplace_back(TypeInfo{ "vec2", "vec2(0.0f)" });
  3727. type_infos.emplace_back(TypeInfo{ "vec3", "vec3(0.0f)" });
  3728. type_infos.emplace_back(TypeInfo{ "vec4", "vec4(0.0f)" });
3729. // ARB_gpu_shader_fp64 is required in GL 4.0, which in turn is required by NV_shader_thread_shuffle
  3730. type_infos.emplace_back(TypeInfo{ "double", "0.0LF" });
  3731. type_infos.emplace_back(TypeInfo{ "dvec2", "dvec2(0.0LF)" });
  3732. type_infos.emplace_back(TypeInfo{ "dvec3", "dvec3(0.0LF)" });
  3733. type_infos.emplace_back(TypeInfo{ "dvec4", "dvec4(0.0LF)" });
  3734. break;
  3735. }
  3736. case OpGroupNonUniformIMul:
  3737. {
  3738. type_infos.emplace_back(TypeInfo{ "uint", "1u" });
  3739. type_infos.emplace_back(TypeInfo{ "uvec2", "uvec2(1u)" });
  3740. type_infos.emplace_back(TypeInfo{ "uvec3", "uvec3(1u)" });
  3741. type_infos.emplace_back(TypeInfo{ "uvec4", "uvec4(1u)" });
  3742. type_infos.emplace_back(TypeInfo{ "int", "1" });
  3743. type_infos.emplace_back(TypeInfo{ "ivec2", "ivec2(1)" });
  3744. type_infos.emplace_back(TypeInfo{ "ivec3", "ivec3(1)" });
  3745. type_infos.emplace_back(TypeInfo{ "ivec4", "ivec4(1)" });
  3746. break;
  3747. }
  3748. case OpGroupNonUniformFMul:
  3749. {
  3750. type_infos.emplace_back(TypeInfo{ "float", "1.0f" });
  3751. type_infos.emplace_back(TypeInfo{ "vec2", "vec2(1.0f)" });
  3752. type_infos.emplace_back(TypeInfo{ "vec3", "vec3(1.0f)" });
  3753. type_infos.emplace_back(TypeInfo{ "vec4", "vec4(1.0f)" });
3754. type_infos.emplace_back(TypeInfo{ "double", "1.0LF" });
  3755. type_infos.emplace_back(TypeInfo{ "dvec2", "dvec2(1.0LF)" });
  3756. type_infos.emplace_back(TypeInfo{ "dvec3", "dvec3(1.0LF)" });
  3757. type_infos.emplace_back(TypeInfo{ "dvec4", "dvec4(1.0LF)" });
  3758. break;
  3759. }
  3760. default:
  3761. SPIRV_CROSS_THROW("Unsupported workaround for arithmetic group operation");
  3762. }
  3763. const bool op_is_addition = op == OpGroupNonUniformIAdd || op == OpGroupNonUniformFAdd;
  3764. const bool op_is_multiplication = op == OpGroupNonUniformIMul || op == OpGroupNonUniformFMul;
  3765. std::string op_symbol;
  3766. if (op_is_addition)
  3767. {
  3768. op_symbol = "+=";
  3769. }
  3770. else if (op_is_multiplication)
  3771. {
  3772. op_symbol = "*=";
  3773. }
  3774. for (const TypeInfo &t : type_infos)
  3775. {
  3776. statement(t.type, " ", func, "(", t.type, " v)");
  3777. begin_scope();
  3778. statement(t.type, " ", result, " = ", t.identity, ";");
  3779. statement("uvec4 active_threads = subgroupBallot(true);");
  3780. statement("if (subgroupBallotBitCount(active_threads) == gl_SubgroupSize)");
  3781. begin_scope();
  3782. statement("uint total = gl_SubgroupSize / 2u;");
  3783. statement(result, " = v;");
  3784. statement("for (uint i = 1u; i <= total; i <<= 1u)");
  3785. begin_scope();
  3786. statement("bool valid;");
  3787. if (group_op == GroupOperationReduce)
  3788. {
  3789. statement(t.type, " s = shuffleXorNV(", result, ", i, gl_SubgroupSize, valid);");
  3790. }
  3791. else if (group_op == GroupOperationExclusiveScan || group_op == GroupOperationInclusiveScan)
  3792. {
  3793. statement(t.type, " s = shuffleUpNV(", result, ", i, gl_SubgroupSize, valid);");
  3794. }
  3795. if (op_is_addition || op_is_multiplication)
  3796. {
  3797. statement(result, " ", op_symbol, " valid ? s : ", t.identity, ";");
  3798. }
  3799. end_scope();
  3800. if (group_op == GroupOperationExclusiveScan)
  3801. {
  3802. statement(result, " = shuffleUpNV(", result, ", 1u, gl_SubgroupSize);");
  3803. statement("if (subgroupElect())");
  3804. begin_scope();
  3805. statement(result, " = ", t.identity, ";");
  3806. end_scope();
  3807. }
  3808. end_scope();
  3809. statement("else");
  3810. begin_scope();
  3811. if (group_op == GroupOperationExclusiveScan)
  3812. {
  3813. statement("uint total = subgroupBallotBitCount(gl_SubgroupLtMask);");
  3814. }
  3815. else if (group_op == GroupOperationInclusiveScan)
  3816. {
  3817. statement("uint total = subgroupBallotBitCount(gl_SubgroupLeMask);");
  3818. }
  3819. statement("for (uint i = 0u; i < gl_SubgroupSize; ++i)");
  3820. begin_scope();
  3821. statement("bool valid = subgroupBallotBitExtract(active_threads, i);");
  3822. statement(t.type, " s = shuffleNV(v, i, gl_SubgroupSize);");
  3823. if (group_op == GroupOperationExclusiveScan || group_op == GroupOperationInclusiveScan)
  3824. {
  3825. statement("valid = valid && (i < total);");
  3826. }
  3827. if (op_is_addition || op_is_multiplication)
  3828. {
  3829. statement(result, " ", op_symbol, " valid ? s : ", t.identity, ";");
  3830. }
  3831. end_scope();
  3832. end_scope();
  3833. statement("return ", result, ";");
  3834. end_scope();
  3835. }
  3836. }
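// For plain (non-Vulkan) GLSL, emits preprocessor fallback chains that map KHR subgroup
// functionality onto whichever vendor extension is available, e.g. (illustrative):
//
//   #if defined(GL_NV_shader_thread_group)
//   #define gl_SubgroupSize gl_WarpSizeNV
//   #elif defined(GL_ARB_shader_ballot)
//   #define gl_SubgroupSize gl_SubGroupSizeARB
//   #elif defined(GL_AMD_gcn_shader)
//   #define gl_SubgroupSize uint(gl_SIMDGroupSizeAMD)
//   #endif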
  3837. void CompilerGLSL::emit_extension_workarounds(ExecutionModel model)
  3838. {
  3839. static const char *workaround_types[] = { "int", "ivec2", "ivec3", "ivec4", "uint", "uvec2", "uvec3", "uvec4",
  3840. "float", "vec2", "vec3", "vec4", "double", "dvec2", "dvec3", "dvec4" };
  3841. if (!options.vulkan_semantics)
  3842. {
  3843. using Supp = ShaderSubgroupSupportHelper;
  3844. auto result = shader_subgroup_supporter.resolve();
  3845. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMask))
  3846. {
  3847. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupMask, result);
  3848. for (auto &e : exts)
  3849. {
  3850. const char *name = Supp::get_extension_name(e);
  3851. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3852. switch (e)
  3853. {
  3854. case Supp::NV_shader_thread_group:
  3855. statement("#define gl_SubgroupEqMask uvec4(gl_ThreadEqMaskNV, 0u, 0u, 0u)");
  3856. statement("#define gl_SubgroupGeMask uvec4(gl_ThreadGeMaskNV, 0u, 0u, 0u)");
  3857. statement("#define gl_SubgroupGtMask uvec4(gl_ThreadGtMaskNV, 0u, 0u, 0u)");
  3858. statement("#define gl_SubgroupLeMask uvec4(gl_ThreadLeMaskNV, 0u, 0u, 0u)");
  3859. statement("#define gl_SubgroupLtMask uvec4(gl_ThreadLtMaskNV, 0u, 0u, 0u)");
  3860. break;
  3861. case Supp::ARB_shader_ballot:
  3862. statement("#define gl_SubgroupEqMask uvec4(unpackUint2x32(gl_SubGroupEqMaskARB), 0u, 0u)");
  3863. statement("#define gl_SubgroupGeMask uvec4(unpackUint2x32(gl_SubGroupGeMaskARB), 0u, 0u)");
  3864. statement("#define gl_SubgroupGtMask uvec4(unpackUint2x32(gl_SubGroupGtMaskARB), 0u, 0u)");
  3865. statement("#define gl_SubgroupLeMask uvec4(unpackUint2x32(gl_SubGroupLeMaskARB), 0u, 0u)");
  3866. statement("#define gl_SubgroupLtMask uvec4(unpackUint2x32(gl_SubGroupLtMaskARB), 0u, 0u)");
  3867. break;
  3868. default:
  3869. break;
  3870. }
  3871. }
  3872. statement("#endif");
  3873. statement("");
  3874. }
  3875. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupSize))
  3876. {
  3877. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupSize, result);
  3878. for (auto &e : exts)
  3879. {
  3880. const char *name = Supp::get_extension_name(e);
  3881. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3882. switch (e)
  3883. {
  3884. case Supp::NV_shader_thread_group:
  3885. statement("#define gl_SubgroupSize gl_WarpSizeNV");
  3886. break;
  3887. case Supp::ARB_shader_ballot:
  3888. statement("#define gl_SubgroupSize gl_SubGroupSizeARB");
  3889. break;
  3890. case Supp::AMD_gcn_shader:
  3891. statement("#define gl_SubgroupSize uint(gl_SIMDGroupSizeAMD)");
  3892. break;
  3893. default:
  3894. break;
  3895. }
  3896. }
  3897. statement("#endif");
  3898. statement("");
  3899. }
  3900. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInvocationID))
  3901. {
  3902. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupInvocationID, result);
  3903. for (auto &e : exts)
  3904. {
  3905. const char *name = Supp::get_extension_name(e);
  3906. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3907. switch (e)
  3908. {
  3909. case Supp::NV_shader_thread_group:
  3910. statement("#define gl_SubgroupInvocationID gl_ThreadInWarpNV");
  3911. break;
  3912. case Supp::ARB_shader_ballot:
  3913. statement("#define gl_SubgroupInvocationID gl_SubGroupInvocationARB");
  3914. break;
  3915. default:
  3916. break;
  3917. }
  3918. }
  3919. statement("#endif");
  3920. statement("");
  3921. }
  3922. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupID))
  3923. {
  3924. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupID, result);
  3925. for (auto &e : exts)
  3926. {
  3927. const char *name = Supp::get_extension_name(e);
  3928. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3929. switch (e)
  3930. {
  3931. case Supp::NV_shader_thread_group:
  3932. statement("#define gl_SubgroupID gl_WarpIDNV");
  3933. break;
  3934. default:
  3935. break;
  3936. }
  3937. }
  3938. statement("#endif");
  3939. statement("");
  3940. }
  3941. if (shader_subgroup_supporter.is_feature_requested(Supp::NumSubgroups))
  3942. {
  3943. auto exts = Supp::get_candidates_for_feature(Supp::NumSubgroups, result);
  3944. for (auto &e : exts)
  3945. {
  3946. const char *name = Supp::get_extension_name(e);
  3947. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3948. switch (e)
  3949. {
  3950. case Supp::NV_shader_thread_group:
  3951. statement("#define gl_NumSubgroups gl_WarpsPerSMNV");
  3952. break;
  3953. default:
  3954. break;
  3955. }
  3956. }
  3957. statement("#endif");
  3958. statement("");
  3959. }
  3960. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBroadcast_First))
  3961. {
  3962. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBroadcast_First, result);
  3963. for (auto &e : exts)
  3964. {
  3965. const char *name = Supp::get_extension_name(e);
  3966. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3967. switch (e)
  3968. {
  3969. case Supp::NV_shader_thread_shuffle:
  3970. for (const char *t : workaround_types)
  3971. {
  3972. statement(t, " subgroupBroadcastFirst(", t,
  3973. " value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }");
  3974. }
  3975. for (const char *t : workaround_types)
  3976. {
  3977. statement(t, " subgroupBroadcast(", t,
  3978. " value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }");
  3979. }
  3980. break;
  3981. case Supp::ARB_shader_ballot:
  3982. for (const char *t : workaround_types)
  3983. {
  3984. statement(t, " subgroupBroadcastFirst(", t,
  3985. " value) { return readFirstInvocationARB(value); }");
  3986. }
  3987. for (const char *t : workaround_types)
  3988. {
  3989. statement(t, " subgroupBroadcast(", t,
  3990. " value, uint id) { return readInvocationARB(value, id); }");
  3991. }
  3992. break;
  3993. default:
  3994. break;
  3995. }
  3996. }
  3997. statement("#endif");
  3998. statement("");
  3999. }
  4000. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotFindLSB_MSB))
  4001. {
  4002. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBallotFindLSB_MSB, result);
  4003. for (auto &e : exts)
  4004. {
  4005. const char *name = Supp::get_extension_name(e);
  4006. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  4007. switch (e)
  4008. {
  4009. case Supp::NV_shader_thread_group:
  4010. statement("uint subgroupBallotFindLSB(uvec4 value) { return findLSB(value.x); }");
  4011. statement("uint subgroupBallotFindMSB(uvec4 value) { return findMSB(value.x); }");
  4012. break;
  4013. default:
  4014. break;
  4015. }
  4016. }
  4017. statement("#else");
  4018. statement("uint subgroupBallotFindLSB(uvec4 value)");
  4019. begin_scope();
  4020. statement("int firstLive = findLSB(value.x);");
  4021. statement("return uint(firstLive != -1 ? firstLive : (findLSB(value.y) + 32));");
  4022. end_scope();
  4023. statement("uint subgroupBallotFindMSB(uvec4 value)");
  4024. begin_scope();
  4025. statement("int firstLive = findMSB(value.y);");
  4026. statement("return uint(firstLive != -1 ? (firstLive + 32) : findMSB(value.x));");
  4027. end_scope();
  4028. statement("#endif");
  4029. statement("");
  4030. }
  4031. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAll_Any_AllEqualBool))
  4032. {
  4033. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupAll_Any_AllEqualBool, result);
  4034. for (auto &e : exts)
  4035. {
  4036. const char *name = Supp::get_extension_name(e);
  4037. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  4038. switch (e)
  4039. {
  4040. case Supp::NV_gpu_shader_5:
  4041. statement("bool subgroupAll(bool value) { return allThreadsNV(value); }");
  4042. statement("bool subgroupAny(bool value) { return anyThreadNV(value); }");
  4043. statement("bool subgroupAllEqual(bool value) { return allThreadsEqualNV(value); }");
  4044. break;
  4045. case Supp::ARB_shader_group_vote:
  4046. statement("bool subgroupAll(bool v) { return allInvocationsARB(v); }");
  4047. statement("bool subgroupAny(bool v) { return anyInvocationARB(v); }");
  4048. statement("bool subgroupAllEqual(bool v) { return allInvocationsEqualARB(v); }");
  4049. break;
  4050. case Supp::AMD_gcn_shader:
  4051. statement("bool subgroupAll(bool value) { return ballotAMD(value) == ballotAMD(true); }");
  4052. statement("bool subgroupAny(bool value) { return ballotAMD(value) != 0ull; }");
  4053. statement("bool subgroupAllEqual(bool value) { uint64_t b = ballotAMD(value); return b == 0ull || "
  4054. "b == ballotAMD(true); }");
  4055. break;
  4056. default:
  4057. break;
  4058. }
  4059. }
  4060. statement("#endif");
  4061. statement("");
  4062. }
  4063. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAllEqualT))
  4064. {
  4065. statement("#ifndef GL_KHR_shader_subgroup_vote");
  4066. statement(
  4067. "#define _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(type) bool subgroupAllEqual(type value) { return "
  4068. "subgroupAllEqual(subgroupBroadcastFirst(value) == value); }");
  4069. for (const char *t : workaround_types)
  4070. statement("_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(", t, ")");
  4071. statement("#undef _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND");
  4072. statement("#endif");
  4073. statement("");
  4074. }
  4075. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallot))
  4076. {
  4077. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBallot, result);
  4078. for (auto &e : exts)
  4079. {
  4080. const char *name = Supp::get_extension_name(e);
  4081. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  4082. switch (e)
  4083. {
  4084. case Supp::NV_shader_thread_group:
  4085. statement("uvec4 subgroupBallot(bool v) { return uvec4(ballotThreadNV(v), 0u, 0u, 0u); }");
  4086. break;
  4087. case Supp::ARB_shader_ballot:
  4088. statement("uvec4 subgroupBallot(bool v) { return uvec4(unpackUint2x32(ballotARB(v)), 0u, 0u); }");
  4089. break;
  4090. default:
  4091. break;
  4092. }
  4093. }
  4094. statement("#endif");
  4095. statement("");
  4096. }
  4097. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupElect))
  4098. {
  4099. statement("#ifndef GL_KHR_shader_subgroup_basic");
  4100. statement("bool subgroupElect()");
  4101. begin_scope();
  4102. statement("uvec4 activeMask = subgroupBallot(true);");
  4103. statement("uint firstLive = subgroupBallotFindLSB(activeMask);");
  4104. statement("return gl_SubgroupInvocationID == firstLive;");
  4105. end_scope();
  4106. statement("#endif");
  4107. statement("");
  4108. }
  4109. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBarrier))
  4110. {
4111. // The extensions we're using in place of GL_KHR_shader_subgroup_basic state
4112. // that subgroups execute in lockstep, so this barrier is implicit.
4113. // However, the GL 4.6 spec also states that `barrier` implies a shared memory barrier,
4114. // and a specific test of optimizing scans by leveraging lock-step invocation execution
4115. // has shown that a `memoryBarrierShared` is needed in place of a `subgroupBarrier`.
  4116. // https://github.com/buildaworldnet/IrrlichtBAW/commit/d8536857991b89a30a6b65d29441e51b64c2c7ad#diff-9f898d27be1ea6fc79b03d9b361e299334c1a347b6e4dc344ee66110c6aa596aR19
  4117. statement("#ifndef GL_KHR_shader_subgroup_basic");
  4118. statement("void subgroupBarrier() { memoryBarrierShared(); }");
  4119. statement("#endif");
  4120. statement("");
  4121. }
  4122. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMemBarrier))
  4123. {
  4124. if (model == ExecutionModelGLCompute)
  4125. {
  4126. statement("#ifndef GL_KHR_shader_subgroup_basic");
  4127. statement("void subgroupMemoryBarrier() { groupMemoryBarrier(); }");
  4128. statement("void subgroupMemoryBarrierBuffer() { groupMemoryBarrier(); }");
  4129. statement("void subgroupMemoryBarrierShared() { memoryBarrierShared(); }");
  4130. statement("void subgroupMemoryBarrierImage() { groupMemoryBarrier(); }");
  4131. statement("#endif");
  4132. }
  4133. else
  4134. {
  4135. statement("#ifndef GL_KHR_shader_subgroup_basic");
  4136. statement("void subgroupMemoryBarrier() { memoryBarrier(); }");
  4137. statement("void subgroupMemoryBarrierBuffer() { memoryBarrierBuffer(); }");
  4138. statement("void subgroupMemoryBarrierImage() { memoryBarrierImage(); }");
  4139. statement("#endif");
  4140. }
  4141. statement("");
  4142. }
  4143. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInverseBallot_InclBitCount_ExclBitCout))
  4144. {
  4145. statement("#ifndef GL_KHR_shader_subgroup_ballot");
  4146. statement("bool subgroupInverseBallot(uvec4 value)");
  4147. begin_scope();
  4148. statement("return any(notEqual(value.xy & gl_SubgroupEqMask.xy, uvec2(0u)));");
  4149. end_scope();
  4150. statement("uint subgroupBallotInclusiveBitCount(uvec4 value)");
  4151. begin_scope();
  4152. statement("uvec2 v = value.xy & gl_SubgroupLeMask.xy;");
  4153. statement("ivec2 c = bitCount(v);");
  4154. statement_no_indent("#ifdef GL_NV_shader_thread_group");
  4155. statement("return uint(c.x);");
  4156. statement_no_indent("#else");
  4157. statement("return uint(c.x + c.y);");
  4158. statement_no_indent("#endif");
  4159. end_scope();
  4160. statement("uint subgroupBallotExclusiveBitCount(uvec4 value)");
  4161. begin_scope();
  4162. statement("uvec2 v = value.xy & gl_SubgroupLtMask.xy;");
  4163. statement("ivec2 c = bitCount(v);");
  4164. statement_no_indent("#ifdef GL_NV_shader_thread_group");
  4165. statement("return uint(c.x);");
  4166. statement_no_indent("#else");
  4167. statement("return uint(c.x + c.y);");
  4168. statement_no_indent("#endif");
  4169. end_scope();
  4170. statement("#endif");
  4171. statement("");
  4172. }
  4173. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitCount))
  4174. {
  4175. statement("#ifndef GL_KHR_shader_subgroup_ballot");
  4176. statement("uint subgroupBallotBitCount(uvec4 value)");
  4177. begin_scope();
  4178. statement("ivec2 c = bitCount(value.xy);");
  4179. statement_no_indent("#ifdef GL_NV_shader_thread_group");
  4180. statement("return uint(c.x);");
  4181. statement_no_indent("#else");
  4182. statement("return uint(c.x + c.y);");
  4183. statement_no_indent("#endif");
  4184. end_scope();
  4185. statement("#endif");
  4186. statement("");
  4187. }
  4188. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitExtract))
  4189. {
  4190. statement("#ifndef GL_KHR_shader_subgroup_ballot");
  4191. statement("bool subgroupBallotBitExtract(uvec4 value, uint index)");
  4192. begin_scope();
  4193. statement_no_indent("#ifdef GL_NV_shader_thread_group");
  4194. statement("uint shifted = value.x >> index;");
  4195. statement_no_indent("#else");
  4196. statement("uint shifted = value[index >> 5u] >> (index & 0x1fu);");
  4197. statement_no_indent("#endif");
  4198. statement("return (shifted & 1u) != 0u;");
  4199. end_scope();
  4200. statement("#endif");
  4201. statement("");
  4202. }
  4203. auto arithmetic_feature_helper =
  4204. [&](Supp::Feature feat, std::string func_name, Op op, GroupOperation group_op)
  4205. {
  4206. if (shader_subgroup_supporter.is_feature_requested(feat))
  4207. {
  4208. auto exts = Supp::get_candidates_for_feature(feat, result);
  4209. for (auto &e : exts)
  4210. {
  4211. const char *name = Supp::get_extension_name(e);
  4212. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  4213. switch (e)
  4214. {
  4215. case Supp::NV_shader_thread_shuffle:
  4216. emit_subgroup_arithmetic_workaround(func_name, op, group_op);
  4217. break;
  4218. default:
  4219. break;
  4220. }
  4221. }
  4222. statement("#endif");
  4223. statement("");
  4224. }
  4225. };
  4226. arithmetic_feature_helper(Supp::SubgroupArithmeticIAddReduce, "subgroupAdd", OpGroupNonUniformIAdd,
  4227. GroupOperationReduce);
  4228. arithmetic_feature_helper(Supp::SubgroupArithmeticIAddExclusiveScan, "subgroupExclusiveAdd",
  4229. OpGroupNonUniformIAdd, GroupOperationExclusiveScan);
  4230. arithmetic_feature_helper(Supp::SubgroupArithmeticIAddInclusiveScan, "subgroupInclusiveAdd",
  4231. OpGroupNonUniformIAdd, GroupOperationInclusiveScan);
  4232. arithmetic_feature_helper(Supp::SubgroupArithmeticFAddReduce, "subgroupAdd", OpGroupNonUniformFAdd,
  4233. GroupOperationReduce);
  4234. arithmetic_feature_helper(Supp::SubgroupArithmeticFAddExclusiveScan, "subgroupExclusiveAdd",
  4235. OpGroupNonUniformFAdd, GroupOperationExclusiveScan);
  4236. arithmetic_feature_helper(Supp::SubgroupArithmeticFAddInclusiveScan, "subgroupInclusiveAdd",
  4237. OpGroupNonUniformFAdd, GroupOperationInclusiveScan);
  4238. arithmetic_feature_helper(Supp::SubgroupArithmeticIMulReduce, "subgroupMul", OpGroupNonUniformIMul,
  4239. GroupOperationReduce);
  4240. arithmetic_feature_helper(Supp::SubgroupArithmeticIMulExclusiveScan, "subgroupExclusiveMul",
  4241. OpGroupNonUniformIMul, GroupOperationExclusiveScan);
  4242. arithmetic_feature_helper(Supp::SubgroupArithmeticIMulInclusiveScan, "subgroupInclusiveMul",
  4243. OpGroupNonUniformIMul, GroupOperationInclusiveScan);
  4244. arithmetic_feature_helper(Supp::SubgroupArithmeticFMulReduce, "subgroupMul", OpGroupNonUniformFMul,
  4245. GroupOperationReduce);
  4246. arithmetic_feature_helper(Supp::SubgroupArithmeticFMulExclusiveScan, "subgroupExclusiveMul",
  4247. OpGroupNonUniformFMul, GroupOperationExclusiveScan);
  4248. arithmetic_feature_helper(Supp::SubgroupArithmeticFMulInclusiveScan, "subgroupInclusiveMul",
  4249. OpGroupNonUniformFMul, GroupOperationInclusiveScan);
  4250. }
  4251. if (!workaround_ubo_load_overload_types.empty())
  4252. {
  4253. for (auto &type_id : workaround_ubo_load_overload_types)
  4254. {
  4255. auto &type = get<SPIRType>(type_id);
  4256. if (options.es && is_matrix(type))
  4257. {
  4258. // Need both variants.
  4259. // GLSL cannot overload on precision, so need to dispatch appropriately.
  4260. statement("highp ", type_to_glsl(type), " spvWorkaroundRowMajor(highp ", type_to_glsl(type), " wrap) { return wrap; }");
  4261. statement("mediump ", type_to_glsl(type), " spvWorkaroundRowMajorMP(mediump ", type_to_glsl(type), " wrap) { return wrap; }");
  4262. }
  4263. else
  4264. {
  4265. statement(type_to_glsl(type), " spvWorkaroundRowMajor(", type_to_glsl(type), " wrap) { return wrap; }");
  4266. }
  4267. }
  4268. statement("");
  4269. }
  4270. }
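// Emits small helper functions for matrix operations the target cannot express natively
// (transpose, determinant and inverse for 2x2/3x3/4x4). On ES, a relaxed-precision variant
// with an "MP" suffix can be emitted alongside the highp one. Illustrative example of one
// such polyfill as emitted for a non-ES target:
//
//   mat2 spvTranspose(mat2 m)
//   {
//       return mat2(m[0][0], m[1][0], m[0][1], m[1][1]);
//   }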
  4271. void CompilerGLSL::emit_polyfills(uint32_t polyfills, bool relaxed)
  4272. {
  4273. const char *qual = "";
  4274. const char *suffix = (options.es && relaxed) ? "MP" : "";
  4275. if (options.es)
  4276. qual = relaxed ? "mediump " : "highp ";
  4277. if (polyfills & PolyfillTranspose2x2)
  4278. {
  4279. statement(qual, "mat2 spvTranspose", suffix, "(", qual, "mat2 m)");
  4280. begin_scope();
  4281. statement("return mat2(m[0][0], m[1][0], m[0][1], m[1][1]);");
  4282. end_scope();
  4283. statement("");
  4284. }
  4285. if (polyfills & PolyfillTranspose3x3)
  4286. {
  4287. statement(qual, "mat3 spvTranspose", suffix, "(", qual, "mat3 m)");
  4288. begin_scope();
  4289. statement("return mat3(m[0][0], m[1][0], m[2][0], m[0][1], m[1][1], m[2][1], m[0][2], m[1][2], m[2][2]);");
  4290. end_scope();
  4291. statement("");
  4292. }
  4293. if (polyfills & PolyfillTranspose4x4)
  4294. {
  4295. statement(qual, "mat4 spvTranspose", suffix, "(", qual, "mat4 m)");
  4296. begin_scope();
  4297. statement("return mat4(m[0][0], m[1][0], m[2][0], m[3][0], m[0][1], m[1][1], m[2][1], m[3][1], m[0][2], "
  4298. "m[1][2], m[2][2], m[3][2], m[0][3], m[1][3], m[2][3], m[3][3]);");
  4299. end_scope();
  4300. statement("");
  4301. }
  4302. if (polyfills & PolyfillDeterminant2x2)
  4303. {
  4304. statement(qual, "float spvDeterminant", suffix, "(", qual, "mat2 m)");
  4305. begin_scope();
  4306. statement("return m[0][0] * m[1][1] - m[0][1] * m[1][0];");
  4307. end_scope();
  4308. statement("");
  4309. }
  4310. if (polyfills & PolyfillDeterminant3x3)
  4311. {
  4312. statement(qual, "float spvDeterminant", suffix, "(", qual, "mat3 m)");
  4313. begin_scope();
  4314. statement("return dot(m[0], vec3(m[1][1] * m[2][2] - m[1][2] * m[2][1], "
  4315. "m[1][2] * m[2][0] - m[1][0] * m[2][2], "
  4316. "m[1][0] * m[2][1] - m[1][1] * m[2][0]));");
  4317. end_scope();
  4318. statement("");
  4319. }
  4320. if (polyfills & PolyfillDeterminant4x4)
  4321. {
  4322. statement(qual, "float spvDeterminant", suffix, "(", qual, "mat4 m)");
  4323. begin_scope();
  4324. statement("return dot(m[0], vec4("
  4325. "m[2][1] * m[3][2] * m[1][3] - m[3][1] * m[2][2] * m[1][3] + m[3][1] * m[1][2] * m[2][3] - m[1][1] * m[3][2] * m[2][3] - m[2][1] * m[1][2] * m[3][3] + m[1][1] * m[2][2] * m[3][3], "
  4326. "m[3][0] * m[2][2] * m[1][3] - m[2][0] * m[3][2] * m[1][3] - m[3][0] * m[1][2] * m[2][3] + m[1][0] * m[3][2] * m[2][3] + m[2][0] * m[1][2] * m[3][3] - m[1][0] * m[2][2] * m[3][3], "
  4327. "m[2][0] * m[3][1] * m[1][3] - m[3][0] * m[2][1] * m[1][3] + m[3][0] * m[1][1] * m[2][3] - m[1][0] * m[3][1] * m[2][3] - m[2][0] * m[1][1] * m[3][3] + m[1][0] * m[2][1] * m[3][3], "
  4328. "m[3][0] * m[2][1] * m[1][2] - m[2][0] * m[3][1] * m[1][2] - m[3][0] * m[1][1] * m[2][2] + m[1][0] * m[3][1] * m[2][2] + m[2][0] * m[1][1] * m[3][2] - m[1][0] * m[2][1] * m[3][2]));");
  4329. end_scope();
  4330. statement("");
  4331. }
  4332. if (polyfills & PolyfillMatrixInverse2x2)
  4333. {
  4334. statement(qual, "mat2 spvInverse", suffix, "(", qual, "mat2 m)");
  4335. begin_scope();
  4336. statement("return mat2(m[1][1], -m[0][1], -m[1][0], m[0][0]) "
  4337. "* (1.0 / (m[0][0] * m[1][1] - m[1][0] * m[0][1]));");
  4338. end_scope();
  4339. statement("");
  4340. }
  4341. if (polyfills & PolyfillMatrixInverse3x3)
  4342. {
  4343. statement(qual, "mat3 spvInverse", suffix, "(", qual, "mat3 m)");
  4344. begin_scope();
  4345. statement(qual, "vec3 t = vec3(m[1][1] * m[2][2] - m[1][2] * m[2][1], m[1][2] * m[2][0] - m[1][0] * m[2][2], m[1][0] * m[2][1] - m[1][1] * m[2][0]);");
  4346. statement("return mat3(t[0], "
  4347. "m[0][2] * m[2][1] - m[0][1] * m[2][2], "
  4348. "m[0][1] * m[1][2] - m[0][2] * m[1][1], "
  4349. "t[1], "
  4350. "m[0][0] * m[2][2] - m[0][2] * m[2][0], "
  4351. "m[0][2] * m[1][0] - m[0][0] * m[1][2], "
  4352. "t[2], "
  4353. "m[0][1] * m[2][0] - m[0][0] * m[2][1], "
  4354. "m[0][0] * m[1][1] - m[0][1] * m[1][0]) "
  4355. "* (1.0 / dot(m[0], t));");
  4356. end_scope();
  4357. statement("");
  4358. }
  4359. if (polyfills & PolyfillMatrixInverse4x4)
  4360. {
  4361. statement(qual, "mat4 spvInverse", suffix, "(", qual, "mat4 m)");
  4362. begin_scope();
  4363. statement(qual, "vec4 t = vec4("
  4364. "m[2][1] * m[3][2] * m[1][3] - m[3][1] * m[2][2] * m[1][3] + m[3][1] * m[1][2] * m[2][3] - m[1][1] * m[3][2] * m[2][3] - m[2][1] * m[1][2] * m[3][3] + m[1][1] * m[2][2] * m[3][3], "
  4365. "m[3][0] * m[2][2] * m[1][3] - m[2][0] * m[3][2] * m[1][3] - m[3][0] * m[1][2] * m[2][3] + m[1][0] * m[3][2] * m[2][3] + m[2][0] * m[1][2] * m[3][3] - m[1][0] * m[2][2] * m[3][3], "
  4366. "m[2][0] * m[3][1] * m[1][3] - m[3][0] * m[2][1] * m[1][3] + m[3][0] * m[1][1] * m[2][3] - m[1][0] * m[3][1] * m[2][3] - m[2][0] * m[1][1] * m[3][3] + m[1][0] * m[2][1] * m[3][3], "
  4367. "m[3][0] * m[2][1] * m[1][2] - m[2][0] * m[3][1] * m[1][2] - m[3][0] * m[1][1] * m[2][2] + m[1][0] * m[3][1] * m[2][2] + m[2][0] * m[1][1] * m[3][2] - m[1][0] * m[2][1] * m[3][2]);");
  4368. statement("return mat4("
  4369. "t[0], "
  4370. "m[3][1] * m[2][2] * m[0][3] - m[2][1] * m[3][2] * m[0][3] - m[3][1] * m[0][2] * m[2][3] + m[0][1] * m[3][2] * m[2][3] + m[2][1] * m[0][2] * m[3][3] - m[0][1] * m[2][2] * m[3][3], "
  4371. "m[1][1] * m[3][2] * m[0][3] - m[3][1] * m[1][2] * m[0][3] + m[3][1] * m[0][2] * m[1][3] - m[0][1] * m[3][2] * m[1][3] - m[1][1] * m[0][2] * m[3][3] + m[0][1] * m[1][2] * m[3][3], "
  4372. "m[2][1] * m[1][2] * m[0][3] - m[1][1] * m[2][2] * m[0][3] - m[2][1] * m[0][2] * m[1][3] + m[0][1] * m[2][2] * m[1][3] + m[1][1] * m[0][2] * m[2][3] - m[0][1] * m[1][2] * m[2][3], "
  4373. "t[1], "
  4374. "m[2][0] * m[3][2] * m[0][3] - m[3][0] * m[2][2] * m[0][3] + m[3][0] * m[0][2] * m[2][3] - m[0][0] * m[3][2] * m[2][3] - m[2][0] * m[0][2] * m[3][3] + m[0][0] * m[2][2] * m[3][3], "
  4375. "m[3][0] * m[1][2] * m[0][3] - m[1][0] * m[3][2] * m[0][3] - m[3][0] * m[0][2] * m[1][3] + m[0][0] * m[3][2] * m[1][3] + m[1][0] * m[0][2] * m[3][3] - m[0][0] * m[1][2] * m[3][3], "
  4376. "m[1][0] * m[2][2] * m[0][3] - m[2][0] * m[1][2] * m[0][3] + m[2][0] * m[0][2] * m[1][3] - m[0][0] * m[2][2] * m[1][3] - m[1][0] * m[0][2] * m[2][3] + m[0][0] * m[1][2] * m[2][3], "
  4377. "t[2], "
  4378. "m[3][0] * m[2][1] * m[0][3] - m[2][0] * m[3][1] * m[0][3] - m[3][0] * m[0][1] * m[2][3] + m[0][0] * m[3][1] * m[2][3] + m[2][0] * m[0][1] * m[3][3] - m[0][0] * m[2][1] * m[3][3], "
  4379. "m[1][0] * m[3][1] * m[0][3] - m[3][0] * m[1][1] * m[0][3] + m[3][0] * m[0][1] * m[1][3] - m[0][0] * m[3][1] * m[1][3] - m[1][0] * m[0][1] * m[3][3] + m[0][0] * m[1][1] * m[3][3], "
  4380. "m[2][0] * m[1][1] * m[0][3] - m[1][0] * m[2][1] * m[0][3] - m[2][0] * m[0][1] * m[1][3] + m[0][0] * m[2][1] * m[1][3] + m[1][0] * m[0][1] * m[2][3] - m[0][0] * m[1][1] * m[2][3], "
  4381. "t[3], "
  4382. "m[2][0] * m[3][1] * m[0][2] - m[3][0] * m[2][1] * m[0][2] + m[3][0] * m[0][1] * m[2][2] - m[0][0] * m[3][1] * m[2][2] - m[2][0] * m[0][1] * m[3][2] + m[0][0] * m[2][1] * m[3][2], "
  4383. "m[3][0] * m[1][1] * m[0][2] - m[1][0] * m[3][1] * m[0][2] - m[3][0] * m[0][1] * m[1][2] + m[0][0] * m[3][1] * m[1][2] + m[1][0] * m[0][1] * m[3][2] - m[0][0] * m[1][1] * m[3][2], "
  4384. "m[1][0] * m[2][1] * m[0][2] - m[2][0] * m[1][1] * m[0][2] + m[2][0] * m[0][1] * m[1][2] - m[0][0] * m[2][1] * m[1][2] - m[1][0] * m[0][1] * m[2][2] + m[0][0] * m[1][1] * m[2][2]) "
  4385. "* (1.0 / dot(m[0], t));");
  4386. end_scope();
  4387. statement("");
  4388. }
  4389. if (!relaxed)
  4390. {
  4391. static const Polyfill polys[3][3] = {
  4392. { PolyfillNMin16, PolyfillNMin32, PolyfillNMin64 },
  4393. { PolyfillNMax16, PolyfillNMax32, PolyfillNMax64 },
  4394. { PolyfillNClamp16, PolyfillNClamp32, PolyfillNClamp64 },
  4395. };
  4396. static const GLSLstd450 glsl_ops[] = { GLSLstd450NMin, GLSLstd450NMax, GLSLstd450NClamp };
  4397. static const char *spv_ops[] = { "spvNMin", "spvNMax", "spvNClamp" };
  4398. bool has_poly = false;
  4399. for (uint32_t i = 0; i < 3; i++)
  4400. {
  4401. for (uint32_t j = 0; j < 3; j++)
  4402. {
  4403. if ((polyfills & polys[i][j]) == 0)
  4404. continue;
  4405. const char *types[3][4] = {
  4406. { "float16_t", "f16vec2", "f16vec3", "f16vec4" },
  4407. { "float", "vec2", "vec3", "vec4" },
  4408. { "double", "dvec2", "dvec3", "dvec4" },
  4409. };
  4410. for (uint32_t k = 0; k < 4; k++)
  4411. {
  4412. auto *type = types[j][k];
  4413. if (i < 2)
  4414. {
  4415. statement("spirv_instruction(set = \"GLSL.std.450\", id = ", glsl_ops[i], ") ",
  4416. type, " ", spv_ops[i], "(", type, ", ", type, ");");
  4417. }
  4418. else
  4419. {
  4420. statement("spirv_instruction(set = \"GLSL.std.450\", id = ", glsl_ops[i], ") ",
  4421. type, " ", spv_ops[i], "(", type, ", ", type, ", ", type, ");");
  4422. }
  4423. has_poly = true;
  4424. }
  4425. }
  4426. }
  4427. if (has_poly)
  4428. statement("");
  4429. }
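// In the non-relaxed path, each enabled NMin/NMax/NClamp polyfill thus emits a declaration
// along the lines of:
//
//   spirv_instruction(set = "GLSL.std.450", id = GLSLstd450NMin) float spvNMin(float, float);
//
// where the id is printed as the numeric enum value, so the declaration maps straight back
// to the corresponding GLSL.std.450 instruction.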
  4430. else
  4431. {
  4432. // Mediump intrinsics don't work correctly, so wrap the intrinsic in an outer shell that ensures mediump
  4433. // propagation.
  4434. static const Polyfill polys[3][3] = {
  4435. { PolyfillNMin16, PolyfillNMin32, PolyfillNMin64 },
  4436. { PolyfillNMax16, PolyfillNMax32, PolyfillNMax64 },
  4437. { PolyfillNClamp16, PolyfillNClamp32, PolyfillNClamp64 },
  4438. };
  4439. static const char *spv_ops[] = { "spvNMin", "spvNMax", "spvNClamp" };
  4440. for (uint32_t i = 0; i < 3; i++)
  4441. {
  4442. for (uint32_t j = 0; j < 3; j++)
  4443. {
  4444. if ((polyfills & polys[i][j]) == 0)
  4445. continue;
  4446. const char *types[3][4] = {
  4447. { "float16_t", "f16vec2", "f16vec3", "f16vec4" },
  4448. { "float", "vec2", "vec3", "vec4" },
  4449. { "double", "dvec2", "dvec3", "dvec4" },
  4450. };
  4451. for (uint32_t k = 0; k < 4; k++)
  4452. {
  4453. auto *type = types[j][k];
  4454. if (i < 2)
  4455. {
  4456. statement("mediump ", type, " ", spv_ops[i], "Relaxed(",
  4457. "mediump ", type, " a, mediump ", type, " b)");
  4458. begin_scope();
  4459. statement("mediump ", type, " res = ", spv_ops[i], "(a, b);");
  4460. statement("return res;");
  4461. end_scope();
  4462. statement("");
  4463. }
  4464. else
  4465. {
  4466. statement("mediump ", type, " ", spv_ops[i], "Relaxed(",
  4467. "mediump ", type, " a, mediump ", type, " b, mediump ", type, " c)");
  4468. begin_scope();
  4469. statement("mediump ", type, " res = ", spv_ops[i], "(a, b, c);");
  4470. statement("return res;");
  4471. end_scope();
  4472. statement("");
  4473. }
  4474. }
  4475. }
  4476. }
  4477. }
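// In the relaxed path, the wrapper emitted above looks roughly like:
//
//   mediump float spvNMinRelaxed(mediump float a, mediump float b)
//   {
//       mediump float res = spvNMin(a, b);
//       return res;
//   }
//
// The intermediate mediump temporary is what forces relaxed precision to propagate through
// the intrinsic call.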
  4478. }
  4479. // Returns a string representation of the ID, usable as a function arg.
4480. // Default is to simply return the expression representation of the arg ID.
  4481. // Subclasses may override to modify the return value.
  4482. string CompilerGLSL::to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id)
  4483. {
  4484. // BDA expects pointers through function interface.
  4485. if (!arg.alias_global_variable && is_physical_or_buffer_pointer(expression_type(id)))
  4486. return to_pointer_expression(id);
  4487. // Make sure that we use the name of the original variable, and not the parameter alias.
  4488. uint32_t name_id = id;
  4489. auto *var = maybe_get<SPIRVariable>(id);
  4490. if (var && var->basevariable)
  4491. name_id = var->basevariable;
  4492. return to_unpacked_expression(name_id);
  4493. }
  4494. void CompilerGLSL::force_temporary_and_recompile(uint32_t id)
  4495. {
  4496. auto res = forced_temporaries.insert(id);
  4497. // Forcing new temporaries guarantees forward progress.
  4498. if (res.second)
  4499. force_recompile_guarantee_forward_progress();
  4500. else
  4501. force_recompile();
  4502. }
  4503. uint32_t CompilerGLSL::consume_temporary_in_precision_context(uint32_t type_id, uint32_t id, Options::Precision precision)
  4504. {
  4505. // Constants do not have innate precision.
  4506. auto handle_type = ir.ids[id].get_type();
  4507. if (handle_type == TypeConstant || handle_type == TypeConstantOp || handle_type == TypeUndef)
  4508. return id;
4509. // Ignore anything that isn't a 32-bit value.
  4510. auto &type = get<SPIRType>(type_id);
  4511. if (type.pointer)
  4512. return id;
  4513. if (type.basetype != SPIRType::Float && type.basetype != SPIRType::UInt && type.basetype != SPIRType::Int)
  4514. return id;
  4515. if (precision == Options::DontCare)
  4516. {
  4517. // If precision is consumed as don't care (operations only consisting of constants),
  4518. // we need to bind the expression to a temporary,
  4519. // otherwise we have no way of controlling the precision later.
  4520. auto itr = forced_temporaries.insert(id);
  4521. if (itr.second)
  4522. force_recompile_guarantee_forward_progress();
  4523. return id;
  4524. }
  4525. auto current_precision = has_decoration(id, DecorationRelaxedPrecision) ? Options::Mediump : Options::Highp;
  4526. if (current_precision == precision)
  4527. return id;
  4528. auto itr = temporary_to_mirror_precision_alias.find(id);
  4529. if (itr == temporary_to_mirror_precision_alias.end())
  4530. {
  4531. uint32_t alias_id = ir.increase_bound_by(1);
  4532. auto &m = ir.meta[alias_id];
  4533. if (auto *input_m = ir.find_meta(id))
  4534. m = *input_m;
  4535. const char *prefix;
  4536. if (precision == Options::Mediump)
  4537. {
  4538. set_decoration(alias_id, DecorationRelaxedPrecision);
  4539. prefix = "mp_copy_";
  4540. }
  4541. else
  4542. {
  4543. unset_decoration(alias_id, DecorationRelaxedPrecision);
  4544. prefix = "hp_copy_";
  4545. }
  4546. auto alias_name = join(prefix, to_name(id));
  4547. ParsedIR::sanitize_underscores(alias_name);
  4548. set_name(alias_id, alias_name);
  4549. emit_op(type_id, alias_id, to_expression(id), true);
  4550. temporary_to_mirror_precision_alias[id] = alias_id;
  4551. forced_temporaries.insert(id);
  4552. forced_temporaries.insert(alias_id);
  4553. force_recompile_guarantee_forward_progress();
  4554. id = alias_id;
  4555. }
  4556. else
  4557. {
  4558. id = itr->second;
  4559. }
  4560. return id;
  4561. }
  4562. void CompilerGLSL::handle_invalid_expression(uint32_t id)
  4563. {
  4564. // We tried to read an invalidated expression.
  4565. // This means we need another pass at compilation, but next time,
  4566. // force temporary variables so that they cannot be invalidated.
  4567. force_temporary_and_recompile(id);
  4568. // If the invalid expression happened as a result of a CompositeInsert
  4569. // overwrite, we must block this from happening next iteration.
  4570. if (composite_insert_overwritten.count(id))
  4571. block_composite_insert_overwrite.insert(id);
  4572. }
  4573. // Converts the format of the current expression from packed to unpacked,
  4574. // by wrapping the expression in a constructor of the appropriate type.
  4575. // GLSL does not support packed formats, so simply return the expression.
  4576. // Subclasses that do will override.
  4577. string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &, uint32_t, bool, bool)
  4578. {
  4579. return expr_str;
  4580. }
4581. // Sometimes we proactively enclose an expression where it turns out we might not have needed it after all.
  4582. void CompilerGLSL::strip_enclosed_expression(string &expr)
  4583. {
  4584. if (expr.size() < 2 || expr.front() != '(' || expr.back() != ')')
  4585. return;
  4586. // Have to make sure that our first and last parens actually enclose everything inside it.
  4587. uint32_t paren_count = 0;
  4588. for (auto &c : expr)
  4589. {
  4590. if (c == '(')
  4591. paren_count++;
  4592. else if (c == ')')
  4593. {
  4594. paren_count--;
  4595. // If we hit 0 and this is not the final char, our first and final parens actually don't
  4596. // enclose the expression, and we cannot strip, e.g.: (a + b) * (c + d).
  4597. if (paren_count == 0 && &c != &expr.back())
  4598. return;
  4599. }
  4600. }
  4601. expr.erase(expr.size() - 1, 1);
  4602. expr.erase(begin(expr));
  4603. }
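// Illustrative behaviour (sketch):
//   "(a + b)"           -> "a + b"      (outer parens enclose the whole expression)
//   "(a + b) * (c + d)" -> unchanged    (first and last parens do not enclose everything)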
  4604. bool CompilerGLSL::needs_enclose_expression(const std::string &expr)
  4605. {
  4606. bool need_parens = false;
4607. // If the expression starts with a unary operator, we need to enclose it to deal with cases where we have back-to-back
  4608. // unary expressions.
  4609. if (!expr.empty())
  4610. {
  4611. auto c = expr.front();
  4612. if (c == '-' || c == '+' || c == '!' || c == '~' || c == '&' || c == '*')
  4613. need_parens = true;
  4614. }
  4615. if (!need_parens)
  4616. {
  4617. uint32_t paren_count = 0;
  4618. for (auto c : expr)
  4619. {
  4620. if (c == '(' || c == '[')
  4621. paren_count++;
  4622. else if (c == ')' || c == ']')
  4623. {
  4624. assert(paren_count);
  4625. paren_count--;
  4626. }
  4627. else if (c == ' ' && paren_count == 0)
  4628. {
  4629. need_parens = true;
  4630. break;
  4631. }
  4632. }
  4633. assert(paren_count == 0);
  4634. }
  4635. return need_parens;
  4636. }
  4637. string CompilerGLSL::enclose_expression(const string &expr)
  4638. {
  4639. // If this expression contains any spaces which are not enclosed by parentheses,
  4640. // we need to enclose it so we can treat the whole string as an expression.
  4641. // This happens when two expressions have been part of a binary op earlier.
  4642. if (needs_enclose_expression(expr))
  4643. return join('(', expr, ')');
  4644. else
  4645. return expr;
  4646. }
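// Roughly: "a + b" and "-x" are wrapped as "(a + b)" and "(-x)", while something like
// "texelFetch(uTex, 0)" is returned unchanged since its only spaces sit inside parentheses.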
  4647. string CompilerGLSL::dereference_expression(const SPIRType &expr_type, const std::string &expr)
  4648. {
  4649. // If this expression starts with an address-of operator ('&'), then
  4650. // just return the part after the operator.
  4651. // TODO: Strip parens if unnecessary?
  4652. if (expr.front() == '&')
  4653. return expr.substr(1);
  4654. else if (backend.native_pointers)
  4655. return join('*', expr);
  4656. else if (is_physical_pointer(expr_type) && !is_physical_pointer_to_buffer_block(expr_type))
  4657. return join(enclose_expression(expr), ".value");
  4658. else
  4659. return expr;
  4660. }
  4661. string CompilerGLSL::address_of_expression(const std::string &expr)
  4662. {
  4663. if (expr.size() > 3 && expr[0] == '(' && expr[1] == '*' && expr.back() == ')')
  4664. {
  4665. // If we have an expression which looks like (*foo), taking the address of it is the same as stripping
  4666. // the first two and last characters. We might have to enclose the expression.
  4667. // This doesn't work for cases like (*foo + 10),
  4668. // but this is an r-value expression which we cannot take the address of anyways.
  4669. return enclose_expression(expr.substr(2, expr.size() - 3));
  4670. }
  4671. else if (expr.front() == '*')
  4672. {
  4673. // If this expression starts with a dereference operator ('*'), then
  4674. // just return the part after the operator.
  4675. return expr.substr(1);
  4676. }
  4677. else
  4678. return join('&', enclose_expression(expr));
  4679. }
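// Sketch of the three cases handled above:
//   "(*foo)" -> "foo"
//   "*foo"   -> "foo"
//   "foo"    -> "&foo"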
  4680. // Just like to_expression except that we enclose the expression inside parentheses if needed.
  4681. string CompilerGLSL::to_enclosed_expression(uint32_t id, bool register_expression_read)
  4682. {
  4683. return enclose_expression(to_expression(id, register_expression_read));
  4684. }
  4685. // Used explicitly when we want to read a row-major expression, but without any transpose shenanigans.
  4686. // need_transpose must be forced to false.
  4687. string CompilerGLSL::to_unpacked_row_major_matrix_expression(uint32_t id)
  4688. {
  4689. return unpack_expression_type(to_expression(id), expression_type(id),
  4690. get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
  4691. has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), true);
  4692. }
  4693. string CompilerGLSL::to_unpacked_expression(uint32_t id, bool register_expression_read)
  4694. {
  4695. // If we need to transpose, it will also take care of unpacking rules.
  4696. auto *e = maybe_get<SPIRExpression>(id);
  4697. bool need_transpose = e && e->need_transpose;
  4698. bool is_remapped = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
  4699. bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
  4700. if (!need_transpose && (is_remapped || is_packed))
  4701. {
  4702. return unpack_expression_type(to_expression(id, register_expression_read),
  4703. get_pointee_type(expression_type_id(id)),
  4704. get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
  4705. has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), false);
  4706. }
  4707. else
  4708. return to_expression(id, register_expression_read);
  4709. }
  4710. string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read)
  4711. {
  4712. return enclose_expression(to_unpacked_expression(id, register_expression_read));
  4713. }
  4714. string CompilerGLSL::to_dereferenced_expression(uint32_t id, bool register_expression_read)
  4715. {
  4716. auto &type = expression_type(id);
  4717. if (is_pointer(type) && should_dereference(id))
  4718. return dereference_expression(type, to_enclosed_expression(id, register_expression_read));
  4719. else
  4720. return to_expression(id, register_expression_read);
  4721. }
  4722. string CompilerGLSL::to_pointer_expression(uint32_t id, bool register_expression_read)
  4723. {
  4724. auto &type = expression_type(id);
  4725. if (is_pointer(type) && expression_is_lvalue(id) && !should_dereference(id))
  4726. return address_of_expression(to_enclosed_expression(id, register_expression_read));
  4727. else
  4728. return to_unpacked_expression(id, register_expression_read);
  4729. }
  4730. string CompilerGLSL::to_enclosed_pointer_expression(uint32_t id, bool register_expression_read)
  4731. {
  4732. auto &type = expression_type(id);
  4733. if (is_pointer(type) && expression_is_lvalue(id) && !should_dereference(id))
  4734. return address_of_expression(to_enclosed_expression(id, register_expression_read));
  4735. else
  4736. return to_enclosed_unpacked_expression(id, register_expression_read);
  4737. }
  4738. string CompilerGLSL::to_extract_component_expression(uint32_t id, uint32_t index)
  4739. {
  4740. auto expr = to_enclosed_expression(id);
  4741. if (has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked))
  4742. return join(expr, "[", index, "]");
  4743. else
  4744. return join(expr, ".", index_to_swizzle(index));
  4745. }
  4746. string CompilerGLSL::to_extract_constant_composite_expression(uint32_t result_type, const SPIRConstant &c,
  4747. const uint32_t *chain, uint32_t length)
  4748. {
4749. // It is kinda silly if an application actually enters this path since it knows the constant up front.
  4750. // It is useful here to extract the plain constant directly.
  4751. SPIRConstant tmp;
  4752. tmp.constant_type = result_type;
  4753. auto &composite_type = get<SPIRType>(c.constant_type);
  4754. assert(composite_type.basetype != SPIRType::Struct && composite_type.array.empty());
  4755. assert(!c.specialization);
  4756. if (is_matrix(composite_type))
  4757. {
  4758. if (length == 2)
  4759. {
  4760. tmp.m.c[0].vecsize = 1;
  4761. tmp.m.columns = 1;
  4762. tmp.m.c[0].r[0] = c.m.c[chain[0]].r[chain[1]];
  4763. }
  4764. else
  4765. {
  4766. assert(length == 1);
  4767. tmp.m.c[0].vecsize = composite_type.vecsize;
  4768. tmp.m.columns = 1;
  4769. tmp.m.c[0] = c.m.c[chain[0]];
  4770. }
  4771. }
  4772. else
  4773. {
  4774. assert(length == 1);
  4775. tmp.m.c[0].vecsize = 1;
  4776. tmp.m.columns = 1;
  4777. tmp.m.c[0].r[0] = c.m.c[0].r[chain[0]];
  4778. }
  4779. return constant_expression(tmp);
  4780. }
  4781. string CompilerGLSL::to_rerolled_array_expression(const SPIRType &parent_type,
  4782. const string &base_expr, const SPIRType &type)
  4783. {
  4784. bool remapped_boolean = parent_type.basetype == SPIRType::Struct &&
  4785. type.basetype == SPIRType::Boolean &&
  4786. backend.boolean_in_struct_remapped_type != SPIRType::Boolean;
  4787. SPIRType tmp_type { OpNop };
  4788. if (remapped_boolean)
  4789. {
  4790. tmp_type = get<SPIRType>(type.parent_type);
  4791. tmp_type.basetype = backend.boolean_in_struct_remapped_type;
  4792. }
  4793. else if (type.basetype == SPIRType::Boolean && backend.boolean_in_struct_remapped_type != SPIRType::Boolean)
  4794. {
  4795. // It's possible that we have an r-value expression that was OpLoaded from a struct.
  4796. // We have to reroll this and explicitly cast the input to bool, because the r-value is short.
  4797. tmp_type = get<SPIRType>(type.parent_type);
  4798. remapped_boolean = true;
  4799. }
  4800. uint32_t size = to_array_size_literal(type);
  4801. auto &parent = get<SPIRType>(type.parent_type);
  4802. string expr = "{ ";
  4803. for (uint32_t i = 0; i < size; i++)
  4804. {
  4805. auto subexpr = join(base_expr, "[", convert_to_string(i), "]");
  4806. if (!is_array(parent))
  4807. {
  4808. if (remapped_boolean)
  4809. subexpr = join(type_to_glsl(tmp_type), "(", subexpr, ")");
  4810. expr += subexpr;
  4811. }
  4812. else
  4813. expr += to_rerolled_array_expression(parent_type, subexpr, parent);
  4814. if (i + 1 < size)
  4815. expr += ", ";
  4816. }
  4817. expr += " }";
  4818. return expr;
  4819. }
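// e.g. rerolling a two-element array expression "arr" produces "{ arr[0], arr[1] }",
// recursing for nested arrays and casting each element when booleans were remapped
// to another type inside structs.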
  4820. string CompilerGLSL::to_composite_constructor_expression(const SPIRType &parent_type, uint32_t id, bool block_like_type)
  4821. {
  4822. auto &type = expression_type(id);
  4823. bool reroll_array = false;
  4824. bool remapped_boolean = parent_type.basetype == SPIRType::Struct &&
  4825. type.basetype == SPIRType::Boolean &&
  4826. backend.boolean_in_struct_remapped_type != SPIRType::Boolean;
  4827. if (is_array(type))
  4828. {
  4829. reroll_array = !backend.array_is_value_type ||
  4830. (block_like_type && !backend.array_is_value_type_in_buffer_blocks);
  4831. if (remapped_boolean)
  4832. {
  4833. // Forced to reroll if we have to change bool[] to short[].
  4834. reroll_array = true;
  4835. }
  4836. }
  4837. if (reroll_array)
  4838. {
  4839. // For this case, we need to "re-roll" an array initializer from a temporary.
  4840. // We cannot simply pass the array directly, since it decays to a pointer and it cannot
  4841. // participate in a struct initializer. E.g.
  4842. // float arr[2] = { 1.0, 2.0 };
  4843. // Foo foo = { arr }; must be transformed to
  4844. // Foo foo = { { arr[0], arr[1] } };
  4845. // The array sizes cannot be deduced from specialization constants since we cannot use any loops.
  4846. // We're only triggering one read of the array expression, but this is fine since arrays have to be declared
  4847. // as temporaries anyways.
  4848. return to_rerolled_array_expression(parent_type, to_enclosed_expression(id), type);
  4849. }
  4850. else
  4851. {
  4852. auto expr = to_unpacked_expression(id);
  4853. if (remapped_boolean)
  4854. {
  4855. auto tmp_type = type;
  4856. tmp_type.basetype = backend.boolean_in_struct_remapped_type;
  4857. expr = join(type_to_glsl(tmp_type), "(", expr, ")");
  4858. }
  4859. return expr;
  4860. }
  4861. }
  4862. string CompilerGLSL::to_non_uniform_aware_expression(uint32_t id)
  4863. {
  4864. string expr = to_expression(id);
  4865. if (has_decoration(id, DecorationNonUniform))
  4866. convert_non_uniform_expression(expr, id);
  4867. return expr;
  4868. }
  4869. string CompilerGLSL::to_atomic_ptr_expression(uint32_t id)
  4870. {
  4871. string expr = to_non_uniform_aware_expression(id);
4872. // If we have a naked pointer to POD, we need to dereference it to get the proper ".value" resolve.
  4873. if (should_dereference(id))
  4874. expr = dereference_expression(expression_type(id), expr);
  4875. return expr;
  4876. }
  4877. string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read)
  4878. {
  4879. auto itr = invalid_expressions.find(id);
  4880. if (itr != end(invalid_expressions))
  4881. handle_invalid_expression(id);
  4882. if (ir.ids[id].get_type() == TypeExpression)
  4883. {
  4884. // We might have a more complex chain of dependencies.
4885. // A possible scenario is that we have:
  4886. //
  4887. // %1 = OpLoad
4888. // %2 = OpDoSomething %1 %1. Here %2 will have a dependency on %1.
  4889. // %3 = OpDoSomethingAgain %2 %2. Here %3 will lose the link to %1 since we don't propagate the dependencies like that.
  4890. // OpStore %1 %foo // Here we can invalidate %1, and hence all expressions which depend on %1. Only %2 will know since it's part of invalid_expressions.
  4891. // %4 = OpDoSomethingAnotherTime %3 %3 // If we forward all expressions we will see %1 expression after store, not before.
  4892. //
  4893. // However, we can propagate up a list of depended expressions when we used %2, so we can check if %2 is invalid when reading %3 after the store,
  4894. // and see that we should not forward reads of the original variable.
  4895. auto &expr = get<SPIRExpression>(id);
  4896. for (uint32_t dep : expr.expression_dependencies)
  4897. if (invalid_expressions.find(dep) != end(invalid_expressions))
  4898. handle_invalid_expression(dep);
  4899. }
  4900. if (register_expression_read)
  4901. track_expression_read(id);
  4902. switch (ir.ids[id].get_type())
  4903. {
  4904. case TypeExpression:
  4905. {
  4906. auto &e = get<SPIRExpression>(id);
  4907. if (e.base_expression)
  4908. return to_enclosed_expression(e.base_expression) + e.expression;
  4909. else if (e.need_transpose)
  4910. {
  4911. // This should not be reached for access chains, since we always deal explicitly with transpose state
  4912. // when consuming an access chain expression.
  4913. uint32_t physical_type_id = get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
  4914. bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
  4915. bool relaxed = has_decoration(id, DecorationRelaxedPrecision);
  4916. return convert_row_major_matrix(e.expression, get<SPIRType>(e.expression_type), physical_type_id,
  4917. is_packed, relaxed);
  4918. }
  4919. else if (flattened_structs.count(id))
  4920. {
  4921. return load_flattened_struct(e.expression, get<SPIRType>(e.expression_type));
  4922. }
  4923. else
  4924. {
  4925. if (is_forcing_recompilation())
  4926. {
  4927. // During first compilation phase, certain expression patterns can trigger exponential growth of memory.
  4928. // Avoid this by returning dummy expressions during this phase.
  4929. // Do not use empty expressions here, because those are sentinels for other cases.
  4930. return "_";
  4931. }
  4932. else
  4933. return e.expression;
  4934. }
  4935. }
  4936. case TypeConstant:
  4937. {
  4938. auto &c = get<SPIRConstant>(id);
  4939. auto &type = get<SPIRType>(c.constant_type);
  4940. // WorkGroupSize may be a constant.
  4941. if (has_decoration(c.self, DecorationBuiltIn))
  4942. return builtin_to_glsl(BuiltIn(get_decoration(c.self, DecorationBuiltIn)), StorageClassGeneric);
  4943. else if (c.specialization)
  4944. {
  4945. if (backend.workgroup_size_is_hidden)
  4946. {
  4947. int wg_index = get_constant_mapping_to_workgroup_component(c);
  4948. if (wg_index >= 0)
  4949. {
  4950. auto wg_size = join(builtin_to_glsl(BuiltInWorkgroupSize, StorageClassInput), vector_swizzle(1, wg_index));
  4951. if (type.basetype != SPIRType::UInt)
  4952. wg_size = bitcast_expression(type, SPIRType::UInt, wg_size);
  4953. return wg_size;
  4954. }
  4955. }
  4956. if (expression_is_forwarded(id))
  4957. return constant_expression(c);
  4958. return to_name(id);
  4959. }
  4960. else if (c.is_used_as_lut)
  4961. return to_name(id);
  4962. else if (type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline)
  4963. return to_name(id);
  4964. else if (!type.array.empty() && !backend.can_declare_arrays_inline)
  4965. return to_name(id);
  4966. else
  4967. return constant_expression(c);
  4968. }
  4969. case TypeConstantOp:
  4970. return to_name(id);
  4971. case TypeVariable:
  4972. {
  4973. auto &var = get<SPIRVariable>(id);
  4974. // If we try to use a loop variable before the loop header, we have to redirect it to the static expression,
4975. // since the variable has not been declared yet.
  4976. if (var.statically_assigned || (var.loop_variable && !var.loop_variable_enable))
  4977. {
  4978. // We might try to load from a loop variable before it has been initialized.
4979. // Prefer the static expression and fall back to the initializer.
  4980. if (var.static_expression)
  4981. return to_expression(var.static_expression);
  4982. else if (var.initializer)
  4983. return to_expression(var.initializer);
  4984. else
  4985. {
  4986. // We cannot declare the variable yet, so have to fake it.
  4987. uint32_t undef_id = ir.increase_bound_by(1);
  4988. return emit_uninitialized_temporary_expression(get_variable_data_type_id(var), undef_id).expression;
  4989. }
  4990. }
  4991. else if (var.deferred_declaration)
  4992. {
  4993. var.deferred_declaration = false;
  4994. return variable_decl(var);
  4995. }
  4996. else if (flattened_structs.count(id))
  4997. {
  4998. return load_flattened_struct(to_name(id), get<SPIRType>(var.basetype));
  4999. }
  5000. else
  5001. {
  5002. auto &dec = ir.meta[var.self].decoration;
  5003. if (dec.builtin)
  5004. return builtin_to_glsl(dec.builtin_type, var.storage);
  5005. else
  5006. return to_name(id);
  5007. }
  5008. }
  5009. case TypeCombinedImageSampler:
5010. // We should never take the expression of this type directly.
  5011. // The intention is that texture sampling functions will extract the image and samplers
  5012. // separately and take their expressions as needed.
  5013. // GLSL does not use this type because OpSampledImage immediately creates a combined image sampler
  5014. // expression ala sampler2D(texture, sampler).
  5015. SPIRV_CROSS_THROW("Combined image samplers have no default expression representation.");
  5016. case TypeAccessChain:
5017. // We cannot express this type. Access chains only have meaning in other OpAccessChains, OpStore or OpLoad.
  5018. SPIRV_CROSS_THROW("Access chains have no default expression representation.");
  5019. default:
  5020. return to_name(id);
  5021. }
  5022. }
  5023. SmallVector<ConstantID> CompilerGLSL::get_composite_constant_ids(ConstantID const_id)
  5024. {
  5025. if (auto *constant = maybe_get<SPIRConstant>(const_id))
  5026. {
  5027. const auto &type = get<SPIRType>(constant->constant_type);
  5028. if (is_array(type) || type.basetype == SPIRType::Struct)
  5029. return constant->subconstants;
  5030. if (is_matrix(type))
  5031. return SmallVector<ConstantID>(constant->m.id);
  5032. if (is_vector(type))
  5033. return SmallVector<ConstantID>(constant->m.c[0].id);
  5034. SPIRV_CROSS_THROW("Unexpected scalar constant!");
  5035. }
  5036. if (!const_composite_insert_ids.count(const_id))
  5037. SPIRV_CROSS_THROW("Unimplemented for this OpSpecConstantOp!");
  5038. return const_composite_insert_ids[const_id];
  5039. }
  5040. void CompilerGLSL::fill_composite_constant(SPIRConstant &constant, TypeID type_id,
  5041. const SmallVector<ConstantID> &initializers)
  5042. {
  5043. auto &type = get<SPIRType>(type_id);
  5044. constant.specialization = true;
  5045. if (is_array(type) || type.basetype == SPIRType::Struct)
  5046. {
  5047. constant.subconstants = initializers;
  5048. }
  5049. else if (is_matrix(type))
  5050. {
  5051. constant.m.columns = type.columns;
  5052. for (uint32_t i = 0; i < type.columns; ++i)
  5053. {
  5054. constant.m.id[i] = initializers[i];
  5055. constant.m.c[i].vecsize = type.vecsize;
  5056. }
  5057. }
  5058. else if (is_vector(type))
  5059. {
  5060. constant.m.c[0].vecsize = type.vecsize;
  5061. for (uint32_t i = 0; i < type.vecsize; ++i)
  5062. constant.m.c[0].id[i] = initializers[i];
  5063. }
  5064. else
  5065. SPIRV_CROSS_THROW("Unexpected scalar in SpecConstantOp CompositeInsert!");
  5066. }
  5067. void CompilerGLSL::set_composite_constant(ConstantID const_id, TypeID type_id,
  5068. const SmallVector<ConstantID> &initializers)
  5069. {
  5070. if (maybe_get<SPIRConstantOp>(const_id))
  5071. {
  5072. const_composite_insert_ids[const_id] = initializers;
  5073. return;
  5074. }
  5075. auto &constant = set<SPIRConstant>(const_id, type_id);
  5076. fill_composite_constant(constant, type_id, initializers);
  5077. forwarded_temporaries.insert(const_id);
  5078. }
  5079. TypeID CompilerGLSL::get_composite_member_type(TypeID type_id, uint32_t member_idx)
  5080. {
  5081. auto &type = get<SPIRType>(type_id);
  5082. if (is_array(type))
  5083. return type.parent_type;
  5084. if (type.basetype == SPIRType::Struct)
  5085. return type.member_types[member_idx];
  5086. if (is_matrix(type))
  5087. return type.parent_type;
  5088. if (is_vector(type))
  5089. return type.parent_type;
  5090. SPIRV_CROSS_THROW("Shouldn't reach lower than vector handling OpSpecConstantOp CompositeInsert!");
  5091. }
  5092. string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop)
  5093. {
  5094. auto &type = get<SPIRType>(cop.basetype);
  5095. bool binary = false;
  5096. bool unary = false;
  5097. string op;
  5098. if (is_legacy() && is_unsigned_opcode(cop.opcode))
  5099. SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets.");
  5100. // TODO: Find a clean way to reuse emit_instruction.
  5101. switch (cop.opcode)
  5102. {
  5103. case OpSConvert:
  5104. case OpUConvert:
  5105. case OpFConvert:
  5106. op = type_to_glsl_constructor(type);
  5107. break;
  5108. #define GLSL_BOP(opname, x) \
  5109. case Op##opname: \
  5110. binary = true; \
  5111. op = x; \
  5112. break
  5113. #define GLSL_UOP(opname, x) \
  5114. case Op##opname: \
  5115. unary = true; \
  5116. op = x; \
  5117. break
  5118. GLSL_UOP(SNegate, "-");
  5119. GLSL_UOP(Not, "~");
  5120. GLSL_BOP(IAdd, "+");
  5121. GLSL_BOP(ISub, "-");
  5122. GLSL_BOP(IMul, "*");
  5123. GLSL_BOP(SDiv, "/");
  5124. GLSL_BOP(UDiv, "/");
  5125. GLSL_BOP(UMod, "%");
  5126. GLSL_BOP(SMod, "%");
  5127. GLSL_BOP(ShiftRightLogical, ">>");
  5128. GLSL_BOP(ShiftRightArithmetic, ">>");
  5129. GLSL_BOP(ShiftLeftLogical, "<<");
  5130. GLSL_BOP(BitwiseOr, "|");
  5131. GLSL_BOP(BitwiseXor, "^");
  5132. GLSL_BOP(BitwiseAnd, "&");
  5133. GLSL_BOP(LogicalOr, "||");
  5134. GLSL_BOP(LogicalAnd, "&&");
  5135. GLSL_UOP(LogicalNot, "!");
  5136. GLSL_BOP(LogicalEqual, "==");
  5137. GLSL_BOP(LogicalNotEqual, "!=");
  5138. GLSL_BOP(IEqual, "==");
  5139. GLSL_BOP(INotEqual, "!=");
  5140. GLSL_BOP(ULessThan, "<");
  5141. GLSL_BOP(SLessThan, "<");
  5142. GLSL_BOP(ULessThanEqual, "<=");
  5143. GLSL_BOP(SLessThanEqual, "<=");
  5144. GLSL_BOP(UGreaterThan, ">");
  5145. GLSL_BOP(SGreaterThan, ">");
  5146. GLSL_BOP(UGreaterThanEqual, ">=");
  5147. GLSL_BOP(SGreaterThanEqual, ">=");
  5148. case OpSRem:
  5149. {
  5150. uint32_t op0 = cop.arguments[0];
  5151. uint32_t op1 = cop.arguments[1];
  5152. return join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "(",
  5153. to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")");
  5154. }
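// i.e. OpSRem on spec constants is lowered to "a - b * (a / b)"; with truncating integer
// division this yields a remainder carrying the sign of operand 0, matching SRem semantics.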
  5155. case OpSelect:
  5156. {
  5157. if (cop.arguments.size() < 3)
  5158. SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
  5159. // This one is pretty annoying. It's triggered from
  5160. // uint(bool), int(bool) from spec constants.
  5161. // In order to preserve its compile-time constness in Vulkan GLSL,
  5162. // we need to reduce the OpSelect expression back to this simplified model.
  5163. // If we cannot, fail.
  5164. if (to_trivial_mix_op(type, op, cop.arguments[2], cop.arguments[1], cop.arguments[0]))
  5165. {
  5166. // Implement as a simple cast down below.
  5167. }
  5168. else
  5169. {
  5170. // Implement a ternary and pray the compiler understands it :)
  5171. return to_ternary_expression(type, cop.arguments[0], cop.arguments[1], cop.arguments[2]);
  5172. }
  5173. break;
  5174. }
  5175. case OpVectorShuffle:
  5176. {
  5177. string expr = type_to_glsl_constructor(type);
  5178. expr += "(";
  5179. uint32_t left_components = expression_type(cop.arguments[0]).vecsize;
  5180. string left_arg = to_enclosed_expression(cop.arguments[0]);
  5181. string right_arg = to_enclosed_expression(cop.arguments[1]);
  5182. for (uint32_t i = 2; i < uint32_t(cop.arguments.size()); i++)
  5183. {
  5184. uint32_t index = cop.arguments[i];
  5185. if (index == 0xFFFFFFFF)
  5186. {
  5187. SPIRConstant c;
  5188. c.constant_type = type.parent_type;
  5189. assert(type.parent_type != ID(0));
  5190. expr += constant_expression(c);
  5191. }
  5192. else if (index >= left_components)
  5193. {
  5194. expr += right_arg + "." + "xyzw"[index - left_components];
  5195. }
  5196. else
  5197. {
  5198. expr += left_arg + "." + "xyzw"[index];
  5199. }
  5200. if (i + 1 < uint32_t(cop.arguments.size()))
  5201. expr += ", ";
  5202. }
  5203. expr += ")";
  5204. return expr;
  5205. }
  5206. case OpCompositeExtract:
  5207. {
5208. // For trivial vector extracts (typically of WorkGroupSize),
5209. // punch through to the input spec constant if the composite is used as an array size.
  5210. const auto *c = maybe_get<SPIRConstant>(cop.arguments[0]);
  5211. string expr;
  5212. if (c && cop.arguments.size() == 2 && c->is_used_as_array_length &&
  5213. !backend.supports_spec_constant_array_size &&
  5214. is_vector(get<SPIRType>(c->constant_type)))
  5215. {
  5216. expr = to_expression(c->specialization_constant_id(0, cop.arguments[1]));
  5217. }
  5218. else
  5219. {
  5220. expr = access_chain_internal(cop.arguments[0], &cop.arguments[1], uint32_t(cop.arguments.size() - 1),
  5221. ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
  5222. }
  5223. return expr;
  5224. }
  5225. case OpCompositeInsert:
  5226. {
  5227. SmallVector<ConstantID> new_init = get_composite_constant_ids(cop.arguments[1]);
  5228. uint32_t idx;
  5229. uint32_t target_id = cop.self;
  5230. uint32_t target_type_id = cop.basetype;
  5231. // We have to drill down to the part we want to modify, and create new
  5232. // constants for each containing part.
  5233. for (idx = 2; idx < cop.arguments.size() - 1; ++idx)
  5234. {
  5235. uint32_t new_const = ir.increase_bound_by(1);
  5236. uint32_t old_const = new_init[cop.arguments[idx]];
  5237. new_init[cop.arguments[idx]] = new_const;
  5238. set_composite_constant(target_id, target_type_id, new_init);
  5239. new_init = get_composite_constant_ids(old_const);
  5240. target_id = new_const;
  5241. target_type_id = get_composite_member_type(target_type_id, cop.arguments[idx]);
  5242. }
  5243. // Now replace the initializer with the one from this instruction.
  5244. new_init[cop.arguments[idx]] = cop.arguments[0];
  5245. set_composite_constant(target_id, target_type_id, new_init);
  5246. SPIRConstant tmp_const(cop.basetype);
  5247. fill_composite_constant(tmp_const, cop.basetype, const_composite_insert_ids[cop.self]);
  5248. return constant_expression(tmp_const);
  5249. }
  5250. default:
5251. // Some opcodes are unimplemented here; these are currently not possible to test from glslang.
  5252. SPIRV_CROSS_THROW("Unimplemented spec constant op.");
  5253. }
  5254. uint32_t bit_width = 0;
  5255. if (unary || binary || cop.opcode == OpSConvert || cop.opcode == OpUConvert)
  5256. bit_width = expression_type(cop.arguments[0]).width;
  5257. SPIRType::BaseType input_type;
  5258. bool skip_cast_if_equal_type = opcode_is_sign_invariant(cop.opcode);
  5259. switch (cop.opcode)
  5260. {
  5261. case OpIEqual:
  5262. case OpINotEqual:
  5263. input_type = to_signed_basetype(bit_width);
  5264. break;
  5265. case OpSLessThan:
  5266. case OpSLessThanEqual:
  5267. case OpSGreaterThan:
  5268. case OpSGreaterThanEqual:
  5269. case OpSMod:
  5270. case OpSDiv:
  5271. case OpShiftRightArithmetic:
  5272. case OpSConvert:
  5273. case OpSNegate:
  5274. input_type = to_signed_basetype(bit_width);
  5275. break;
  5276. case OpULessThan:
  5277. case OpULessThanEqual:
  5278. case OpUGreaterThan:
  5279. case OpUGreaterThanEqual:
  5280. case OpUMod:
  5281. case OpUDiv:
  5282. case OpShiftRightLogical:
  5283. case OpUConvert:
  5284. input_type = to_unsigned_basetype(bit_width);
  5285. break;
  5286. default:
  5287. input_type = type.basetype;
  5288. break;
  5289. }
  5290. #undef GLSL_BOP
  5291. #undef GLSL_UOP
  5292. if (binary)
  5293. {
  5294. if (cop.arguments.size() < 2)
  5295. SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
  5296. string cast_op0;
  5297. string cast_op1;
  5298. auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, cop.arguments[0],
  5299. cop.arguments[1], skip_cast_if_equal_type);
  5300. if (type.basetype != input_type && type.basetype != SPIRType::Boolean)
  5301. {
  5302. expected_type.basetype = input_type;
  5303. auto expr = bitcast_glsl_op(type, expected_type);
  5304. expr += '(';
  5305. expr += join(cast_op0, " ", op, " ", cast_op1);
  5306. expr += ')';
  5307. return expr;
  5308. }
  5309. else
  5310. return join("(", cast_op0, " ", op, " ", cast_op1, ")");
  5311. }
  5312. else if (unary)
  5313. {
  5314. if (cop.arguments.size() < 1)
  5315. SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
  5316. // Auto-bitcast to result type as needed.
  5317. // Works around various casting scenarios in glslang as there is no OpBitcast for specialization constants.
  5318. return join("(", op, bitcast_glsl(type, cop.arguments[0]), ")");
  5319. }
  5320. else if (cop.opcode == OpSConvert || cop.opcode == OpUConvert)
  5321. {
  5322. if (cop.arguments.size() < 1)
  5323. SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
  5324. auto &arg_type = expression_type(cop.arguments[0]);
  5325. if (arg_type.width < type.width && input_type != arg_type.basetype)
  5326. {
  5327. auto expected = arg_type;
  5328. expected.basetype = input_type;
  5329. return join(op, "(", bitcast_glsl(expected, cop.arguments[0]), ")");
  5330. }
  5331. else
  5332. return join(op, "(", to_expression(cop.arguments[0]), ")");
  5333. }
  5334. else
  5335. {
  5336. if (cop.arguments.size() < 1)
  5337. SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
  5338. return join(op, "(", to_expression(cop.arguments[0]), ")");
  5339. }
  5340. }
  5341. string CompilerGLSL::constant_expression(const SPIRConstant &c,
  5342. bool inside_block_like_struct_scope,
  5343. bool inside_struct_scope)
  5344. {
  5345. auto &type = get<SPIRType>(c.constant_type);
  5346. if (is_pointer(type))
  5347. {
  5348. return backend.null_pointer_literal;
  5349. }
  5350. else if (c.is_null_array_specialized_length && backend.requires_matching_array_initializer)
  5351. {
  5352. require_extension_internal("GL_EXT_null_initializer");
  5353. return backend.constant_null_initializer;
  5354. }
  5355. else if (c.replicated && type.op != OpTypeArray)
  5356. {
  5357. if (type.op == OpTypeMatrix)
  5358. {
  5359. uint32_t num_elements = type.columns;
5360. // GLSL does not allow the replication constructor for matrices;
  5361. // mat4(vec4(0.0)) needs to be manually expanded to mat4(vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0));
  5362. std::string res;
  5363. res += type_to_glsl(type);
  5364. res += "(";
  5365. for (uint32_t i = 0; i < num_elements; i++)
  5366. {
  5367. res += to_expression(c.subconstants[0]);
  5368. if (i < num_elements - 1)
  5369. res += ", ";
  5370. }
  5371. res += ")";
  5372. return res;
  5373. }
  5374. else
  5375. {
  5376. return join(type_to_glsl(type), "(", to_expression(c.subconstants[0]), ")");
  5377. }
  5378. }
  5379. else if (!c.subconstants.empty())
  5380. {
  5381. // Handles Arrays and structures.
  5382. string res;
  5383. // Only consider the decay if we are inside a struct scope where we are emitting a member with Offset decoration.
  5384. // Outside a block-like struct declaration, we can always bind to a constant array with templated type.
  5385. // Should look at ArrayStride here as well, but it's possible to declare a constant struct
  5386. // with Offset = 0, using no ArrayStride on the enclosed array type.
  5387. // A particular CTS test hits this scenario.
  5388. bool array_type_decays = inside_block_like_struct_scope &&
  5389. is_array(type) &&
  5390. !backend.array_is_value_type_in_buffer_blocks;
  5391. // Allow Metal to use the array<T> template to make arrays a value type
  5392. bool needs_trailing_tracket = false;
  5393. if (backend.use_initializer_list && backend.use_typed_initializer_list && type.basetype == SPIRType::Struct &&
  5394. !is_array(type))
  5395. {
  5396. res = type_to_glsl_constructor(type) + "{ ";
  5397. }
  5398. else if (backend.use_initializer_list && backend.use_typed_initializer_list && backend.array_is_value_type &&
  5399. is_array(type) && !array_type_decays)
  5400. {
  5401. const auto *p_type = &type;
  5402. SPIRType tmp_type { OpNop };
  5403. if (inside_struct_scope &&
  5404. backend.boolean_in_struct_remapped_type != SPIRType::Boolean &&
  5405. type.basetype == SPIRType::Boolean)
  5406. {
  5407. tmp_type = type;
  5408. tmp_type.basetype = backend.boolean_in_struct_remapped_type;
  5409. p_type = &tmp_type;
  5410. }
  5411. res = type_to_glsl_constructor(*p_type) + "({ ";
  5412. needs_trailing_tracket = true;
  5413. }
  5414. else if (backend.use_initializer_list)
  5415. {
  5416. res = "{ ";
  5417. }
  5418. else
  5419. {
  5420. res = type_to_glsl_constructor(type) + "(";
  5421. }
  5422. uint32_t subconstant_index = 0;
  5423. size_t num_elements = c.subconstants.size();
  5424. if (c.replicated)
  5425. {
  5426. if (type.array.size() != 1)
5427. SPIRV_CROSS_THROW("Multidimensional arrays not yet supported as replicated constants");
  5428. num_elements = type.array[0];
  5429. }
  5430. for (size_t i = 0; i < num_elements; i++)
  5431. {
  5432. auto &elem = c.subconstants[c.replicated ? 0 : i];
  5433. if (auto *op = maybe_get<SPIRConstantOp>(elem))
  5434. {
  5435. res += constant_op_expression(*op);
  5436. }
  5437. else if (maybe_get<SPIRUndef>(elem) != nullptr)
  5438. {
  5439. res += to_name(elem);
  5440. }
  5441. else
  5442. {
  5443. auto &subc = get<SPIRConstant>(elem);
  5444. if (subc.specialization && !expression_is_forwarded(elem))
  5445. res += to_name(elem);
  5446. else
  5447. {
  5448. if (!is_array(type) && type.basetype == SPIRType::Struct)
  5449. {
  5450. // When we get down to emitting struct members, override the block-like information.
  5451. // For constants, we can freely mix and match block-like state.
  5452. inside_block_like_struct_scope =
  5453. has_member_decoration(type.self, subconstant_index, DecorationOffset);
  5454. }
  5455. if (type.basetype == SPIRType::Struct)
  5456. inside_struct_scope = true;
  5457. res += constant_expression(subc, inside_block_like_struct_scope, inside_struct_scope);
  5458. }
  5459. }
  5460. if (i != num_elements - 1)
  5461. res += ", ";
  5462. subconstant_index++;
  5463. }
  5464. res += backend.use_initializer_list ? " }" : ")";
  5465. if (needs_trailing_tracket)
  5466. res += ")";
  5467. return res;
  5468. }
  5469. else if (type.basetype == SPIRType::Struct && type.member_types.size() == 0)
  5470. {
  5471. // Metal tessellation likes empty structs which are then constant expressions.
  5472. if (backend.supports_empty_struct)
  5473. return "{ }";
  5474. else if (backend.use_typed_initializer_list)
  5475. return join(type_to_glsl(type), "{ 0 }");
  5476. else if (backend.use_initializer_list)
  5477. return "{ 0 }";
  5478. else
  5479. return join(type_to_glsl(type), "(0)");
  5480. }
  5481. else if (c.columns() == 1 && type.op != OpTypeCooperativeMatrixKHR)
  5482. {
  5483. auto res = constant_expression_vector(c, 0);
  5484. if (inside_struct_scope &&
  5485. backend.boolean_in_struct_remapped_type != SPIRType::Boolean &&
  5486. type.basetype == SPIRType::Boolean)
  5487. {
  5488. SPIRType tmp_type = type;
  5489. tmp_type.basetype = backend.boolean_in_struct_remapped_type;
  5490. res = join(type_to_glsl(tmp_type), "(", res, ")");
  5491. }
  5492. return res;
  5493. }
  5494. else
  5495. {
  5496. string res = type_to_glsl(type) + "(";
  5497. for (uint32_t col = 0; col < c.columns(); col++)
  5498. {
  5499. if (c.specialization_constant_id(col) != 0)
  5500. res += to_name(c.specialization_constant_id(col));
  5501. else
  5502. res += constant_expression_vector(c, col);
  5503. if (col + 1 < c.columns())
  5504. res += ", ";
  5505. }
  5506. res += ")";
  5507. if (inside_struct_scope &&
  5508. backend.boolean_in_struct_remapped_type != SPIRType::Boolean &&
  5509. type.basetype == SPIRType::Boolean)
  5510. {
  5511. SPIRType tmp_type = type;
  5512. tmp_type.basetype = backend.boolean_in_struct_remapped_type;
  5513. res = join(type_to_glsl(tmp_type), "(", res, ")");
  5514. }
  5515. return res;
  5516. }
  5517. }
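// As a rough sketch of the composite paths above: with use_initializer_list an array
// constant becomes "{ 1.0, 2.0 }" (optionally wrapped in a typed constructor on backends
// where arrays are value types), while plain GLSL falls back to an array constructor such
// as "float[](1.0, 2.0)".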
  5518. #ifdef _MSC_VER
  5519. // snprintf does not exist or is buggy on older MSVC versions, some of them
  5520. // being used by MinGW. Use sprintf instead and disable corresponding warning.
  5521. #pragma warning(push)
  5522. #pragma warning(disable : 4996)
  5523. #endif
  5524. string CompilerGLSL::convert_floate4m3_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
  5525. {
  5526. string res;
  5527. float float_value = c.scalar_floate4m3(col, row);
  5528. // There is no infinity in e4m3.
  5529. if (std::isnan(float_value))
  5530. {
  5531. SPIRType type { OpTypeFloat };
  5532. type.basetype = SPIRType::Half;
  5533. type.vecsize = 1;
  5534. type.columns = 1;
  5535. res = join(type_to_glsl(type), "(0.0 / 0.0)");
  5536. }
  5537. else
  5538. {
  5539. SPIRType type { OpTypeFloat };
  5540. type.basetype = SPIRType::FloatE4M3;
  5541. type.vecsize = 1;
  5542. type.columns = 1;
  5543. res = join(type_to_glsl(type), "(", format_float(float_value), ")");
  5544. }
  5545. return res;
  5546. }
  5547. string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
  5548. {
  5549. string res;
  5550. bool is_bfloat8 = get<SPIRType>(c.constant_type).basetype == SPIRType::FloatE5M2;
  5551. float float_value = is_bfloat8 ? c.scalar_bf8(col, row) : c.scalar_f16(col, row);
  5552. // There is no literal "hf" in GL_NV_gpu_shader5, so to avoid lots
  5553. // of complicated workarounds, just value-cast to the half type always.
  5554. if (std::isnan(float_value) || std::isinf(float_value))
  5555. {
  5556. SPIRType type { OpTypeFloat };
  5557. type.basetype = is_bfloat8 ? SPIRType::FloatE5M2 : SPIRType::Half;
  5558. type.vecsize = 1;
  5559. type.columns = 1;
  5560. if (float_value == numeric_limits<float>::infinity())
  5561. res = join(type_to_glsl(type), "(1.0 / 0.0)");
  5562. else if (float_value == -numeric_limits<float>::infinity())
  5563. res = join(type_to_glsl(type), "(-1.0 / 0.0)");
  5564. else if (std::isnan(float_value))
  5565. res = join(type_to_glsl(type), "(0.0 / 0.0)");
  5566. else
  5567. SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
  5568. }
  5569. else
  5570. {
  5571. SPIRType type { OpTypeFloat };
  5572. type.basetype = is_bfloat8 ? SPIRType::FloatE5M2 : SPIRType::Half;
  5573. type.vecsize = 1;
  5574. type.columns = 1;
  5575. res = join(type_to_glsl(type), "(", format_float(float_value), ")");
  5576. }
  5577. return res;
  5578. }
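// e.g. a NaN half constant comes out as "float16_t(0.0 / 0.0)" and an ordinary value as
// "float16_t(1.5)", since there is no half literal suffix that can be relied upon.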
  5579. string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
  5580. {
  5581. string res;
  5582. bool is_bfloat16 = get<SPIRType>(c.constant_type).basetype == SPIRType::BFloat16;
  5583. float float_value = is_bfloat16 ? c.scalar_bf16(col, row) : c.scalar_f32(col, row);
  5584. if (std::isnan(float_value) || std::isinf(float_value))
  5585. {
  5586. // Use special representation.
  5587. if (!is_legacy())
  5588. {
  5589. SPIRType out_type { OpTypeFloat };
  5590. SPIRType in_type { OpTypeInt };
  5591. out_type.basetype = SPIRType::Float;
  5592. in_type.basetype = SPIRType::UInt;
  5593. out_type.vecsize = 1;
  5594. in_type.vecsize = 1;
  5595. out_type.width = 32;
  5596. in_type.width = 32;
  5597. char print_buffer[32];
  5598. #ifdef _WIN32
  5599. sprintf(print_buffer, "0x%xu", c.scalar(col, row));
  5600. #else
  5601. snprintf(print_buffer, sizeof(print_buffer), "0x%xu", c.scalar(col, row));
  5602. #endif
  5603. const char *comment = "inf";
  5604. if (float_value == -numeric_limits<float>::infinity())
  5605. comment = "-inf";
  5606. else if (std::isnan(float_value))
  5607. comment = "nan";
  5608. res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, " /* ", comment, " */)");
  5609. }
  5610. else
  5611. {
  5612. if (float_value == numeric_limits<float>::infinity())
  5613. {
  5614. if (backend.float_literal_suffix)
  5615. res = "(1.0f / 0.0f)";
  5616. else
  5617. res = "(1.0 / 0.0)";
  5618. }
  5619. else if (float_value == -numeric_limits<float>::infinity())
  5620. {
  5621. if (backend.float_literal_suffix)
  5622. res = "(-1.0f / 0.0f)";
  5623. else
  5624. res = "(-1.0 / 0.0)";
  5625. }
  5626. else if (std::isnan(float_value))
  5627. {
  5628. if (backend.float_literal_suffix)
  5629. res = "(0.0f / 0.0f)";
  5630. else
  5631. res = "(0.0 / 0.0)";
  5632. }
  5633. else
  5634. SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
  5635. }
  5636. }
  5637. else
  5638. {
  5639. res = format_float(float_value);
  5640. if (backend.float_literal_suffix)
  5641. res += "f";
  5642. }
  5643. if (is_bfloat16)
  5644. res = join("bfloat16_t(", res, ")");
  5645. return res;
  5646. }
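// For example, +infinity on non-legacy targets is emitted as something like
// "uintBitsToFloat(0x7f800000u /* inf */)", while legacy targets fall back to the
// "(1.0 / 0.0)" style expressions generated above.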
  5647. std::string CompilerGLSL::convert_double_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
  5648. {
  5649. string res;
  5650. double double_value = c.scalar_f64(col, row);
  5651. if (std::isnan(double_value) || std::isinf(double_value))
  5652. {
  5653. // Use special representation.
  5654. if (!is_legacy())
  5655. {
  5656. SPIRType out_type { OpTypeFloat };
  5657. SPIRType in_type { OpTypeInt };
  5658. out_type.basetype = SPIRType::Double;
  5659. in_type.basetype = SPIRType::UInt64;
  5660. out_type.vecsize = 1;
  5661. in_type.vecsize = 1;
  5662. out_type.width = 64;
  5663. in_type.width = 64;
  5664. uint64_t u64_value = c.scalar_u64(col, row);
  5665. if (options.es && options.version < 310) // GL_NV_gpu_shader5 fallback requires 310.
  5666. SPIRV_CROSS_THROW("64-bit integers not supported in ES profile before version 310.");
  5667. require_extension_internal("GL_ARB_gpu_shader_int64");
  5668. char print_buffer[64];
  5669. #ifdef _WIN32
  5670. sprintf(print_buffer, "0x%llx%s", static_cast<unsigned long long>(u64_value),
  5671. backend.long_long_literal_suffix ? "ull" : "ul");
  5672. #else
  5673. snprintf(print_buffer, sizeof(print_buffer), "0x%llx%s", static_cast<unsigned long long>(u64_value),
  5674. backend.long_long_literal_suffix ? "ull" : "ul");
  5675. #endif
  5676. const char *comment = "inf";
  5677. if (double_value == -numeric_limits<double>::infinity())
  5678. comment = "-inf";
  5679. else if (std::isnan(double_value))
  5680. comment = "nan";
  5681. res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, " /* ", comment, " */)");
  5682. }
  5683. else
  5684. {
  5685. if (options.es)
  5686. SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
  5687. if (options.version < 400)
  5688. require_extension_internal("GL_ARB_gpu_shader_fp64");
  5689. if (double_value == numeric_limits<double>::infinity())
  5690. {
  5691. if (backend.double_literal_suffix)
  5692. res = "(1.0lf / 0.0lf)";
  5693. else
  5694. res = "(1.0 / 0.0)";
  5695. }
  5696. else if (double_value == -numeric_limits<double>::infinity())
  5697. {
  5698. if (backend.double_literal_suffix)
  5699. res = "(-1.0lf / 0.0lf)";
  5700. else
  5701. res = "(-1.0 / 0.0)";
  5702. }
  5703. else if (std::isnan(double_value))
  5704. {
  5705. if (backend.double_literal_suffix)
  5706. res = "(0.0lf / 0.0lf)";
  5707. else
  5708. res = "(0.0 / 0.0)";
  5709. }
  5710. else
  5711. SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
  5712. }
  5713. }
  5714. else
  5715. {
  5716. res = format_double(double_value);
  5717. if (backend.double_literal_suffix)
  5718. res += "lf";
  5719. }
  5720. return res;
  5721. }
  5722. #ifdef _MSC_VER
  5723. #pragma warning(pop)
  5724. #endif
  5725. string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t vector)
  5726. {
  5727. auto type = get<SPIRType>(c.constant_type);
  5728. type.columns = 1;
  5729. auto scalar_type = type;
  5730. scalar_type.vecsize = 1;
  5731. string res;
  5732. bool splat = backend.use_constructor_splatting && c.vector_size() > 1;
  5733. bool swizzle_splat = backend.can_swizzle_scalar && c.vector_size() > 1;
  5734. if (!type_is_floating_point(type))
  5735. {
  5736. // Cannot swizzle literal integers as a special case.
  5737. swizzle_splat = false;
  5738. }
  5739. if (splat || swizzle_splat)
  5740. {
  5741. // Cannot use constant splatting if we have specialization constants somewhere in the vector.
  5742. for (uint32_t i = 0; i < c.vector_size(); i++)
  5743. {
  5744. if (c.specialization_constant_id(vector, i) != 0)
  5745. {
  5746. splat = false;
  5747. swizzle_splat = false;
  5748. break;
  5749. }
  5750. }
  5751. }
  5752. if (splat || swizzle_splat)
  5753. {
  5754. if (type.width == 64)
  5755. {
  5756. uint64_t ident = c.scalar_u64(vector, 0);
  5757. for (uint32_t i = 1; i < c.vector_size(); i++)
  5758. {
  5759. if (ident != c.scalar_u64(vector, i))
  5760. {
  5761. splat = false;
  5762. swizzle_splat = false;
  5763. break;
  5764. }
  5765. }
  5766. }
  5767. else
  5768. {
  5769. uint32_t ident = c.scalar(vector, 0);
  5770. for (uint32_t i = 1; i < c.vector_size(); i++)
  5771. {
  5772. if (ident != c.scalar(vector, i))
  5773. {
  5774. splat = false;
  5775. swizzle_splat = false;
  5776. }
  5777. }
  5778. }
  5779. }
  5780. if (c.vector_size() > 1 && !swizzle_splat)
  5781. res += type_to_glsl(type) + "(";
  5782. switch (type.basetype)
  5783. {
  5784. case SPIRType::FloatE4M3:
  5785. if (splat || swizzle_splat)
  5786. {
  5787. res += convert_floate4m3_to_string(c, vector, 0);
  5788. if (swizzle_splat)
  5789. res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
  5790. }
  5791. else
  5792. {
  5793. for (uint32_t i = 0; i < c.vector_size(); i++)
  5794. {
  5795. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  5796. res += to_expression(c.specialization_constant_id(vector, i));
  5797. else
  5798. res += convert_floate4m3_to_string(c, vector, i);
  5799. if (i + 1 < c.vector_size())
  5800. res += ", ";
  5801. }
  5802. }
  5803. break;
  5804. case SPIRType::FloatE5M2:
  5805. case SPIRType::Half:
  5806. if (splat || swizzle_splat)
  5807. {
  5808. res += convert_half_to_string(c, vector, 0);
  5809. if (swizzle_splat)
  5810. res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
  5811. }
  5812. else
  5813. {
  5814. for (uint32_t i = 0; i < c.vector_size(); i++)
  5815. {
  5816. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  5817. res += to_expression(c.specialization_constant_id(vector, i));
  5818. else
  5819. res += convert_half_to_string(c, vector, i);
  5820. if (i + 1 < c.vector_size())
  5821. res += ", ";
  5822. }
  5823. }
  5824. break;
  5825. case SPIRType::BFloat16:
  5826. case SPIRType::Float:
  5827. if (splat || swizzle_splat)
  5828. {
  5829. res += convert_float_to_string(c, vector, 0);
  5830. if (swizzle_splat)
  5831. res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
  5832. }
  5833. else
  5834. {
  5835. for (uint32_t i = 0; i < c.vector_size(); i++)
  5836. {
  5837. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  5838. res += to_expression(c.specialization_constant_id(vector, i));
  5839. else
  5840. res += convert_float_to_string(c, vector, i);
  5841. if (i + 1 < c.vector_size())
  5842. res += ", ";
  5843. }
  5844. }
  5845. break;
  5846. case SPIRType::Double:
  5847. if (splat || swizzle_splat)
  5848. {
  5849. res += convert_double_to_string(c, vector, 0);
  5850. if (swizzle_splat)
  5851. res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
  5852. }
  5853. else
  5854. {
  5855. for (uint32_t i = 0; i < c.vector_size(); i++)
  5856. {
  5857. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  5858. res += to_expression(c.specialization_constant_id(vector, i));
  5859. else
  5860. res += convert_double_to_string(c, vector, i);
  5861. if (i + 1 < c.vector_size())
  5862. res += ", ";
  5863. }
  5864. }
  5865. break;
  5866. case SPIRType::Int64:
  5867. {
  5868. auto tmp = type;
  5869. tmp.vecsize = 1;
  5870. tmp.columns = 1;
  5871. auto int64_type = type_to_glsl(tmp);
  5872. if (splat)
  5873. {
  5874. res += convert_to_string(c.scalar_i64(vector, 0), int64_type, backend.long_long_literal_suffix);
  5875. }
  5876. else
  5877. {
  5878. for (uint32_t i = 0; i < c.vector_size(); i++)
  5879. {
  5880. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  5881. res += to_expression(c.specialization_constant_id(vector, i));
  5882. else
  5883. res += convert_to_string(c.scalar_i64(vector, i), int64_type, backend.long_long_literal_suffix);
  5884. if (i + 1 < c.vector_size())
  5885. res += ", ";
  5886. }
  5887. }
  5888. break;
  5889. }
  5890. case SPIRType::UInt64:
  5891. if (splat)
  5892. {
  5893. res += convert_to_string(c.scalar_u64(vector, 0));
  5894. if (backend.long_long_literal_suffix)
  5895. res += "ull";
  5896. else
  5897. res += "ul";
  5898. }
  5899. else
  5900. {
  5901. for (uint32_t i = 0; i < c.vector_size(); i++)
  5902. {
  5903. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  5904. res += to_expression(c.specialization_constant_id(vector, i));
  5905. else
  5906. {
  5907. res += convert_to_string(c.scalar_u64(vector, i));
  5908. if (backend.long_long_literal_suffix)
  5909. res += "ull";
  5910. else
  5911. res += "ul";
  5912. }
  5913. if (i + 1 < c.vector_size())
  5914. res += ", ";
  5915. }
  5916. }
  5917. break;
  5918. case SPIRType::UInt:
  5919. if (splat)
  5920. {
  5921. res += convert_to_string(c.scalar(vector, 0));
  5922. if (is_legacy() && !has_extension("GL_EXT_gpu_shader4"))
  5923. {
  5924. // Fake unsigned constant literals with signed ones if possible.
5925. // Things like array sizes, etc., tend to be unsigned even though they could just as easily be signed.
  5926. if (c.scalar_i32(vector, 0) < 0)
  5927. SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made the literal negative.");
  5928. }
  5929. else if (backend.uint32_t_literal_suffix)
  5930. res += "u";
  5931. }
  5932. else
  5933. {
  5934. for (uint32_t i = 0; i < c.vector_size(); i++)
  5935. {
  5936. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  5937. res += to_expression(c.specialization_constant_id(vector, i));
  5938. else
  5939. {
  5940. res += convert_to_string(c.scalar(vector, i));
  5941. if (is_legacy() && !has_extension("GL_EXT_gpu_shader4"))
  5942. {
  5943. // Fake unsigned constant literals with signed ones if possible.
5944. // Things like array sizes, etc., tend to be unsigned even though they could just as easily be signed.
  5945. if (c.scalar_i32(vector, i) < 0)
  5946. SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made "
  5947. "the literal negative.");
  5948. }
  5949. else if (backend.uint32_t_literal_suffix)
  5950. res += "u";
  5951. }
  5952. if (i + 1 < c.vector_size())
  5953. res += ", ";
  5954. }
  5955. }
  5956. break;
  5957. case SPIRType::Int:
  5958. if (splat)
  5959. res += convert_to_string(c.scalar_i32(vector, 0));
  5960. else
  5961. {
  5962. for (uint32_t i = 0; i < c.vector_size(); i++)
  5963. {
  5964. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  5965. res += to_expression(c.specialization_constant_id(vector, i));
  5966. else
  5967. res += convert_to_string(c.scalar_i32(vector, i));
  5968. if (i + 1 < c.vector_size())
  5969. res += ", ";
  5970. }
  5971. }
  5972. break;
  5973. case SPIRType::UShort:
  5974. if (splat)
  5975. {
  5976. res += convert_to_string(c.scalar(vector, 0));
  5977. }
  5978. else
  5979. {
  5980. for (uint32_t i = 0; i < c.vector_size(); i++)
  5981. {
  5982. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  5983. res += to_expression(c.specialization_constant_id(vector, i));
  5984. else
  5985. {
  5986. if (*backend.uint16_t_literal_suffix)
  5987. {
  5988. res += convert_to_string(c.scalar_u16(vector, i));
  5989. res += backend.uint16_t_literal_suffix;
  5990. }
  5991. else
  5992. {
  5993. // If backend doesn't have a literal suffix, we need to value cast.
  5994. res += type_to_glsl(scalar_type);
  5995. res += "(";
  5996. res += convert_to_string(c.scalar_u16(vector, i));
  5997. res += ")";
  5998. }
  5999. }
  6000. if (i + 1 < c.vector_size())
  6001. res += ", ";
  6002. }
  6003. }
  6004. break;
  6005. case SPIRType::Short:
  6006. if (splat)
  6007. {
  6008. res += convert_to_string(c.scalar_i16(vector, 0));
  6009. }
  6010. else
  6011. {
  6012. for (uint32_t i = 0; i < c.vector_size(); i++)
  6013. {
  6014. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  6015. res += to_expression(c.specialization_constant_id(vector, i));
  6016. else
  6017. {
  6018. if (*backend.int16_t_literal_suffix)
  6019. {
  6020. res += convert_to_string(c.scalar_i16(vector, i));
  6021. res += backend.int16_t_literal_suffix;
  6022. }
  6023. else
  6024. {
  6025. // If backend doesn't have a literal suffix, we need to value cast.
  6026. res += type_to_glsl(scalar_type);
  6027. res += "(";
  6028. res += convert_to_string(c.scalar_i16(vector, i));
  6029. res += ")";
  6030. }
  6031. }
  6032. if (i + 1 < c.vector_size())
  6033. res += ", ";
  6034. }
  6035. }
  6036. break;
  6037. case SPIRType::UByte:
  6038. if (splat)
  6039. {
  6040. res += convert_to_string(c.scalar_u8(vector, 0));
  6041. }
  6042. else
  6043. {
  6044. for (uint32_t i = 0; i < c.vector_size(); i++)
  6045. {
  6046. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  6047. res += to_expression(c.specialization_constant_id(vector, i));
  6048. else
  6049. {
  6050. res += type_to_glsl(scalar_type);
  6051. res += "(";
  6052. res += convert_to_string(c.scalar_u8(vector, i));
  6053. res += ")";
  6054. }
  6055. if (i + 1 < c.vector_size())
  6056. res += ", ";
  6057. }
  6058. }
  6059. break;
  6060. case SPIRType::SByte:
  6061. if (splat)
  6062. {
  6063. res += convert_to_string(c.scalar_i8(vector, 0));
  6064. }
  6065. else
  6066. {
  6067. for (uint32_t i = 0; i < c.vector_size(); i++)
  6068. {
  6069. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  6070. res += to_expression(c.specialization_constant_id(vector, i));
  6071. else
  6072. {
  6073. res += type_to_glsl(scalar_type);
  6074. res += "(";
  6075. res += convert_to_string(c.scalar_i8(vector, i));
  6076. res += ")";
  6077. }
  6078. if (i + 1 < c.vector_size())
  6079. res += ", ";
  6080. }
  6081. }
  6082. break;
  6083. case SPIRType::Boolean:
  6084. if (splat)
  6085. res += c.scalar(vector, 0) ? "true" : "false";
  6086. else
  6087. {
  6088. for (uint32_t i = 0; i < c.vector_size(); i++)
  6089. {
  6090. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  6091. res += to_expression(c.specialization_constant_id(vector, i));
  6092. else
  6093. res += c.scalar(vector, i) ? "true" : "false";
  6094. if (i + 1 < c.vector_size())
  6095. res += ", ";
  6096. }
  6097. }
  6098. break;
  6099. default:
  6100. SPIRV_CROSS_THROW("Invalid constant expression basetype.");
  6101. }
  6102. if (c.vector_size() > 1 && !swizzle_splat)
  6103. res += ")";
  6104. return res;
  6105. }
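// Declares an uninitialized temporary for 'id' and marks it as a forced temporary so later uses read the
// variable instead of re-evaluating the expression.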
  6106. SPIRExpression &CompilerGLSL::emit_uninitialized_temporary_expression(uint32_t type, uint32_t id)
  6107. {
  6108. forced_temporaries.insert(id);
  6109. emit_uninitialized_temporary(type, id);
  6110. return set<SPIRExpression>(id, to_name(id), type, true);
  6111. }
  6112. void CompilerGLSL::emit_uninitialized_temporary(uint32_t result_type, uint32_t result_id)
  6113. {
  6114. // If we're declaring temporaries inside continue blocks,
  6115. // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
  6116. if (!block_temporary_hoisting && current_continue_block && !hoisted_temporaries.count(result_id))
  6117. {
  6118. auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
  6119. if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
  6120. [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
  6121. return tmp.first == result_type && tmp.second == result_id;
  6122. }) == end(header.declare_temporary))
  6123. {
  6124. header.declare_temporary.emplace_back(result_type, result_id);
  6125. hoisted_temporaries.insert(result_id);
  6126. force_recompile();
  6127. }
  6128. }
  6129. else if (hoisted_temporaries.count(result_id) == 0)
  6130. {
  6131. auto &type = get<SPIRType>(result_type);
  6132. auto &flags = get_decoration_bitset(result_id);
  6133. // The result_id has not been made into an expression yet, so use flags interface.
  6134. add_local_variable_name(result_id);
  6135. string initializer;
  6136. if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
  6137. initializer = join(" = ", to_zero_initialized_expression(result_type));
  6138. statement(flags_to_qualifiers_glsl(type, result_id, flags), variable_decl(type, to_name(result_id)), initializer, ";");
  6139. }
  6140. }
  6141. bool CompilerGLSL::can_declare_inline_temporary(uint32_t id) const
  6142. {
  6143. if (!block_temporary_hoisting && current_continue_block && !hoisted_temporaries.count(id))
  6144. return false;
  6145. if (hoisted_temporaries.count(id))
  6146. return false;
  6147. return true;
  6148. }
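// Returns the left-hand side used to assign a temporary: a full declaration in the common case, or just
// "name = " when the temporary was (or has just been) hoisted into the enclosing loop header.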
  6149. string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id)
  6150. {
  6151. auto &type = get<SPIRType>(result_type);
  6152. // If we're declaring temporaries inside continue blocks,
  6153. // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
  6154. if (!block_temporary_hoisting && current_continue_block && !hoisted_temporaries.count(result_id))
  6155. {
  6156. auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
  6157. if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
  6158. [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
  6159. return tmp.first == result_type && tmp.second == result_id;
  6160. }) == end(header.declare_temporary))
  6161. {
  6162. header.declare_temporary.emplace_back(result_type, result_id);
  6163. hoisted_temporaries.insert(result_id);
  6164. force_recompile_guarantee_forward_progress();
  6165. }
  6166. return join(to_name(result_id), " = ");
  6167. }
  6168. else if (hoisted_temporaries.count(result_id))
  6169. {
  6170. // The temporary has already been declared earlier, so just "declare" the temporary by writing to it.
  6171. return join(to_name(result_id), " = ");
  6172. }
  6173. else
  6174. {
  6175. // The result_id has not been made into an expression yet, so use flags interface.
  6176. add_local_variable_name(result_id);
  6177. auto &flags = get_decoration_bitset(result_id);
  6178. return join(flags_to_qualifiers_glsl(type, result_id, flags), variable_decl(type, to_name(result_id)), " = ");
  6179. }
  6180. }
  6181. bool CompilerGLSL::expression_is_forwarded(uint32_t id) const
  6182. {
  6183. return forwarded_temporaries.count(id) != 0;
  6184. }
  6185. bool CompilerGLSL::expression_suppresses_usage_tracking(uint32_t id) const
  6186. {
  6187. return suppressed_usage_tracking.count(id) != 0;
  6188. }
  6189. bool CompilerGLSL::expression_read_implies_multiple_reads(uint32_t id) const
  6190. {
  6191. auto *expr = maybe_get<SPIRExpression>(id);
  6192. if (!expr)
  6193. return false;
  6194. // If we're emitting code at a deeper loop level than when we emitted the expression,
  6195. // we're probably reading the same expression over and over.
  6196. return current_loop_level > expr->emitted_loop_level;
  6197. }
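// Core expression emitter: either forwards 'rhs' as an inline expression, or flushes it into a freshly
// declared temporary when forwarding is disabled or the result id has been forced to a temporary.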
  6198. SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding,
  6199. bool suppress_usage_tracking)
  6200. {
  6201. if (forwarding && (forced_temporaries.find(result_id) == end(forced_temporaries)))
  6202. {
  6203. // Just forward it without temporary.
  6204. // If the forward is trivial, we do not force flushing to temporary for this expression.
  6205. forwarded_temporaries.insert(result_id);
  6206. if (suppress_usage_tracking)
  6207. suppressed_usage_tracking.insert(result_id);
  6208. return set<SPIRExpression>(result_id, rhs, result_type, true);
  6209. }
  6210. else
  6211. {
  6212. // If expression isn't immutable, bind it to a temporary and make the new temporary immutable (they always are).
  6213. statement(declare_temporary(result_type, result_id), rhs, ";");
  6214. return set<SPIRExpression>(result_id, to_name(result_id), result_type, true);
  6215. }
  6216. }
  6217. void CompilerGLSL::emit_transposed_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding)
  6218. {
  6219. if (forwarding && (forced_temporaries.find(result_id) == end(forced_temporaries)))
  6220. {
  6221. // Just forward it without temporary.
  6222. // If the forward is trivial, we do not force flushing to temporary for this expression.
  6223. forwarded_temporaries.insert(result_id);
  6224. auto &e = set<SPIRExpression>(result_id, rhs, result_type, true);
  6225. e.need_transpose = true;
  6226. }
  6227. else if (can_declare_inline_temporary(result_id))
  6228. {
  6229. // If expression isn't immutable, bind it to a temporary and make the new temporary immutable (they always are).
  6230. // Since the expression is transposed, we have to ensure the temporary is the transposed type.
  6231. auto &transposed_type_id = extra_sub_expressions[result_id];
  6232. if (!transposed_type_id)
  6233. {
  6234. auto dummy_type = get<SPIRType>(result_type);
  6235. std::swap(dummy_type.columns, dummy_type.vecsize);
  6236. transposed_type_id = ir.increase_bound_by(1);
  6237. set<SPIRType>(transposed_type_id, dummy_type);
  6238. }
  6239. statement(declare_temporary(transposed_type_id, result_id), rhs, ";");
  6240. auto &e = set<SPIRExpression>(result_id, to_name(result_id), result_type, true);
  6241. e.need_transpose = true;
  6242. }
  6243. else
  6244. {
  6245. // If we cannot declare the temporary because it's already been hoisted, we don't have the
  6246. // chance to override the temporary type ourselves. Just transpose() the expression.
  6247. emit_op(result_type, result_id, join("transpose(", rhs, ")"), forwarding);
  6248. }
  6249. }
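// Emits a prefix operator such as - or ~; the operand is enclosed in parentheses when precedence requires it.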
  6250. void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
  6251. {
  6252. bool forward = should_forward(op0);
  6253. emit_op(result_type, result_id, join(op, to_enclosed_unpacked_expression(op0)), forward);
  6254. inherit_expression_dependencies(result_id, op0);
  6255. }
  6256. void CompilerGLSL::emit_unary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
  6257. {
  6258. auto &type = get<SPIRType>(result_type);
  6259. bool forward = should_forward(op0);
  6260. emit_op(result_type, result_id, join(type_to_glsl(type), "(", op, to_enclosed_unpacked_expression(op0), ")"), forward);
  6261. inherit_expression_dependencies(result_id, op0);
  6262. }
  6263. void CompilerGLSL::emit_mesh_tasks(SPIRBlock &block)
  6264. {
  6265. statement("EmitMeshTasksEXT(",
  6266. to_unpacked_expression(block.mesh.groups[0]), ", ",
  6267. to_unpacked_expression(block.mesh.groups[1]), ", ",
  6268. to_unpacked_expression(block.mesh.groups[2]), ");");
  6269. }
  6270. void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op)
  6271. {
  6272. // Various FP arithmetic opcodes such as add, sub, mul will hit this.
  6273. bool force_temporary_precise = backend.support_precise_qualifier &&
  6274. has_legacy_nocontract(result_type, result_id) &&
  6275. type_is_floating_point(get<SPIRType>(result_type));
  6276. bool forward = should_forward(op0) && should_forward(op1) && !force_temporary_precise;
  6277. emit_op(result_type, result_id,
  6278. join(to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1)), forward);
  6279. inherit_expression_dependencies(result_id, op0);
  6280. inherit_expression_dependencies(result_id, op1);
  6281. }
  6282. void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op)
  6283. {
  6284. auto &type = get<SPIRType>(result_type);
  6285. auto expr = type_to_glsl_constructor(type);
  6286. expr += '(';
  6287. for (uint32_t i = 0; i < type.vecsize; i++)
  6288. {
  6289. // Make sure to call to_expression multiple times to ensure
  6290. // that these expressions are properly flushed to temporaries if needed.
  6291. expr += op;
  6292. expr += to_extract_component_expression(operand, i);
  6293. if (i + 1 < type.vecsize)
  6294. expr += ", ";
  6295. }
  6296. expr += ')';
  6297. emit_op(result_type, result_id, expr, should_forward(operand));
  6298. inherit_expression_dependencies(result_id, operand);
  6299. }
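// Emits a vector operation one component at a time, used where GLSL lacks a vector form of the operator
// (e.g. && and || on boolean vectors). 'negate' wraps each component in !(...), and 'expected_type' forces
// per-component bitcasts of the operands.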
  6300. void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  6301. const char *op, bool negate, SPIRType::BaseType expected_type)
  6302. {
  6303. auto &type0 = expression_type(op0);
  6304. auto &type1 = expression_type(op1);
  6305. SPIRType target_type0 = type0;
  6306. SPIRType target_type1 = type1;
  6307. target_type0.basetype = expected_type;
  6308. target_type1.basetype = expected_type;
  6309. target_type0.vecsize = 1;
  6310. target_type1.vecsize = 1;
  6311. auto &type = get<SPIRType>(result_type);
  6312. auto expr = type_to_glsl_constructor(type);
  6313. expr += '(';
  6314. for (uint32_t i = 0; i < type.vecsize; i++)
  6315. {
  6316. // Make sure to call to_expression multiple times to ensure
  6317. // that these expressions are properly flushed to temporaries if needed.
  6318. if (negate)
  6319. expr += "!(";
  6320. if (expected_type != SPIRType::Unknown && type0.basetype != expected_type)
  6321. expr += bitcast_expression(target_type0, type0.basetype, to_extract_component_expression(op0, i));
  6322. else
  6323. expr += to_extract_component_expression(op0, i);
  6324. expr += ' ';
  6325. expr += op;
  6326. expr += ' ';
  6327. if (expected_type != SPIRType::Unknown && type1.basetype != expected_type)
  6328. expr += bitcast_expression(target_type1, type1.basetype, to_extract_component_expression(op1, i));
  6329. else
  6330. expr += to_extract_component_expression(op1, i);
  6331. if (negate)
  6332. expr += ")";
  6333. if (i + 1 < type.vecsize)
  6334. expr += ", ";
  6335. }
  6336. expr += ')';
  6337. emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
  6338. inherit_expression_dependencies(result_id, op0);
  6339. inherit_expression_dependencies(result_id, op1);
  6340. }
  6341. SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_op1, SPIRType::BaseType &input_type,
  6342. uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type)
  6343. {
  6344. auto &type0 = expression_type(op0);
  6345. auto &type1 = expression_type(op1);
  6346. // We have to bitcast if our inputs are of different type, or if our types are not equal to expected inputs.
  6347. // For some functions like OpIEqual and INotEqual, we don't care if inputs are of different types than expected
  6348. // since equality test is exactly the same.
  6349. bool cast = (type0.basetype != type1.basetype) || (!skip_cast_if_equal_type && type0.basetype != input_type);
  6350. // Create a fake type so we can bitcast to it.
  6351. // We only deal with regular arithmetic types here like int, uints and so on.
  6352. SPIRType expected_type{type0.op};
  6353. expected_type.basetype = input_type;
  6354. expected_type.vecsize = type0.vecsize;
  6355. expected_type.columns = type0.columns;
  6356. expected_type.width = type0.width;
  6357. if (cast)
  6358. {
  6359. cast_op0 = bitcast_glsl(expected_type, op0);
  6360. cast_op1 = bitcast_glsl(expected_type, op1);
  6361. }
  6362. else
  6363. {
  6364. // If we don't cast, our actual input type is that of the first (or second) argument.
  6365. cast_op0 = to_enclosed_unpacked_expression(op0);
  6366. cast_op1 = to_enclosed_unpacked_expression(op1);
  6367. input_type = type0.basetype;
  6368. }
  6369. return expected_type;
  6370. }
  6371. bool CompilerGLSL::emit_complex_bitcast(uint32_t result_type, uint32_t id, uint32_t op0)
  6372. {
  6373. // Some bitcasts may require complex casting sequences, and are implemented here.
6374. // Otherwise a simple unary function will do with bitcast_glsl_op.
  6375. auto &output_type = get<SPIRType>(result_type);
  6376. auto &input_type = expression_type(op0);
  6377. string expr;
  6378. if (output_type.basetype == SPIRType::Half && input_type.basetype == SPIRType::Float && input_type.vecsize == 1)
  6379. expr = join("unpackFloat2x16(floatBitsToUint(", to_unpacked_expression(op0), "))");
  6380. else if (output_type.basetype == SPIRType::Float && input_type.basetype == SPIRType::Half &&
  6381. input_type.vecsize == 2)
  6382. expr = join("uintBitsToFloat(packFloat2x16(", to_unpacked_expression(op0), "))");
  6383. else
  6384. return false;
  6385. emit_op(result_type, id, expr, should_forward(op0));
  6386. return true;
  6387. }
  6388. void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  6389. const char *op, SPIRType::BaseType input_type,
  6390. bool skip_cast_if_equal_type,
  6391. bool implicit_integer_promotion)
  6392. {
  6393. string cast_op0, cast_op1;
  6394. auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
  6395. auto &out_type = get<SPIRType>(result_type);
  6396. // We might have casted away from the result type, so bitcast again.
  6397. // For example, arithmetic right shift with uint inputs.
  6398. // Special case boolean outputs since relational opcodes output booleans instead of int/uint.
  6399. auto bitop = join(cast_op0, " ", op, " ", cast_op1);
  6400. string expr;
  6401. if (implicit_integer_promotion)
  6402. {
  6403. // Simple value cast.
  6404. expr = join(type_to_glsl(out_type), '(', bitop, ')');
  6405. }
  6406. else if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
  6407. {
  6408. expected_type.basetype = input_type;
  6409. expr = join(bitcast_glsl_op(out_type, expected_type), '(', bitop, ')');
  6410. }
  6411. else
  6412. {
  6413. expr = std::move(bitop);
  6414. }
  6415. emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
  6416. inherit_expression_dependencies(result_id, op0);
  6417. inherit_expression_dependencies(result_id, op1);
  6418. }
  6419. void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
  6420. {
  6421. bool forward = should_forward(op0);
  6422. emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ")"), forward);
  6423. inherit_expression_dependencies(result_id, op0);
  6424. }
  6425. void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  6426. const char *op)
  6427. {
  6428. // Opaque types (e.g. OpTypeSampledImage) must always be forwarded in GLSL
  6429. const auto &type = get_type(result_type);
  6430. bool must_forward = type_is_opaque_value(type);
  6431. bool forward = must_forward || (should_forward(op0) && should_forward(op1));
  6432. emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ")"),
  6433. forward);
  6434. inherit_expression_dependencies(result_id, op0);
  6435. inherit_expression_dependencies(result_id, op1);
  6436. }
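// Atomics are never forwarded: the call is emitted straight into a forced temporary and all variables that
// could be aliased by atomics are invalidated afterwards.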
  6437. void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  6438. const char *op)
  6439. {
  6440. auto &type = get<SPIRType>(result_type);
  6441. if (type_is_floating_point(type))
  6442. {
  6443. if (!options.vulkan_semantics)
  6444. SPIRV_CROSS_THROW("Floating point atomics requires Vulkan semantics.");
  6445. if (options.es)
  6446. SPIRV_CROSS_THROW("Floating point atomics requires desktop GLSL.");
  6447. require_extension_internal("GL_EXT_shader_atomic_float");
  6448. }
  6449. if (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64)
  6450. require_extension_internal("GL_EXT_shader_atomic_int64");
  6451. forced_temporaries.insert(result_id);
  6452. emit_op(result_type, result_id,
  6453. join(op, "(", to_atomic_ptr_expression(op0), ", ",
  6454. to_unpacked_expression(op1), ")"), false);
  6455. flush_all_atomic_capable_variables();
  6456. }
  6457. void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id,
  6458. uint32_t op0, uint32_t op1, uint32_t op2,
  6459. const char *op)
  6460. {
  6461. forced_temporaries.insert(result_id);
  6462. emit_op(result_type, result_id,
  6463. join(op, "(", to_non_uniform_aware_expression(op0), ", ",
  6464. to_unpacked_expression(op1), ", ", to_unpacked_expression(op2), ")"), false);
  6465. flush_all_atomic_capable_variables();
  6466. }
  6467. void CompilerGLSL::emit_unary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op,
  6468. SPIRType::BaseType input_type, SPIRType::BaseType expected_result_type)
  6469. {
  6470. auto &out_type = get<SPIRType>(result_type);
  6471. auto &expr_type = expression_type(op0);
  6472. auto expected_type = out_type;
  6473. // Bit-widths might be different in unary cases because we use it for SConvert/UConvert and friends.
  6474. expected_type.basetype = input_type;
  6475. expected_type.width = expr_type.width;
  6476. string cast_op;
  6477. if (expr_type.basetype != input_type)
  6478. {
  6479. if (expr_type.basetype == SPIRType::Boolean)
  6480. cast_op = join(type_to_glsl(expected_type), "(", to_unpacked_expression(op0), ")");
  6481. else
  6482. cast_op = bitcast_glsl(expected_type, op0);
  6483. }
  6484. else
  6485. cast_op = to_unpacked_expression(op0);
  6486. string expr;
  6487. if (out_type.basetype != expected_result_type)
  6488. {
  6489. expected_type.basetype = expected_result_type;
  6490. expected_type.width = out_type.width;
  6491. if (out_type.basetype == SPIRType::Boolean)
  6492. expr = type_to_glsl(out_type);
  6493. else
  6494. expr = bitcast_glsl_op(out_type, expected_type);
  6495. expr += '(';
  6496. expr += join(op, "(", cast_op, ")");
  6497. expr += ')';
  6498. }
  6499. else
  6500. {
  6501. expr += join(op, "(", cast_op, ")");
  6502. }
  6503. emit_op(result_type, result_id, expr, should_forward(op0));
  6504. inherit_expression_dependencies(result_id, op0);
  6505. }
  6506. // Very special case. Handling bitfieldExtract requires us to deal with different bitcasts of different signs
  6507. // and different vector sizes all at once. Need a special purpose method here.
  6508. void CompilerGLSL::emit_trinary_func_op_bitextract(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  6509. uint32_t op2, const char *op,
  6510. SPIRType::BaseType expected_result_type,
  6511. SPIRType::BaseType input_type0, SPIRType::BaseType input_type1,
  6512. SPIRType::BaseType input_type2)
  6513. {
  6514. auto &out_type = get<SPIRType>(result_type);
  6515. auto expected_type = out_type;
  6516. expected_type.basetype = input_type0;
  6517. string cast_op0 =
  6518. expression_type(op0).basetype != input_type0 ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
  6519. auto op1_expr = to_unpacked_expression(op1);
  6520. auto op2_expr = to_unpacked_expression(op2);
  6521. // Use value casts here instead. Input must be exactly int or uint, but SPIR-V might be 16-bit.
  6522. expected_type.basetype = input_type1;
  6523. expected_type.vecsize = 1;
  6524. string cast_op1 = expression_type(op1).basetype != input_type1 ?
  6525. join(type_to_glsl_constructor(expected_type), "(", op1_expr, ")") :
  6526. op1_expr;
  6527. expected_type.basetype = input_type2;
  6528. expected_type.vecsize = 1;
  6529. string cast_op2 = expression_type(op2).basetype != input_type2 ?
  6530. join(type_to_glsl_constructor(expected_type), "(", op2_expr, ")") :
  6531. op2_expr;
  6532. string expr;
  6533. if (out_type.basetype != expected_result_type)
  6534. {
  6535. expected_type.vecsize = out_type.vecsize;
  6536. expected_type.basetype = expected_result_type;
  6537. expr = bitcast_glsl_op(out_type, expected_type);
  6538. expr += '(';
  6539. expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
  6540. expr += ')';
  6541. }
  6542. else
  6543. {
  6544. expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
  6545. }
  6546. emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2));
  6547. inherit_expression_dependencies(result_id, op0);
  6548. inherit_expression_dependencies(result_id, op1);
  6549. inherit_expression_dependencies(result_id, op2);
  6550. }
  6551. void CompilerGLSL::emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  6552. uint32_t op2, const char *op, SPIRType::BaseType input_type)
  6553. {
  6554. auto &out_type = get<SPIRType>(result_type);
  6555. auto expected_type = out_type;
  6556. expected_type.basetype = input_type;
  6557. string cast_op0 =
  6558. expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
  6559. string cast_op1 =
  6560. expression_type(op1).basetype != input_type ? bitcast_glsl(expected_type, op1) : to_unpacked_expression(op1);
  6561. string cast_op2 =
  6562. expression_type(op2).basetype != input_type ? bitcast_glsl(expected_type, op2) : to_unpacked_expression(op2);
  6563. string expr;
  6564. if (out_type.basetype != input_type)
  6565. {
  6566. expr = bitcast_glsl_op(out_type, expected_type);
  6567. expr += '(';
  6568. expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
  6569. expr += ')';
  6570. }
  6571. else
  6572. {
  6573. expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
  6574. }
  6575. emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2));
  6576. inherit_expression_dependencies(result_id, op0);
  6577. inherit_expression_dependencies(result_id, op1);
  6578. inherit_expression_dependencies(result_id, op2);
  6579. }
  6580. void CompilerGLSL::emit_binary_func_op_cast_clustered(uint32_t result_type, uint32_t result_id, uint32_t op0,
  6581. uint32_t op1, const char *op, SPIRType::BaseType input_type)
  6582. {
  6583. // Special purpose method for implementing clustered subgroup opcodes.
6584. // Main difference is that op1 does not participate in any casting; it needs to be a literal.
  6585. auto &out_type = get<SPIRType>(result_type);
  6586. auto expected_type = out_type;
  6587. expected_type.basetype = input_type;
  6588. string cast_op0 =
  6589. expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
  6590. string expr;
  6591. if (out_type.basetype != input_type)
  6592. {
  6593. expr = bitcast_glsl_op(out_type, expected_type);
  6594. expr += '(';
  6595. expr += join(op, "(", cast_op0, ", ", to_expression(op1), ")");
  6596. expr += ')';
  6597. }
  6598. else
  6599. {
  6600. expr += join(op, "(", cast_op0, ", ", to_expression(op1), ")");
  6601. }
  6602. emit_op(result_type, result_id, expr, should_forward(op0));
  6603. inherit_expression_dependencies(result_id, op0);
  6604. }
  6605. void CompilerGLSL::emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  6606. const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
  6607. {
  6608. string cast_op0, cast_op1;
  6609. auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
  6610. auto &out_type = get<SPIRType>(result_type);
  6611. // Special case boolean outputs since relational opcodes output booleans instead of int/uint.
  6612. string expr;
  6613. if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
  6614. {
  6615. expected_type.basetype = input_type;
  6616. expr = bitcast_glsl_op(out_type, expected_type);
  6617. expr += '(';
  6618. expr += join(op, "(", cast_op0, ", ", cast_op1, ")");
  6619. expr += ')';
  6620. }
  6621. else
  6622. {
  6623. expr += join(op, "(", cast_op0, ", ", cast_op1, ")");
  6624. }
  6625. emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
  6626. inherit_expression_dependencies(result_id, op0);
  6627. inherit_expression_dependencies(result_id, op1);
  6628. }
  6629. void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  6630. uint32_t op2, const char *op)
  6631. {
  6632. bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2);
  6633. emit_op(result_type, result_id,
  6634. join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ",
  6635. to_unpacked_expression(op2), ")"),
  6636. forward);
  6637. inherit_expression_dependencies(result_id, op0);
  6638. inherit_expression_dependencies(result_id, op1);
  6639. inherit_expression_dependencies(result_id, op2);
  6640. }
  6641. void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  6642. uint32_t op2, uint32_t op3, const char *op)
  6643. {
  6644. bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3);
  6645. emit_op(result_type, result_id,
  6646. join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ",
  6647. to_unpacked_expression(op2), ", ", to_unpacked_expression(op3), ")"),
  6648. forward);
  6649. inherit_expression_dependencies(result_id, op0);
  6650. inherit_expression_dependencies(result_id, op1);
  6651. inherit_expression_dependencies(result_id, op2);
  6652. inherit_expression_dependencies(result_id, op3);
  6653. }
  6654. void CompilerGLSL::emit_bitfield_insert_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  6655. uint32_t op2, uint32_t op3, const char *op,
  6656. SPIRType::BaseType offset_count_type)
  6657. {
  6658. // Only need to cast offset/count arguments. Types of base/insert must be same as result type,
  6659. // and bitfieldInsert is sign invariant.
  6660. bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3);
  6661. auto op0_expr = to_unpacked_expression(op0);
  6662. auto op1_expr = to_unpacked_expression(op1);
  6663. auto op2_expr = to_unpacked_expression(op2);
  6664. auto op3_expr = to_unpacked_expression(op3);
  6665. assert(offset_count_type == SPIRType::UInt || offset_count_type == SPIRType::Int);
  6666. SPIRType target_type { OpTypeInt };
  6667. target_type.width = 32;
  6668. target_type.vecsize = 1;
  6669. target_type.basetype = offset_count_type;
  6670. if (expression_type(op2).basetype != offset_count_type)
  6671. {
  6672. // Value-cast here. Input might be 16-bit. GLSL requires int.
  6673. op2_expr = join(type_to_glsl_constructor(target_type), "(", op2_expr, ")");
  6674. }
  6675. if (expression_type(op3).basetype != offset_count_type)
  6676. {
  6677. // Value-cast here. Input might be 16-bit. GLSL requires int.
  6678. op3_expr = join(type_to_glsl_constructor(target_type), "(", op3_expr, ")");
  6679. }
  6680. emit_op(result_type, result_id, join(op, "(", op0_expr, ", ", op1_expr, ", ", op2_expr, ", ", op3_expr, ")"),
  6681. forward);
  6682. inherit_expression_dependencies(result_id, op0);
  6683. inherit_expression_dependencies(result_id, op1);
  6684. inherit_expression_dependencies(result_id, op2);
  6685. inherit_expression_dependencies(result_id, op3);
  6686. }
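// Maps a modern texture builtin onto its legacy GLSL / ESSL 1.x spelling (texture2D, shadow2DEXT,
// texture2DProjLodEXT, ...) and requires whatever extension the legacy form depends on.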
  6687. string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t tex)
  6688. {
  6689. const char *type;
  6690. switch (imgtype.image.dim)
  6691. {
  6692. case Dim1D:
  6693. // Force 2D path for ES.
  6694. if (options.es)
  6695. type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D";
  6696. else
  6697. type = (imgtype.image.arrayed && !options.es) ? "1DArray" : "1D";
  6698. break;
  6699. case Dim2D:
  6700. type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D";
  6701. break;
  6702. case Dim3D:
  6703. type = "3D";
  6704. break;
  6705. case DimCube:
  6706. type = "Cube";
  6707. break;
  6708. case DimRect:
  6709. type = "2DRect";
  6710. break;
  6711. case DimBuffer:
  6712. type = "Buffer";
  6713. break;
  6714. case DimSubpassData:
  6715. type = "2D";
  6716. break;
  6717. default:
  6718. type = "";
  6719. break;
  6720. }
  6721. // In legacy GLSL, an extension is required for textureLod in the fragment
  6722. // shader or textureGrad anywhere.
  6723. bool legacy_lod_ext = false;
  6724. auto &execution = get_entry_point();
  6725. if (op == "textureGrad" || op == "textureProjGrad" ||
  6726. ((op == "textureLod" || op == "textureProjLod") && execution.model != ExecutionModelVertex))
  6727. {
  6728. if (is_legacy_es())
  6729. {
  6730. legacy_lod_ext = true;
  6731. require_extension_internal("GL_EXT_shader_texture_lod");
  6732. }
  6733. else if (is_legacy_desktop())
  6734. require_extension_internal("GL_ARB_shader_texture_lod");
  6735. }
  6736. if (op == "textureLodOffset" || op == "textureProjLodOffset")
  6737. {
  6738. if (is_legacy_es())
  6739. SPIRV_CROSS_THROW(join(op, " not allowed in legacy ES"));
  6740. require_extension_internal("GL_EXT_gpu_shader4");
  6741. }
  6742. // GLES has very limited support for shadow samplers.
  6743. // Basically shadow2D and shadow2DProj work through EXT_shadow_samplers,
  6744. // everything else can just throw
  6745. bool is_comparison = is_depth_image(imgtype, tex);
  6746. if (is_comparison && is_legacy_es())
  6747. {
  6748. if (op == "texture" || op == "textureProj")
  6749. require_extension_internal("GL_EXT_shadow_samplers");
  6750. else
  6751. SPIRV_CROSS_THROW(join(op, " not allowed on depth samplers in legacy ES"));
  6752. if (imgtype.image.dim == DimCube)
  6753. return "shadowCubeNV";
  6754. }
  6755. if (op == "textureSize")
  6756. {
  6757. if (is_legacy_es())
  6758. SPIRV_CROSS_THROW("textureSize not supported in legacy ES");
  6759. if (is_comparison)
  6760. SPIRV_CROSS_THROW("textureSize not supported on shadow sampler in legacy GLSL");
  6761. require_extension_internal("GL_EXT_gpu_shader4");
  6762. }
  6763. if (op == "texelFetch" && is_legacy_es())
  6764. SPIRV_CROSS_THROW("texelFetch not supported in legacy ES");
  6765. bool is_es_and_depth = is_legacy_es() && is_comparison;
  6766. std::string type_prefix = is_comparison ? "shadow" : "texture";
  6767. if (op == "texture")
  6768. return is_es_and_depth ? join(type_prefix, type, "EXT") : join(type_prefix, type);
  6769. else if (op == "textureLod")
  6770. return join(type_prefix, type, legacy_lod_ext ? "LodEXT" : "Lod");
  6771. else if (op == "textureProj")
  6772. return join(type_prefix, type, is_es_and_depth ? "ProjEXT" : "Proj");
  6773. else if (op == "textureGrad")
  6774. return join(type_prefix, type, is_legacy_es() ? "GradEXT" : is_legacy_desktop() ? "GradARB" : "Grad");
  6775. else if (op == "textureProjLod")
  6776. return join(type_prefix, type, legacy_lod_ext ? "ProjLodEXT" : "ProjLod");
  6777. else if (op == "textureLodOffset")
  6778. return join(type_prefix, type, "LodOffset");
  6779. else if (op == "textureProjGrad")
  6780. return join(type_prefix, type,
  6781. is_legacy_es() ? "ProjGradEXT" : is_legacy_desktop() ? "ProjGradARB" : "ProjGrad");
  6782. else if (op == "textureProjLodOffset")
  6783. return join(type_prefix, type, "ProjLodOffset");
  6784. else if (op == "textureSize")
  6785. return join("textureSize", type);
  6786. else if (op == "texelFetch")
  6787. return join("texelFetch", type);
  6788. else
  6789. {
  6790. SPIRV_CROSS_THROW(join("Unsupported legacy texture op: ", op));
  6791. }
  6792. }
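// Recognizes OpSelect between constant 0 and 1 so the select can be lowered to a plain constructor cast
// from bool (e.g. int(cond)); writes the constructor name into 'op' and returns true on success.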
  6793. bool CompilerGLSL::to_trivial_mix_op(const SPIRType &type, string &op, uint32_t left, uint32_t right, uint32_t lerp)
  6794. {
  6795. auto *cleft = maybe_get<SPIRConstant>(left);
  6796. auto *cright = maybe_get<SPIRConstant>(right);
  6797. auto &lerptype = expression_type(lerp);
  6798. // If our targets aren't constants, we cannot use construction.
  6799. if (!cleft || !cright)
  6800. return false;
  6801. // If our targets are spec constants, we cannot use construction.
  6802. if (cleft->specialization || cright->specialization)
  6803. return false;
  6804. auto &value_type = get<SPIRType>(cleft->constant_type);
  6805. if (lerptype.basetype != SPIRType::Boolean)
  6806. return false;
  6807. if (value_type.basetype == SPIRType::Struct || is_array(value_type))
  6808. return false;
  6809. if (!backend.use_constructor_splatting && value_type.vecsize != lerptype.vecsize)
  6810. return false;
  6811. // Only valid way in SPIR-V 1.4 to use matrices in select is a scalar select.
6812. // matrix(scalar) constructor fills in diagonals, so gets messy very quickly.
  6813. // Just avoid this case.
  6814. if (value_type.columns > 1)
  6815. return false;
6816. // If our bool selects between 0 and 1, we can cast from bool instead, making this a trivial constructor cast.
  6817. bool ret = true;
  6818. for (uint32_t row = 0; ret && row < value_type.vecsize; row++)
  6819. {
  6820. switch (type.basetype)
  6821. {
  6822. case SPIRType::Short:
  6823. case SPIRType::UShort:
  6824. ret = cleft->scalar_u16(0, row) == 0 && cright->scalar_u16(0, row) == 1;
  6825. break;
  6826. case SPIRType::Int:
  6827. case SPIRType::UInt:
  6828. ret = cleft->scalar(0, row) == 0 && cright->scalar(0, row) == 1;
  6829. break;
  6830. case SPIRType::Half:
  6831. ret = cleft->scalar_f16(0, row) == 0.0f && cright->scalar_f16(0, row) == 1.0f;
  6832. break;
  6833. case SPIRType::Float:
  6834. ret = cleft->scalar_f32(0, row) == 0.0f && cright->scalar_f32(0, row) == 1.0f;
  6835. break;
  6836. case SPIRType::Double:
  6837. ret = cleft->scalar_f64(0, row) == 0.0 && cright->scalar_f64(0, row) == 1.0;
  6838. break;
  6839. case SPIRType::Int64:
  6840. case SPIRType::UInt64:
  6841. ret = cleft->scalar_u64(0, row) == 0 && cright->scalar_u64(0, row) == 1;
  6842. break;
  6843. default:
  6844. ret = false;
  6845. break;
  6846. }
  6847. }
  6848. if (ret)
  6849. op = type_to_glsl_constructor(type);
  6850. return ret;
  6851. }
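// Builds a ?: based select. Scalar conditions produce a single ternary; vector conditions are expanded
// component by component inside a constructor.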
  6852. string CompilerGLSL::to_ternary_expression(const SPIRType &restype, uint32_t select, uint32_t true_value,
  6853. uint32_t false_value)
  6854. {
  6855. string expr;
  6856. auto &lerptype = expression_type(select);
  6857. if (lerptype.vecsize == 1)
  6858. expr = join(to_enclosed_expression(select), " ? ", to_enclosed_pointer_expression(true_value), " : ",
  6859. to_enclosed_pointer_expression(false_value));
  6860. else
  6861. {
  6862. auto swiz = [this](uint32_t expression, uint32_t i) { return to_extract_component_expression(expression, i); };
  6863. expr = type_to_glsl_constructor(restype);
  6864. expr += "(";
  6865. for (uint32_t i = 0; i < restype.vecsize; i++)
  6866. {
  6867. expr += swiz(select, i);
  6868. expr += " ? ";
  6869. expr += swiz(true_value, i);
  6870. expr += " : ";
  6871. expr += swiz(false_value, i);
  6872. if (i + 1 < restype.vecsize)
  6873. expr += ", ";
  6874. }
  6875. expr += ")";
  6876. }
  6877. return expr;
  6878. }
  6879. void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp)
  6880. {
  6881. auto &lerptype = expression_type(lerp);
  6882. auto &restype = get<SPIRType>(result_type);
  6883. // If this results in a variable pointer, assume it may be written through.
  6884. if (restype.pointer)
  6885. {
  6886. register_write(left);
  6887. register_write(right);
  6888. }
  6889. string mix_op;
  6890. bool has_boolean_mix = *backend.boolean_mix_function &&
  6891. ((options.es && options.version >= 310) || (!options.es && options.version >= 450));
  6892. bool trivial_mix = to_trivial_mix_op(restype, mix_op, left, right, lerp);
  6893. // Cannot use boolean mix when the lerp argument is just one boolean,
  6894. // fall back to regular trinary statements.
  6895. if (lerptype.vecsize == 1)
  6896. has_boolean_mix = false;
  6897. // If we can reduce the mix to a simple cast, do so.
6898. // This helps for cases like int(bool), uint(bool) which are implemented with
  6899. // OpSelect bool 1 0.
  6900. if (trivial_mix)
  6901. {
  6902. emit_unary_func_op(result_type, id, lerp, mix_op.c_str());
  6903. }
  6904. else if (!has_boolean_mix && lerptype.basetype == SPIRType::Boolean)
  6905. {
  6906. // Boolean mix not supported on desktop without extension.
  6907. // Was added in OpenGL 4.5 with ES 3.1 compat.
  6908. //
  6909. // Could use GL_EXT_shader_integer_mix on desktop at least,
  6910. // but Apple doesn't support it. :(
  6911. // Just implement it as ternary expressions.
  6912. auto expr = to_ternary_expression(get<SPIRType>(result_type), lerp, right, left);
  6913. emit_op(result_type, id, expr, should_forward(left) && should_forward(right) && should_forward(lerp));
  6914. inherit_expression_dependencies(id, left);
  6915. inherit_expression_dependencies(id, right);
  6916. inherit_expression_dependencies(id, lerp);
  6917. }
  6918. else if (lerptype.basetype == SPIRType::Boolean)
  6919. emit_trinary_func_op(result_type, id, left, right, lerp, backend.boolean_mix_function);
  6920. else
  6921. emit_trinary_func_op(result_type, id, left, right, lerp, "mix");
  6922. }
  6923. string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID samp_id)
  6924. {
  6925. // Keep track of the array indices we have used to load the image.
  6926. // We'll need to use the same array index into the combined image sampler array.
  6927. auto image_expr = to_non_uniform_aware_expression(image_id);
  6928. string array_expr;
  6929. auto array_index = image_expr.find_first_of('[');
  6930. if (array_index != string::npos)
  6931. array_expr = image_expr.substr(array_index, string::npos);
  6932. auto &args = current_function->arguments;
6933. // For GLSL and ESSL targets, we must enumerate all possible combinations of sampler2D(texture2D, sampler)
6934. // and redirect each combination to a dedicated combined sampler2D uniform.
  6935. auto *image = maybe_get_backing_variable(image_id);
  6936. auto *samp = maybe_get_backing_variable(samp_id);
  6937. if (image)
  6938. image_id = image->self;
  6939. if (samp)
  6940. samp_id = samp->self;
  6941. auto image_itr = find_if(begin(args), end(args),
  6942. [image_id](const SPIRFunction::Parameter &param) { return image_id == param.id; });
  6943. auto sampler_itr = find_if(begin(args), end(args),
  6944. [samp_id](const SPIRFunction::Parameter &param) { return samp_id == param.id; });
  6945. if (image_itr != end(args) || sampler_itr != end(args))
  6946. {
6947. // If either the image or the sampler originates from a function parameter, we will find it in our argument list.
  6948. bool global_image = image_itr == end(args);
  6949. bool global_sampler = sampler_itr == end(args);
  6950. VariableID iid = global_image ? image_id : VariableID(uint32_t(image_itr - begin(args)));
  6951. VariableID sid = global_sampler ? samp_id : VariableID(uint32_t(sampler_itr - begin(args)));
  6952. auto &combined = current_function->combined_parameters;
  6953. auto itr = find_if(begin(combined), end(combined), [=](const SPIRFunction::CombinedImageSamplerParameter &p) {
  6954. return p.global_image == global_image && p.global_sampler == global_sampler && p.image_id == iid &&
  6955. p.sampler_id == sid;
  6956. });
  6957. if (itr != end(combined))
  6958. return to_expression(itr->id) + array_expr;
  6959. else
  6960. {
  6961. SPIRV_CROSS_THROW("Cannot find mapping for combined sampler parameter, was "
  6962. "build_combined_image_samplers() used "
  6963. "before compile() was called?");
  6964. }
  6965. }
  6966. else
  6967. {
  6968. // For global sampler2D, look directly at the global remapping table.
  6969. auto &mapping = combined_image_samplers;
  6970. auto itr = find_if(begin(mapping), end(mapping), [image_id, samp_id](const CombinedImageSampler &combined) {
  6971. return combined.image_id == image_id && combined.sampler_id == samp_id;
  6972. });
  6973. if (itr != end(combined_image_samplers))
  6974. return to_expression(itr->combined_id) + array_expr;
  6975. else
  6976. {
  6977. SPIRV_CROSS_THROW("Cannot find mapping for combined sampler, was build_combined_image_samplers() used "
  6978. "before compile() was called?");
  6979. }
  6980. }
  6981. }
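// Whitelist of subgroup operations that can be emitted when targeting OpenGL rather than Vulkan.
// Arithmetic group operations are only accepted for Reduce, InclusiveScan and ExclusiveScan.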
  6982. bool CompilerGLSL::is_supported_subgroup_op_in_opengl(Op op, const uint32_t *ops)
  6983. {
  6984. switch (op)
  6985. {
  6986. case OpGroupNonUniformElect:
  6987. case OpGroupNonUniformBallot:
  6988. case OpGroupNonUniformBallotFindLSB:
  6989. case OpGroupNonUniformBallotFindMSB:
  6990. case OpGroupNonUniformBroadcast:
  6991. case OpGroupNonUniformBroadcastFirst:
  6992. case OpGroupNonUniformAll:
  6993. case OpGroupNonUniformAny:
  6994. case OpGroupNonUniformAllEqual:
  6995. case OpControlBarrier:
  6996. case OpMemoryBarrier:
  6997. case OpGroupNonUniformBallotBitCount:
  6998. case OpGroupNonUniformBallotBitExtract:
  6999. case OpGroupNonUniformInverseBallot:
  7000. return true;
  7001. case OpGroupNonUniformIAdd:
  7002. case OpGroupNonUniformFAdd:
  7003. case OpGroupNonUniformIMul:
  7004. case OpGroupNonUniformFMul:
  7005. {
  7006. const GroupOperation operation = static_cast<GroupOperation>(ops[3]);
  7007. if (operation == GroupOperationReduce || operation == GroupOperationInclusiveScan ||
  7008. operation == GroupOperationExclusiveScan)
  7009. {
  7010. return true;
  7011. }
  7012. else
  7013. {
  7014. return false;
  7015. }
  7016. }
  7017. default:
  7018. return false;
  7019. }
  7020. }
  7021. void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id)
  7022. {
  7023. if (options.vulkan_semantics && combined_image_samplers.empty())
  7024. {
  7025. emit_binary_func_op(result_type, result_id, image_id, samp_id,
  7026. type_to_glsl(get<SPIRType>(result_type), result_id).c_str());
  7027. }
  7028. else
  7029. {
  7030. // Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types.
  7031. emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true, true);
  7032. }
  7033. // Make sure to suppress usage tracking and any expression invalidation.
  7034. // It is illegal to create temporaries of opaque types.
  7035. forwarded_temporaries.erase(result_id);
  7036. }
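// True for sample/fetch/read opcodes that return a texel without a depth-compare reference.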
  7037. static inline bool image_opcode_is_sample_no_dref(Op op)
  7038. {
  7039. switch (op)
  7040. {
  7041. case OpImageSampleExplicitLod:
  7042. case OpImageSampleImplicitLod:
  7043. case OpImageSampleProjExplicitLod:
  7044. case OpImageSampleProjImplicitLod:
  7045. case OpImageFetch:
  7046. case OpImageRead:
  7047. case OpImageSparseSampleExplicitLod:
  7048. case OpImageSparseSampleImplicitLod:
  7049. case OpImageSparseSampleProjExplicitLod:
  7050. case OpImageSparseSampleProjImplicitLod:
  7051. case OpImageSparseFetch:
  7052. case OpImageSparseRead:
  7053. return true;
  7054. default:
  7055. return false;
  7056. }
  7057. }
  7058. void CompilerGLSL::emit_sparse_feedback_temporaries(uint32_t result_type_id, uint32_t id, uint32_t &feedback_id,
  7059. uint32_t &texel_id)
  7060. {
  7061. // Need to allocate two temporaries.
  7062. if (options.es)
  7063. SPIRV_CROSS_THROW("Sparse texture feedback is not supported on ESSL.");
  7064. require_extension_internal("GL_ARB_sparse_texture2");
  7065. auto &temps = extra_sub_expressions[id];
  7066. if (temps == 0)
  7067. temps = ir.increase_bound_by(2);
  7068. feedback_id = temps + 0;
  7069. texel_id = temps + 1;
  7070. auto &return_type = get<SPIRType>(result_type_id);
  7071. if (return_type.basetype != SPIRType::Struct || return_type.member_types.size() != 2)
  7072. SPIRV_CROSS_THROW("Invalid return type for sparse feedback.");
  7073. emit_uninitialized_temporary(return_type.member_types[0], feedback_id);
  7074. emit_uninitialized_temporary(return_type.member_types[1], texel_id);
  7075. }
  7076. uint32_t CompilerGLSL::get_sparse_feedback_texel_id(uint32_t id) const
  7077. {
  7078. auto itr = extra_sub_expressions.find(id);
  7079. if (itr == extra_sub_expressions.end())
  7080. return 0;
  7081. else
  7082. return itr->second + 1;
  7083. }
  7084. void CompilerGLSL::emit_texture_op(const Instruction &i, bool sparse)
  7085. {
  7086. auto *ops = stream(i);
  7087. auto op = static_cast<Op>(i.op);
  7088. SmallVector<uint32_t> inherited_expressions;
  7089. uint32_t result_type_id = ops[0];
  7090. uint32_t id = ops[1];
  7091. auto &return_type = get<SPIRType>(result_type_id);
  7092. uint32_t sparse_code_id = 0;
  7093. uint32_t sparse_texel_id = 0;
  7094. if (sparse)
  7095. emit_sparse_feedback_temporaries(result_type_id, id, sparse_code_id, sparse_texel_id);
  7096. bool forward = false;
  7097. string expr = to_texture_op(i, sparse, &forward, inherited_expressions);
  7098. if (sparse)
  7099. {
  7100. statement(to_expression(sparse_code_id), " = ", expr, ";");
  7101. expr = join(type_to_glsl(return_type), "(", to_expression(sparse_code_id), ", ", to_expression(sparse_texel_id),
  7102. ")");
  7103. forward = true;
  7104. inherited_expressions.clear();
  7105. }
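// Illustrative note (not part of the original source; names are placeholders): with sparse feedback,
// the emitted GLSL ends up roughly as
//   int _code = sparseTextureARB(uTex, uv, _texel);
//   _res = ResultStruct(_code, _texel);
// i.e. the residency code is captured in one temporary, the texel in another, and the pair is
// reassembled into the SPIR-V sparse result struct below.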
  7106. emit_op(result_type_id, id, expr, forward);
  7107. for (auto &inherit : inherited_expressions)
  7108. inherit_expression_dependencies(id, inherit);
  7109. // Do not register sparse ops as control dependent as they are always lowered to a temporary.
  7110. switch (op)
  7111. {
  7112. case OpImageSampleDrefImplicitLod:
  7113. case OpImageSampleImplicitLod:
  7114. case OpImageSampleProjImplicitLod:
  7115. case OpImageSampleProjDrefImplicitLod:
  7116. register_control_dependent_expression(id);
  7117. break;
  7118. default:
  7119. break;
  7120. }
  7121. }
  7122. std::string CompilerGLSL::to_texture_op(const Instruction &i, bool sparse, bool *forward,
  7123. SmallVector<uint32_t> &inherited_expressions)
  7124. {
  7125. auto *ops = stream(i);
  7126. auto op = static_cast<Op>(i.op);
  7127. uint32_t length = i.length;
  7128. uint32_t result_type_id = ops[0];
  7129. VariableID img = ops[2];
  7130. uint32_t coord = ops[3];
  7131. uint32_t dref = 0;
  7132. uint32_t comp = 0;
  7133. bool gather = false;
  7134. bool proj = false;
  7135. bool fetch = false;
  7136. bool nonuniform_expression = false;
  7137. const uint32_t *opt = nullptr;
  7138. auto &result_type = get<SPIRType>(result_type_id);
  7139. inherited_expressions.push_back(coord);
  7140. if (has_decoration(img, DecorationNonUniform) && !maybe_get_backing_variable(img))
  7141. nonuniform_expression = true;
  7142. switch (op)
  7143. {
  7144. case OpImageSampleDrefImplicitLod:
  7145. case OpImageSampleDrefExplicitLod:
  7146. case OpImageSparseSampleDrefImplicitLod:
  7147. case OpImageSparseSampleDrefExplicitLod:
  7148. dref = ops[4];
  7149. opt = &ops[5];
  7150. length -= 5;
  7151. break;
  7152. case OpImageSampleProjDrefImplicitLod:
  7153. case OpImageSampleProjDrefExplicitLod:
  7154. case OpImageSparseSampleProjDrefImplicitLod:
  7155. case OpImageSparseSampleProjDrefExplicitLod:
  7156. dref = ops[4];
  7157. opt = &ops[5];
  7158. length -= 5;
  7159. proj = true;
  7160. break;
  7161. case OpImageDrefGather:
  7162. case OpImageSparseDrefGather:
  7163. dref = ops[4];
  7164. opt = &ops[5];
  7165. length -= 5;
  7166. gather = true;
  7167. if (options.es && options.version < 310)
  7168. SPIRV_CROSS_THROW("textureGather requires ESSL 310.");
  7169. else if (!options.es && options.version < 400)
  7170. SPIRV_CROSS_THROW("textureGather with depth compare requires GLSL 400.");
  7171. break;
  7172. case OpImageGather:
  7173. case OpImageSparseGather:
  7174. comp = ops[4];
  7175. opt = &ops[5];
  7176. length -= 5;
  7177. gather = true;
  7178. if (options.es && options.version < 310)
  7179. SPIRV_CROSS_THROW("textureGather requires ESSL 310.");
  7180. else if (!options.es && options.version < 400)
  7181. {
  7182. if (!expression_is_constant_null(comp))
  7183. SPIRV_CROSS_THROW("textureGather with component requires GLSL 400.");
  7184. require_extension_internal("GL_ARB_texture_gather");
  7185. }
  7186. break;
  7187. case OpImageFetch:
  7188. case OpImageSparseFetch:
  7189. case OpImageRead: // Reads == fetches in Metal (other langs will not get here)
  7190. opt = &ops[4];
  7191. length -= 4;
  7192. fetch = true;
  7193. break;
  7194. case OpImageSampleProjImplicitLod:
  7195. case OpImageSampleProjExplicitLod:
  7196. case OpImageSparseSampleProjImplicitLod:
  7197. case OpImageSparseSampleProjExplicitLod:
  7198. opt = &ops[4];
  7199. length -= 4;
  7200. proj = true;
  7201. break;
  7202. default:
  7203. opt = &ops[4];
  7204. length -= 4;
  7205. break;
  7206. }
  7207. // Bypass pointers because we need the real image struct
  7208. auto &type = expression_type(img);
  7209. auto &imgtype = get<SPIRType>(type.self);
  7210. uint32_t coord_components = 0;
  7211. switch (imgtype.image.dim)
  7212. {
  7213. case Dim1D:
  7214. coord_components = 1;
  7215. break;
  7216. case Dim2D:
  7217. coord_components = 2;
  7218. break;
  7219. case Dim3D:
  7220. coord_components = 3;
  7221. break;
  7222. case DimCube:
  7223. coord_components = 3;
  7224. break;
  7225. case DimBuffer:
  7226. coord_components = 1;
  7227. break;
  7228. default:
  7229. coord_components = 2;
  7230. break;
  7231. }
  7232. if (dref)
  7233. inherited_expressions.push_back(dref);
  7234. if (proj)
  7235. coord_components++;
  7236. if (imgtype.image.arrayed)
  7237. coord_components++;
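// For illustration (not from the original source): a plain 2D sample uses 2 coordinate components,
// an arrayed 2D sample uses 3 (x, y, layer), and a projective 2D sample uses 3 (x, y, q).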
  7238. uint32_t bias = 0;
  7239. uint32_t lod = 0;
  7240. uint32_t grad_x = 0;
  7241. uint32_t grad_y = 0;
  7242. uint32_t coffset = 0;
  7243. uint32_t offset = 0;
  7244. uint32_t coffsets = 0;
  7245. uint32_t sample = 0;
  7246. uint32_t minlod = 0;
  7247. uint32_t flags = 0;
  7248. if (length)
  7249. {
  7250. flags = *opt++;
  7251. length--;
  7252. }
  7253. auto test = [&](uint32_t &v, uint32_t flag) {
  7254. if (length && (flags & flag))
  7255. {
  7256. v = *opt++;
  7257. inherited_expressions.push_back(v);
  7258. length--;
  7259. }
  7260. };
  7261. test(bias, ImageOperandsBiasMask);
  7262. test(lod, ImageOperandsLodMask);
  7263. test(grad_x, ImageOperandsGradMask);
  7264. test(grad_y, ImageOperandsGradMask);
  7265. test(coffset, ImageOperandsConstOffsetMask);
  7266. test(offset, ImageOperandsOffsetMask);
  7267. test(coffsets, ImageOperandsConstOffsetsMask);
  7268. test(sample, ImageOperandsSampleMask);
  7269. test(minlod, ImageOperandsMinLodMask);
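// Illustrative note (assumption, not original): image operands are decoded in increasing mask-bit order,
// so e.g. Bias | ConstOffset (0x1 | 0x8) stores the bias operand first, then the constant offset.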
  7270. TextureFunctionBaseArguments base_args = {};
  7271. base_args.img = img;
  7272. base_args.imgtype = &imgtype;
  7273. base_args.is_fetch = fetch != 0;
  7274. base_args.is_gather = gather != 0;
  7275. base_args.is_proj = proj != 0;
  7276. string expr;
  7277. TextureFunctionNameArguments name_args = {};
  7278. name_args.base = base_args;
  7279. name_args.has_array_offsets = coffsets != 0;
  7280. name_args.has_offset = coffset != 0 || offset != 0;
  7281. name_args.has_grad = grad_x != 0 || grad_y != 0;
  7282. name_args.has_dref = dref != 0;
  7283. name_args.is_sparse_feedback = sparse;
  7284. name_args.has_min_lod = minlod != 0;
  7285. name_args.lod = lod;
  7286. expr += to_function_name(name_args);
  7287. expr += "(";
  7288. uint32_t sparse_texel_id = 0;
  7289. if (sparse)
  7290. sparse_texel_id = get_sparse_feedback_texel_id(ops[1]);
  7291. TextureFunctionArguments args = {};
  7292. args.base = base_args;
  7293. args.coord = coord;
  7294. args.coord_components = coord_components;
  7295. args.dref = dref;
  7296. args.grad_x = grad_x;
  7297. args.grad_y = grad_y;
  7298. args.lod = lod;
  7299. args.has_array_offsets = coffsets != 0;
  7300. if (coffsets)
  7301. args.offset = coffsets;
  7302. else if (coffset)
  7303. args.offset = coffset;
  7304. else
  7305. args.offset = offset;
  7306. args.bias = bias;
  7307. args.component = comp;
  7308. args.sample = sample;
  7309. args.sparse_texel = sparse_texel_id;
  7310. args.min_lod = minlod;
  7311. args.nonuniform_expression = nonuniform_expression;
  7312. expr += to_function_args(args, forward);
  7313. expr += ")";
7314. // In legacy desktop GLSL, shadowX() returns vec4, while modern texture(samplerXShadow) returns float. Extract .r here to match.
  7315. if (is_legacy() && !options.es && is_depth_image(imgtype, img))
  7316. expr += ".r";
7317. // Sampling from a texture which was deduced to be a depth image might actually return 1 component here.
7318. // Remap back to 4 components as the sampling opcodes expect.
  7319. if (backend.comparison_image_samples_scalar && image_opcode_is_sample_no_dref(op))
  7320. {
  7321. bool image_is_depth = false;
  7322. const auto *combined = maybe_get<SPIRCombinedImageSampler>(img);
  7323. VariableID image_id = combined ? combined->image : img;
  7324. if (combined && is_depth_image(imgtype, combined->image))
  7325. image_is_depth = true;
  7326. else if (is_depth_image(imgtype, img))
  7327. image_is_depth = true;
  7328. // We must also check the backing variable for the image.
  7329. // We might have loaded an OpImage, and used that handle for two different purposes.
  7330. // Once with comparison, once without.
  7331. auto *image_variable = maybe_get_backing_variable(image_id);
  7332. if (image_variable && is_depth_image(get<SPIRType>(image_variable->basetype), image_variable->self))
  7333. image_is_depth = true;
  7334. if (image_is_depth)
  7335. expr = remap_swizzle(result_type, 1, expr);
  7336. }
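// Illustrative (assumption about the exact output): a deduced-depth sample that yields a single float r
// is expanded back to the 4-component result type here, roughly as vec4(r).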
  7337. if (!sparse && !backend.support_small_type_sampling_result && result_type.width < 32)
  7338. {
7339. // Just value-cast (narrowing) to the expected type, since we cannot rely on narrowing to happen automatically.
7340. // Hopefully the compiler picks this up and converts the texturing instruction to the appropriate precision.
  7341. expr = join(type_to_glsl_constructor(result_type), "(", expr, ")");
  7342. }
  7343. // Deals with reads from MSL. We might need to downconvert to fewer components.
  7344. if (op == OpImageRead)
  7345. expr = remap_swizzle(result_type, 4, expr);
  7346. return expr;
  7347. }
  7348. bool CompilerGLSL::expression_is_constant_null(uint32_t id) const
  7349. {
  7350. auto *c = maybe_get<SPIRConstant>(id);
  7351. if (!c)
  7352. return false;
  7353. return c->constant_is_null();
  7354. }
  7355. bool CompilerGLSL::expression_is_non_value_type_array(uint32_t ptr)
  7356. {
  7357. auto &type = expression_type(ptr);
  7358. if (!is_array(get_pointee_type(type)))
  7359. return false;
  7360. if (!backend.array_is_value_type)
  7361. return true;
  7362. auto *var = maybe_get_backing_variable(ptr);
  7363. if (!var)
  7364. return false;
  7365. auto &backed_type = get<SPIRType>(var->basetype);
  7366. return !backend.array_is_value_type_in_buffer_blocks && backed_type.basetype == SPIRType::Struct &&
  7367. has_member_decoration(backed_type.self, 0, DecorationOffset);
  7368. }
  7369. // Returns the function name for a texture sampling function for the specified image and sampling characteristics.
  7370. // For some subclasses, the function is a method on the specified image.
  7371. string CompilerGLSL::to_function_name(const TextureFunctionNameArguments &args)
  7372. {
  7373. if (args.has_min_lod)
  7374. {
  7375. if (options.es)
  7376. SPIRV_CROSS_THROW("Sparse residency is not supported in ESSL.");
  7377. require_extension_internal("GL_ARB_sparse_texture_clamp");
  7378. }
  7379. string fname;
  7380. auto &imgtype = *args.base.imgtype;
  7381. VariableID tex = args.base.img;
  7382. // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
  7383. // To emulate this, we will have to use textureGrad with a constant gradient of 0.
  7384. // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
  7385. // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
  7386. bool workaround_lod_array_shadow_as_grad = false;
  7387. if (((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
  7388. is_depth_image(imgtype, tex) && args.lod && !args.base.is_fetch)
  7389. {
  7390. if (!expression_is_constant_null(args.lod))
  7391. {
  7392. SPIRV_CROSS_THROW("textureLod on sampler2DArrayShadow is not constant 0.0. This cannot be "
  7393. "expressed in GLSL.");
  7394. }
  7395. workaround_lod_array_shadow_as_grad = true;
  7396. }
  7397. if (args.is_sparse_feedback)
  7398. fname += "sparse";
  7399. if (args.base.is_fetch)
  7400. fname += args.is_sparse_feedback ? "TexelFetch" : "texelFetch";
  7401. else
  7402. {
  7403. fname += args.is_sparse_feedback ? "Texture" : "texture";
  7404. if (args.base.is_gather)
  7405. fname += "Gather";
  7406. if (args.has_array_offsets)
  7407. fname += "Offsets";
  7408. if (args.base.is_proj)
  7409. fname += "Proj";
  7410. if (args.has_grad || workaround_lod_array_shadow_as_grad)
  7411. fname += "Grad";
  7412. if (args.lod != 0 && !workaround_lod_array_shadow_as_grad)
  7413. fname += "Lod";
  7414. }
  7415. if (args.has_offset)
  7416. fname += "Offset";
  7417. if (args.has_min_lod)
  7418. fname += "Clamp";
  7419. if (args.is_sparse_feedback || args.has_min_lod)
  7420. fname += "ARB";
  7421. return (is_legacy() && !args.base.is_gather) ? legacy_tex_op(fname, imgtype, tex) : fname;
  7422. }
  7423. std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id)
  7424. {
  7425. auto *var = maybe_get_backing_variable(id);
  7426. // If we are fetching from a plain OpTypeImage, we must combine with a dummy sampler in GLSL.
  7427. // In Vulkan GLSL, we can make use of the newer GL_EXT_samplerless_texture_functions.
  7428. if (var)
  7429. {
  7430. auto &type = get<SPIRType>(var->basetype);
  7431. if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer)
  7432. {
  7433. if (options.vulkan_semantics)
  7434. {
  7435. if (dummy_sampler_id)
  7436. {
  7437. // Don't need to consider Shadow state since the dummy sampler is always non-shadow.
  7438. auto sampled_type = type;
  7439. sampled_type.basetype = SPIRType::SampledImage;
  7440. return join(type_to_glsl(sampled_type), "(", to_non_uniform_aware_expression(id), ", ",
  7441. to_expression(dummy_sampler_id), ")");
  7442. }
  7443. else
  7444. {
  7445. // Newer glslang supports this extension to deal with texture2D as argument to texture functions.
  7446. require_extension_internal("GL_EXT_samplerless_texture_functions");
  7447. }
  7448. }
  7449. else
  7450. {
  7451. if (!dummy_sampler_id)
  7452. SPIRV_CROSS_THROW("Cannot find dummy sampler ID. Was "
  7453. "build_dummy_sampler_for_combined_images() called?");
  7454. return to_combined_image_sampler(id, dummy_sampler_id);
  7455. }
  7456. }
  7457. }
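// Illustrative sketch (identifier names are made up): in Vulkan GLSL with a dummy sampler, a separate
// texture2D uTex becomes e.g. sampler2D(uTex, _dummy); in plain GLSL, the combined dummy sampler
// created by build_dummy_sampler_for_combined_images() is referenced instead.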
  7458. return to_non_uniform_aware_expression(id);
  7459. }
  7460. // Returns the function args for a texture sampling function for the specified image and sampling characteristics.
  7461. string CompilerGLSL::to_function_args(const TextureFunctionArguments &args, bool *p_forward)
  7462. {
  7463. VariableID img = args.base.img;
  7464. auto &imgtype = *args.base.imgtype;
  7465. string farg_str;
  7466. if (args.base.is_fetch)
  7467. farg_str = convert_separate_image_to_expression(img);
  7468. else
  7469. farg_str = to_non_uniform_aware_expression(img);
  7470. if (args.nonuniform_expression && farg_str.find_first_of('[') != string::npos)
  7471. {
  7472. // Only emit nonuniformEXT() wrapper if the underlying expression is arrayed in some way.
  7473. farg_str = join(backend.nonuniform_qualifier, "(", farg_str, ")");
  7474. }
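// E.g. an arrayed access such as uTextures[nonConstantIndex] is wrapped here as
// nonuniformEXT(uTextures[nonConstantIndex]) (illustrative; identifier names are made up).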
  7475. bool swizz_func = backend.swizzle_is_function;
  7476. auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char * {
  7477. if (comps == in_comps)
  7478. return "";
  7479. switch (comps)
  7480. {
  7481. case 1:
  7482. return ".x";
  7483. case 2:
  7484. return swizz_func ? ".xy()" : ".xy";
  7485. case 3:
  7486. return swizz_func ? ".xyz()" : ".xyz";
  7487. default:
  7488. return "";
  7489. }
  7490. };
  7491. bool forward = should_forward(args.coord);
  7492. // The IR can give us more components than we need, so chop them off as needed.
  7493. auto swizzle_expr = swizzle(args.coord_components, expression_type(args.coord).vecsize);
  7494. // Only enclose the UV expression if needed.
  7495. auto coord_expr =
  7496. (*swizzle_expr == '\0') ? to_expression(args.coord) : (to_enclosed_expression(args.coord) + swizzle_expr);
  7497. // texelFetch only takes int, not uint.
  7498. auto &coord_type = expression_type(args.coord);
  7499. if (coord_type.basetype == SPIRType::UInt)
  7500. {
  7501. auto expected_type = coord_type;
  7502. expected_type.vecsize = args.coord_components;
  7503. expected_type.basetype = SPIRType::Int;
  7504. coord_expr = bitcast_expression(expected_type, coord_type.basetype, coord_expr);
  7505. }
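// Illustrative: an unsigned coordinate such as a uvec2 is value-cast here, so texelFetch and friends
// receive ivec2(uv), since they only accept signed integer coordinates.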
  7506. // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
  7507. // To emulate this, we will have to use textureGrad with a constant gradient of 0.
  7508. // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
  7509. // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
  7510. bool workaround_lod_array_shadow_as_grad =
  7511. ((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
  7512. is_depth_image(imgtype, img) && args.lod != 0 && !args.base.is_fetch;
  7513. if (args.dref)
  7514. {
  7515. forward = forward && should_forward(args.dref);
  7516. // SPIR-V splits dref and coordinate.
  7517. if (args.base.is_gather ||
  7518. args.coord_components == 4) // GLSL also splits the arguments in two. Same for textureGather.
  7519. {
  7520. farg_str += ", ";
  7521. farg_str += to_expression(args.coord);
  7522. farg_str += ", ";
  7523. farg_str += to_expression(args.dref);
  7524. }
  7525. else if (args.base.is_proj)
  7526. {
  7527. // Have to reshuffle so we get vec4(coord, dref, proj), special case.
7528. // Other shading languages split up the arguments for coord and compare value, like SPIR-V does.
  7529. // The coordinate type for textureProj shadow is always vec4 even for sampler1DShadow.
  7530. farg_str += ", vec4(";
  7531. if (imgtype.image.dim == Dim1D)
  7532. {
  7533. // Could reuse coord_expr, but we will mess up the temporary usage checking.
  7534. farg_str += to_enclosed_expression(args.coord) + ".x";
  7535. farg_str += ", ";
  7536. farg_str += "0.0, ";
  7537. farg_str += to_expression(args.dref);
  7538. farg_str += ", ";
  7539. farg_str += to_enclosed_expression(args.coord) + ".y)";
  7540. }
  7541. else if (imgtype.image.dim == Dim2D)
  7542. {
  7543. // Could reuse coord_expr, but we will mess up the temporary usage checking.
  7544. farg_str += to_enclosed_expression(args.coord) + (swizz_func ? ".xy()" : ".xy");
  7545. farg_str += ", ";
  7546. farg_str += to_expression(args.dref);
  7547. farg_str += ", ";
  7548. farg_str += to_enclosed_expression(args.coord) + ".z)";
  7549. }
  7550. else
  7551. SPIRV_CROSS_THROW("Invalid type for textureProj with shadow.");
  7552. }
  7553. else
  7554. {
  7555. // Create a composite which merges coord/dref into a single vector.
  7556. auto type = expression_type(args.coord);
  7557. type.vecsize = args.coord_components + 1;
  7558. if (imgtype.image.dim == Dim1D && options.es)
  7559. type.vecsize++;
  7560. farg_str += ", ";
  7561. farg_str += type_to_glsl_constructor(type);
  7562. farg_str += "(";
  7563. if (imgtype.image.dim == Dim1D && options.es)
  7564. {
  7565. if (imgtype.image.arrayed)
  7566. {
  7567. farg_str += enclose_expression(coord_expr) + ".x";
  7568. farg_str += ", 0.0, ";
  7569. farg_str += enclose_expression(coord_expr) + ".y";
  7570. }
  7571. else
  7572. {
  7573. farg_str += coord_expr;
  7574. farg_str += ", 0.0";
  7575. }
  7576. }
  7577. else
  7578. farg_str += coord_expr;
  7579. farg_str += ", ";
  7580. farg_str += to_expression(args.dref);
  7581. farg_str += ")";
  7582. }
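// Illustrative (names made up): a non-projective, non-gather shadow sample such as
// OpImageSampleDrefImplicitLod on sampler2DShadow ends up as texture(uShadow, vec3(uv, dref)).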
  7583. }
  7584. else
  7585. {
  7586. if (imgtype.image.dim == Dim1D && options.es)
  7587. {
  7588. // Have to fake a second coordinate.
  7589. if (type_is_floating_point(coord_type))
  7590. {
  7591. // Cannot mix proj and array.
  7592. if (imgtype.image.arrayed || args.base.is_proj)
  7593. {
  7594. coord_expr = join("vec3(", enclose_expression(coord_expr), ".x, 0.0, ",
  7595. enclose_expression(coord_expr), ".y)");
  7596. }
  7597. else
  7598. coord_expr = join("vec2(", coord_expr, ", 0.0)");
  7599. }
  7600. else
  7601. {
  7602. if (imgtype.image.arrayed)
  7603. {
  7604. coord_expr = join("ivec3(", enclose_expression(coord_expr),
  7605. ".x, 0, ",
  7606. enclose_expression(coord_expr), ".y)");
  7607. }
  7608. else
  7609. coord_expr = join("ivec2(", coord_expr, ", 0)");
  7610. }
  7611. }
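// Illustrative: ESSL has no 1D samplers, so a 1D lookup is padded with a dummy second coordinate
// and sampled as 2D, e.g. texture(uTex, vec2(u, 0.0)).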
  7612. farg_str += ", ";
  7613. farg_str += coord_expr;
  7614. }
  7615. if (args.grad_x || args.grad_y)
  7616. {
  7617. forward = forward && should_forward(args.grad_x);
  7618. forward = forward && should_forward(args.grad_y);
  7619. farg_str += ", ";
  7620. farg_str += to_expression(args.grad_x);
  7621. farg_str += ", ";
  7622. farg_str += to_expression(args.grad_y);
  7623. }
  7624. if (args.lod)
  7625. {
  7626. if (workaround_lod_array_shadow_as_grad)
  7627. {
  7628. // Implement textureGrad() instead. LOD == 0.0 is implemented as gradient of 0.0.
  7629. // Implementing this as plain texture() is not safe on some implementations.
  7630. if (imgtype.image.dim == Dim2D)
  7631. farg_str += ", vec2(0.0), vec2(0.0)";
  7632. else if (imgtype.image.dim == DimCube)
  7633. farg_str += ", vec3(0.0), vec3(0.0)";
  7634. }
  7635. else
  7636. {
  7637. forward = forward && should_forward(args.lod);
  7638. farg_str += ", ";
  7639. // Lod expression for TexelFetch in GLSL must be int, and only int.
  7640. if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms)
  7641. farg_str += bitcast_expression(SPIRType::Int, args.lod);
  7642. else
  7643. farg_str += to_expression(args.lod);
  7644. }
  7645. }
  7646. else if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms)
  7647. {
7648. // The Lod argument is optional in OpImageFetch, but texelFetch requires a LOD, so pick 0 as the default.
  7649. farg_str += ", 0";
  7650. }
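// Illustrative: an OpImageFetch without an explicit LOD therefore becomes e.g. texelFetch(uTex, coord, 0).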
  7651. if (args.offset)
  7652. {
  7653. forward = forward && should_forward(args.offset);
  7654. farg_str += ", ";
  7655. farg_str += bitcast_expression(SPIRType::Int, args.offset);
  7656. }
  7657. if (args.sample)
  7658. {
  7659. farg_str += ", ";
  7660. farg_str += bitcast_expression(SPIRType::Int, args.sample);
  7661. }
  7662. if (args.min_lod)
  7663. {
  7664. farg_str += ", ";
  7665. farg_str += to_expression(args.min_lod);
  7666. }
  7667. if (args.sparse_texel)
  7668. {
7669. // The sparse texel output parameter comes after everything else, except that it precedes the optional component/bias arguments.
  7670. farg_str += ", ";
  7671. farg_str += to_expression(args.sparse_texel);
  7672. }
  7673. if (args.bias)
  7674. {
  7675. forward = forward && should_forward(args.bias);
  7676. farg_str += ", ";
  7677. farg_str += to_expression(args.bias);
  7678. }
  7679. if (args.component && !expression_is_constant_null(args.component))
  7680. {
  7681. forward = forward && should_forward(args.component);
  7682. farg_str += ", ";
  7683. farg_str += bitcast_expression(SPIRType::Int, args.component);
  7684. }
  7685. *p_forward = forward;
  7686. return farg_str;
  7687. }
  7688. Op CompilerGLSL::get_remapped_spirv_op(Op op) const
  7689. {
  7690. if (options.relax_nan_checks)
  7691. {
  7692. switch (op)
  7693. {
  7694. case OpFUnordLessThan:
  7695. op = OpFOrdLessThan;
  7696. break;
  7697. case OpFUnordLessThanEqual:
  7698. op = OpFOrdLessThanEqual;
  7699. break;
  7700. case OpFUnordGreaterThan:
  7701. op = OpFOrdGreaterThan;
  7702. break;
  7703. case OpFUnordGreaterThanEqual:
  7704. op = OpFOrdGreaterThanEqual;
  7705. break;
  7706. case OpFUnordEqual:
  7707. op = OpFOrdEqual;
  7708. break;
  7709. case OpFOrdNotEqual:
  7710. op = OpFUnordNotEqual;
  7711. break;
  7712. default:
  7713. break;
  7714. }
  7715. }
  7716. return op;
  7717. }
  7718. GLSLstd450 CompilerGLSL::get_remapped_glsl_op(GLSLstd450 std450_op) const
  7719. {
  7720. // Relax to non-NaN aware opcodes.
  7721. if (options.relax_nan_checks)
  7722. {
  7723. switch (std450_op)
  7724. {
  7725. case GLSLstd450NClamp:
  7726. std450_op = GLSLstd450FClamp;
  7727. break;
  7728. case GLSLstd450NMin:
  7729. std450_op = GLSLstd450FMin;
  7730. break;
  7731. case GLSLstd450NMax:
  7732. std450_op = GLSLstd450FMax;
  7733. break;
  7734. default:
  7735. break;
  7736. }
  7737. }
  7738. return std450_op;
  7739. }
  7740. void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t length)
  7741. {
  7742. auto op = static_cast<GLSLstd450>(eop);
  7743. if (is_legacy() && is_unsigned_glsl_opcode(op))
  7744. SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy GLSL targets.");
  7745. // If we need to do implicit bitcasts, make sure we do it with the correct type.
  7746. uint32_t integer_width = get_integer_width_for_glsl_instruction(op, args, length);
  7747. auto int_type = to_signed_basetype(integer_width);
  7748. auto uint_type = to_unsigned_basetype(integer_width);
  7749. op = get_remapped_glsl_op(op);
  7750. switch (op)
  7751. {
  7752. // FP fiddling
  7753. case GLSLstd450Round:
  7754. if (!is_legacy())
  7755. emit_unary_func_op(result_type, id, args[0], "round");
  7756. else
  7757. {
  7758. auto op0 = to_enclosed_expression(args[0]);
  7759. auto &op0_type = expression_type(args[0]);
  7760. auto expr = join("floor(", op0, " + ", type_to_glsl_constructor(op0_type), "(0.5))");
  7761. bool forward = should_forward(args[0]);
  7762. emit_op(result_type, id, expr, forward);
  7763. inherit_expression_dependencies(id, args[0]);
  7764. }
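// Illustrative: on legacy targets, round(v) is therefore emitted as e.g. floor(v + vec3(0.5)) for a vec3 input.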
  7765. break;
  7766. case GLSLstd450RoundEven:
  7767. if (!is_legacy())
  7768. emit_unary_func_op(result_type, id, args[0], "roundEven");
  7769. else if (!options.es)
  7770. {
  7771. // This extension provides round() with round-to-even semantics.
  7772. require_extension_internal("GL_EXT_gpu_shader4");
  7773. emit_unary_func_op(result_type, id, args[0], "round");
  7774. }
  7775. else
  7776. SPIRV_CROSS_THROW("roundEven supported only in ESSL 300.");
  7777. break;
  7778. case GLSLstd450Trunc:
  7779. if (!is_legacy())
  7780. emit_unary_func_op(result_type, id, args[0], "trunc");
  7781. else
  7782. {
  7783. // Implement by value-casting to int and back.
  7784. bool forward = should_forward(args[0]);
  7785. auto op0 = to_unpacked_expression(args[0]);
  7786. auto &op0_type = expression_type(args[0]);
  7787. auto via_type = op0_type;
  7788. via_type.basetype = SPIRType::Int;
  7789. auto expr = join(type_to_glsl(op0_type), "(", type_to_glsl(via_type), "(", op0, "))");
  7790. emit_op(result_type, id, expr, forward);
  7791. inherit_expression_dependencies(id, args[0]);
  7792. }
  7793. break;
  7794. case GLSLstd450SAbs:
  7795. emit_unary_func_op_cast(result_type, id, args[0], "abs", int_type, int_type);
  7796. break;
  7797. case GLSLstd450FAbs:
  7798. emit_unary_func_op(result_type, id, args[0], "abs");
  7799. break;
  7800. case GLSLstd450SSign:
  7801. emit_unary_func_op_cast(result_type, id, args[0], "sign", int_type, int_type);
  7802. break;
  7803. case GLSLstd450FSign:
  7804. emit_unary_func_op(result_type, id, args[0], "sign");
  7805. break;
  7806. case GLSLstd450Floor:
  7807. emit_unary_func_op(result_type, id, args[0], "floor");
  7808. break;
  7809. case GLSLstd450Ceil:
  7810. emit_unary_func_op(result_type, id, args[0], "ceil");
  7811. break;
  7812. case GLSLstd450Fract:
  7813. emit_unary_func_op(result_type, id, args[0], "fract");
  7814. break;
  7815. case GLSLstd450Radians:
  7816. emit_unary_func_op(result_type, id, args[0], "radians");
  7817. break;
  7818. case GLSLstd450Degrees:
  7819. emit_unary_func_op(result_type, id, args[0], "degrees");
  7820. break;
  7821. case GLSLstd450Fma:
  7822. if ((!options.es && options.version < 400) || (options.es && options.version < 320))
  7823. {
  7824. auto expr = join(to_enclosed_expression(args[0]), " * ", to_enclosed_expression(args[1]), " + ",
  7825. to_enclosed_expression(args[2]));
  7826. emit_op(result_type, id, expr,
  7827. should_forward(args[0]) && should_forward(args[1]) && should_forward(args[2]));
  7828. for (uint32_t i = 0; i < 3; i++)
  7829. inherit_expression_dependencies(id, args[i]);
  7830. }
  7831. else
  7832. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fma");
  7833. break;
  7834. case GLSLstd450Modf:
  7835. register_call_out_argument(args[1]);
  7836. if (!is_legacy())
  7837. {
  7838. forced_temporaries.insert(id);
  7839. emit_binary_func_op(result_type, id, args[0], args[1], "modf");
  7840. }
  7841. else
  7842. {
7843. // NB: legacy GLSL doesn't have trunc() either, so we do a value cast.
  7844. auto &op1_type = expression_type(args[1]);
  7845. auto via_type = op1_type;
  7846. via_type.basetype = SPIRType::Int;
  7847. statement(to_expression(args[1]), " = ",
  7848. type_to_glsl(op1_type), "(", type_to_glsl(via_type),
  7849. "(", to_expression(args[0]), "));");
  7850. emit_binary_op(result_type, id, args[0], args[1], "-");
  7851. }
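// Illustrative: the legacy path writes the whole part as e.g. op1 = float(int(x)); and then
// emits the fractional part as x - op1.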
  7852. break;
  7853. case GLSLstd450ModfStruct:
  7854. {
  7855. auto &type = get<SPIRType>(result_type);
  7856. emit_uninitialized_temporary_expression(result_type, id);
  7857. if (!is_legacy())
  7858. {
  7859. statement(to_expression(id), ".", to_member_name(type, 0), " = ", "modf(", to_expression(args[0]), ", ",
  7860. to_expression(id), ".", to_member_name(type, 1), ");");
  7861. }
  7862. else
  7863. {
7864. // NB: legacy GLSL doesn't have trunc() either, so we do a value cast.
  7865. auto &op0_type = expression_type(args[0]);
  7866. auto via_type = op0_type;
  7867. via_type.basetype = SPIRType::Int;
  7868. statement(to_expression(id), ".", to_member_name(type, 1), " = ", type_to_glsl(op0_type),
  7869. "(", type_to_glsl(via_type), "(", to_expression(args[0]), "));");
  7870. statement(to_expression(id), ".", to_member_name(type, 0), " = ", to_enclosed_expression(args[0]), " - ",
  7871. to_expression(id), ".", to_member_name(type, 1), ";");
  7872. }
  7873. break;
  7874. }
  7875. // Minmax
  7876. case GLSLstd450UMin:
  7877. emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", uint_type, false);
  7878. break;
  7879. case GLSLstd450SMin:
  7880. emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", int_type, false);
  7881. break;
  7882. case GLSLstd450FMin:
  7883. emit_binary_func_op(result_type, id, args[0], args[1], "min");
  7884. break;
  7885. case GLSLstd450FMax:
  7886. emit_binary_func_op(result_type, id, args[0], args[1], "max");
  7887. break;
  7888. case GLSLstd450UMax:
  7889. emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", uint_type, false);
  7890. break;
  7891. case GLSLstd450SMax:
  7892. emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", int_type, false);
  7893. break;
  7894. case GLSLstd450FClamp:
  7895. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp");
  7896. break;
  7897. case GLSLstd450UClamp:
  7898. emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", uint_type);
  7899. break;
  7900. case GLSLstd450SClamp:
  7901. emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", int_type);
  7902. break;
  7903. // Trig
  7904. case GLSLstd450Sin:
  7905. emit_unary_func_op(result_type, id, args[0], "sin");
  7906. break;
  7907. case GLSLstd450Cos:
  7908. emit_unary_func_op(result_type, id, args[0], "cos");
  7909. break;
  7910. case GLSLstd450Tan:
  7911. emit_unary_func_op(result_type, id, args[0], "tan");
  7912. break;
  7913. case GLSLstd450Asin:
  7914. emit_unary_func_op(result_type, id, args[0], "asin");
  7915. break;
  7916. case GLSLstd450Acos:
  7917. emit_unary_func_op(result_type, id, args[0], "acos");
  7918. break;
  7919. case GLSLstd450Atan:
  7920. emit_unary_func_op(result_type, id, args[0], "atan");
  7921. break;
  7922. case GLSLstd450Sinh:
  7923. if (!is_legacy())
  7924. emit_unary_func_op(result_type, id, args[0], "sinh");
  7925. else
  7926. {
  7927. bool forward = should_forward(args[0]);
  7928. auto expr = join("(exp(", to_expression(args[0]), ") - exp(-", to_enclosed_expression(args[0]), ")) * 0.5");
  7929. emit_op(result_type, id, expr, forward);
  7930. inherit_expression_dependencies(id, args[0]);
  7931. }
  7932. break;
  7933. case GLSLstd450Cosh:
  7934. if (!is_legacy())
  7935. emit_unary_func_op(result_type, id, args[0], "cosh");
  7936. else
  7937. {
  7938. bool forward = should_forward(args[0]);
  7939. auto expr = join("(exp(", to_expression(args[0]), ") + exp(-", to_enclosed_expression(args[0]), ")) * 0.5");
  7940. emit_op(result_type, id, expr, forward);
  7941. inherit_expression_dependencies(id, args[0]);
  7942. }
  7943. break;
  7944. case GLSLstd450Tanh:
  7945. if (!is_legacy())
  7946. emit_unary_func_op(result_type, id, args[0], "tanh");
  7947. else
  7948. {
  7949. // Create temporaries to store the result of exp(arg) and exp(-arg).
  7950. uint32_t &ids = extra_sub_expressions[id];
  7951. if (!ids)
  7952. {
  7953. ids = ir.increase_bound_by(2);
  7954. // Inherit precision qualifier (legacy has no NoContraction).
  7955. if (has_decoration(id, DecorationRelaxedPrecision))
  7956. {
  7957. set_decoration(ids, DecorationRelaxedPrecision);
  7958. set_decoration(ids + 1, DecorationRelaxedPrecision);
  7959. }
  7960. }
  7961. uint32_t epos_id = ids;
  7962. uint32_t eneg_id = ids + 1;
  7963. emit_op(result_type, epos_id, join("exp(", to_expression(args[0]), ")"), false);
  7964. emit_op(result_type, eneg_id, join("exp(-", to_enclosed_expression(args[0]), ")"), false);
  7965. inherit_expression_dependencies(epos_id, args[0]);
  7966. inherit_expression_dependencies(eneg_id, args[0]);
  7967. auto expr = join("(", to_enclosed_expression(epos_id), " - ", to_enclosed_expression(eneg_id), ") / "
  7968. "(", to_enclosed_expression(epos_id), " + ", to_enclosed_expression(eneg_id), ")");
  7969. emit_op(result_type, id, expr, true);
  7970. inherit_expression_dependencies(id, epos_id);
  7971. inherit_expression_dependencies(id, eneg_id);
  7972. }
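// Illustrative (placeholder names): the legacy tanh(x) expansion above evaluates to
//   (_epos - _eneg) / (_epos + _eneg), with _epos = exp(x) and _eneg = exp(-x)
// held in dedicated temporaries.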
  7973. break;
  7974. case GLSLstd450Asinh:
  7975. if (!is_legacy())
  7976. emit_unary_func_op(result_type, id, args[0], "asinh");
  7977. else
  7978. emit_emulated_ahyper_op(result_type, id, args[0], GLSLstd450Asinh);
  7979. break;
  7980. case GLSLstd450Acosh:
  7981. if (!is_legacy())
  7982. emit_unary_func_op(result_type, id, args[0], "acosh");
  7983. else
  7984. emit_emulated_ahyper_op(result_type, id, args[0], GLSLstd450Acosh);
  7985. break;
  7986. case GLSLstd450Atanh:
  7987. if (!is_legacy())
  7988. emit_unary_func_op(result_type, id, args[0], "atanh");
  7989. else
  7990. emit_emulated_ahyper_op(result_type, id, args[0], GLSLstd450Atanh);
  7991. break;
  7992. case GLSLstd450Atan2:
  7993. emit_binary_func_op(result_type, id, args[0], args[1], "atan");
  7994. break;
  7995. // Exponentials
  7996. case GLSLstd450Pow:
  7997. emit_binary_func_op(result_type, id, args[0], args[1], "pow");
  7998. break;
  7999. case GLSLstd450Exp:
  8000. emit_unary_func_op(result_type, id, args[0], "exp");
  8001. break;
  8002. case GLSLstd450Log:
  8003. emit_unary_func_op(result_type, id, args[0], "log");
  8004. break;
  8005. case GLSLstd450Exp2:
  8006. emit_unary_func_op(result_type, id, args[0], "exp2");
  8007. break;
  8008. case GLSLstd450Log2:
  8009. emit_unary_func_op(result_type, id, args[0], "log2");
  8010. break;
  8011. case GLSLstd450Sqrt:
  8012. emit_unary_func_op(result_type, id, args[0], "sqrt");
  8013. break;
  8014. case GLSLstd450InverseSqrt:
  8015. emit_unary_func_op(result_type, id, args[0], "inversesqrt");
  8016. break;
  8017. // Matrix math
  8018. case GLSLstd450Determinant:
  8019. {
  8020. // No need to transpose - it doesn't affect the determinant
  8021. auto *e = maybe_get<SPIRExpression>(args[0]);
  8022. bool old_transpose = e && e->need_transpose;
  8023. if (old_transpose)
  8024. e->need_transpose = false;
  8025. if (options.version < 150) // also matches ES 100
  8026. {
  8027. auto &type = expression_type(args[0]);
  8028. assert(type.vecsize >= 2 && type.vecsize <= 4);
  8029. assert(type.vecsize == type.columns);
  8030. // ARB_gpu_shader_fp64 needs GLSL 150, other types are not valid
  8031. if (type.basetype != SPIRType::Float)
  8032. SPIRV_CROSS_THROW("Unsupported type for matrix determinant");
  8033. bool relaxed = has_decoration(id, DecorationRelaxedPrecision);
  8034. require_polyfill(static_cast<Polyfill>(PolyfillDeterminant2x2 << (type.vecsize - 2)),
  8035. relaxed);
  8036. emit_unary_func_op(result_type, id, args[0],
  8037. (options.es && relaxed) ? "spvDeterminantMP" : "spvDeterminant");
  8038. }
  8039. else
  8040. emit_unary_func_op(result_type, id, args[0], "determinant");
  8041. if (old_transpose)
  8042. e->need_transpose = true;
  8043. break;
  8044. }
  8045. case GLSLstd450MatrixInverse:
  8046. {
  8047. // The inverse of the transpose is the same as the transpose of
  8048. // the inverse, so we can just flip need_transpose of the result.
  8049. auto *a = maybe_get<SPIRExpression>(args[0]);
  8050. bool old_transpose = a && a->need_transpose;
  8051. if (old_transpose)
  8052. a->need_transpose = false;
  8053. const char *func = "inverse";
  8054. if (options.version < 140) // also matches ES 100
  8055. {
  8056. auto &type = get<SPIRType>(result_type);
  8057. assert(type.vecsize >= 2 && type.vecsize <= 4);
  8058. assert(type.vecsize == type.columns);
  8059. // ARB_gpu_shader_fp64 needs GLSL 150, other types are invalid
  8060. if (type.basetype != SPIRType::Float)
  8061. SPIRV_CROSS_THROW("Unsupported type for matrix inverse");
  8062. bool relaxed = has_decoration(id, DecorationRelaxedPrecision);
  8063. require_polyfill(static_cast<Polyfill>(PolyfillMatrixInverse2x2 << (type.vecsize - 2)),
  8064. relaxed);
  8065. func = (options.es && relaxed) ? "spvInverseMP" : "spvInverse";
  8066. }
  8067. bool forward = should_forward(args[0]);
  8068. auto &e = emit_op(result_type, id, join(func, "(", to_unpacked_expression(args[0]), ")"), forward);
  8069. inherit_expression_dependencies(id, args[0]);
  8070. if (old_transpose)
  8071. {
  8072. e.need_transpose = true;
  8073. a->need_transpose = true;
  8074. }
  8075. break;
  8076. }
  8077. // Lerping
  8078. case GLSLstd450FMix:
  8079. case GLSLstd450IMix:
  8080. {
  8081. emit_mix_op(result_type, id, args[0], args[1], args[2]);
  8082. break;
  8083. }
  8084. case GLSLstd450Step:
  8085. emit_binary_func_op(result_type, id, args[0], args[1], "step");
  8086. break;
  8087. case GLSLstd450SmoothStep:
  8088. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "smoothstep");
  8089. break;
  8090. // Packing
  8091. case GLSLstd450Frexp:
  8092. register_call_out_argument(args[1]);
  8093. forced_temporaries.insert(id);
  8094. emit_binary_func_op(result_type, id, args[0], args[1], "frexp");
  8095. break;
  8096. case GLSLstd450FrexpStruct:
  8097. {
  8098. auto &type = get<SPIRType>(result_type);
  8099. emit_uninitialized_temporary_expression(result_type, id);
  8100. statement(to_expression(id), ".", to_member_name(type, 0), " = ", "frexp(", to_expression(args[0]), ", ",
  8101. to_expression(id), ".", to_member_name(type, 1), ");");
  8102. break;
  8103. }
  8104. case GLSLstd450Ldexp:
  8105. {
  8106. bool forward = should_forward(args[0]) && should_forward(args[1]);
  8107. auto op0 = to_unpacked_expression(args[0]);
  8108. auto op1 = to_unpacked_expression(args[1]);
  8109. auto &op1_type = expression_type(args[1]);
  8110. if (op1_type.basetype != SPIRType::Int)
  8111. {
  8112. // Need a value cast here.
  8113. auto target_type = op1_type;
  8114. target_type.basetype = SPIRType::Int;
  8115. op1 = join(type_to_glsl_constructor(target_type), "(", op1, ")");
  8116. }
  8117. auto expr = join("ldexp(", op0, ", ", op1, ")");
  8118. emit_op(result_type, id, expr, forward);
  8119. inherit_expression_dependencies(id, args[0]);
  8120. inherit_expression_dependencies(id, args[1]);
  8121. break;
  8122. }
  8123. case GLSLstd450PackSnorm4x8:
  8124. emit_unary_func_op(result_type, id, args[0], "packSnorm4x8");
  8125. break;
  8126. case GLSLstd450PackUnorm4x8:
  8127. emit_unary_func_op(result_type, id, args[0], "packUnorm4x8");
  8128. break;
  8129. case GLSLstd450PackSnorm2x16:
  8130. emit_unary_func_op(result_type, id, args[0], "packSnorm2x16");
  8131. break;
  8132. case GLSLstd450PackUnorm2x16:
  8133. emit_unary_func_op(result_type, id, args[0], "packUnorm2x16");
  8134. break;
  8135. case GLSLstd450PackHalf2x16:
  8136. emit_unary_func_op(result_type, id, args[0], "packHalf2x16");
  8137. break;
  8138. case GLSLstd450UnpackSnorm4x8:
  8139. emit_unary_func_op(result_type, id, args[0], "unpackSnorm4x8");
  8140. break;
  8141. case GLSLstd450UnpackUnorm4x8:
  8142. emit_unary_func_op(result_type, id, args[0], "unpackUnorm4x8");
  8143. break;
  8144. case GLSLstd450UnpackSnorm2x16:
  8145. emit_unary_func_op(result_type, id, args[0], "unpackSnorm2x16");
  8146. break;
  8147. case GLSLstd450UnpackUnorm2x16:
  8148. emit_unary_func_op(result_type, id, args[0], "unpackUnorm2x16");
  8149. break;
  8150. case GLSLstd450UnpackHalf2x16:
  8151. emit_unary_func_op(result_type, id, args[0], "unpackHalf2x16");
  8152. break;
  8153. case GLSLstd450PackDouble2x32:
  8154. emit_unary_func_op(result_type, id, args[0], "packDouble2x32");
  8155. break;
  8156. case GLSLstd450UnpackDouble2x32:
  8157. emit_unary_func_op(result_type, id, args[0], "unpackDouble2x32");
  8158. break;
  8159. // Vector math
  8160. case GLSLstd450Length:
  8161. emit_unary_func_op(result_type, id, args[0], "length");
  8162. break;
  8163. case GLSLstd450Distance:
  8164. emit_binary_func_op(result_type, id, args[0], args[1], "distance");
  8165. break;
  8166. case GLSLstd450Cross:
  8167. emit_binary_func_op(result_type, id, args[0], args[1], "cross");
  8168. break;
  8169. case GLSLstd450Normalize:
  8170. emit_unary_func_op(result_type, id, args[0], "normalize");
  8171. break;
  8172. case GLSLstd450FaceForward:
  8173. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "faceforward");
  8174. break;
  8175. case GLSLstd450Reflect:
  8176. emit_binary_func_op(result_type, id, args[0], args[1], "reflect");
  8177. break;
  8178. case GLSLstd450Refract:
  8179. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "refract");
  8180. break;
  8181. // Bit-fiddling
  8182. case GLSLstd450FindILsb:
  8183. // findLSB always returns int.
  8184. emit_unary_func_op_cast(result_type, id, args[0], "findLSB", expression_type(args[0]).basetype, int_type);
  8185. break;
  8186. case GLSLstd450FindSMsb:
  8187. emit_unary_func_op_cast(result_type, id, args[0], "findMSB", int_type, int_type);
  8188. break;
  8189. case GLSLstd450FindUMsb:
  8190. emit_unary_func_op_cast(result_type, id, args[0], "findMSB", uint_type,
  8191. int_type); // findMSB always returns int.
  8192. break;
  8193. // Multisampled varying
  8194. case GLSLstd450InterpolateAtCentroid:
  8195. emit_unary_func_op(result_type, id, args[0], "interpolateAtCentroid");
  8196. break;
  8197. case GLSLstd450InterpolateAtSample:
  8198. emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtSample");
  8199. break;
  8200. case GLSLstd450InterpolateAtOffset:
  8201. emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtOffset");
  8202. break;
  8203. case GLSLstd450NMin:
  8204. case GLSLstd450NMax:
  8205. {
  8206. if (options.vulkan_semantics)
  8207. {
  8208. require_extension_internal("GL_EXT_spirv_intrinsics");
  8209. bool relaxed = has_decoration(id, DecorationRelaxedPrecision);
  8210. Polyfill poly = {};
  8211. switch (get<SPIRType>(result_type).width)
  8212. {
  8213. case 16:
  8214. poly = op == GLSLstd450NMin ? PolyfillNMin16 : PolyfillNMax16;
  8215. break;
  8216. case 32:
  8217. poly = op == GLSLstd450NMin ? PolyfillNMin32 : PolyfillNMax32;
  8218. break;
  8219. case 64:
  8220. poly = op == GLSLstd450NMin ? PolyfillNMin64 : PolyfillNMax64;
  8221. break;
  8222. default:
  8223. SPIRV_CROSS_THROW("Invalid bit width for NMin/NMax.");
  8224. }
  8225. require_polyfill(poly, relaxed);
  8226. // Function return decorations are broken, so need to do double polyfill.
  8227. if (relaxed)
  8228. require_polyfill(poly, false);
  8229. const char *op_str;
  8230. if (relaxed)
  8231. op_str = op == GLSLstd450NMin ? "spvNMinRelaxed" : "spvNMaxRelaxed";
  8232. else
  8233. op_str = op == GLSLstd450NMin ? "spvNMin" : "spvNMax";
  8234. emit_binary_func_op(result_type, id, args[0], args[1], op_str);
  8235. }
  8236. else
  8237. {
  8238. emit_nminmax_op(result_type, id, args[0], args[1], op);
  8239. }
  8240. break;
  8241. }
  8242. case GLSLstd450NClamp:
  8243. {
  8244. if (options.vulkan_semantics)
  8245. {
  8246. require_extension_internal("GL_EXT_spirv_intrinsics");
  8247. bool relaxed = has_decoration(id, DecorationRelaxedPrecision);
  8248. Polyfill poly = {};
  8249. switch (get<SPIRType>(result_type).width)
  8250. {
  8251. case 16:
  8252. poly = PolyfillNClamp16;
  8253. break;
  8254. case 32:
  8255. poly = PolyfillNClamp32;
  8256. break;
  8257. case 64:
  8258. poly = PolyfillNClamp64;
  8259. break;
  8260. default:
  8261. SPIRV_CROSS_THROW("Invalid bit width for NMin/NMax.");
  8262. }
  8263. require_polyfill(poly, relaxed);
  8264. // Function return decorations are broken, so need to do double polyfill.
  8265. if (relaxed)
  8266. require_polyfill(poly, false);
  8267. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], relaxed ? "spvNClampRelaxed" : "spvNClamp");
  8268. }
  8269. else
  8270. {
  8271. // Make sure we have a unique ID here to avoid aliasing the extra sub-expressions between clamp and NMin sub-op.
  8272. // IDs cannot exceed 24 bits, so we can make use of the higher bits for some unique flags.
  8273. uint32_t &max_id = extra_sub_expressions[id | EXTRA_SUB_EXPRESSION_TYPE_AUX];
  8274. if (!max_id)
  8275. max_id = ir.increase_bound_by(1);
  8276. // Inherit precision qualifiers.
  8277. ir.meta[max_id] = ir.meta[id];
  8278. emit_nminmax_op(result_type, max_id, args[0], args[1], GLSLstd450NMax);
  8279. emit_nminmax_op(result_type, id, max_id, args[2], GLSLstd450NMin);
  8280. }
  8281. break;
  8282. }
  8283. default:
  8284. statement("// unimplemented GLSL op ", eop);
  8285. break;
  8286. }
  8287. }
  8288. void CompilerGLSL::emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op)
  8289. {
  8290. // Need to emulate this call.
  8291. uint32_t &ids = extra_sub_expressions[id];
  8292. if (!ids)
  8293. {
  8294. ids = ir.increase_bound_by(5);
  8295. auto btype = get<SPIRType>(result_type);
  8296. btype.basetype = SPIRType::Boolean;
  8297. set<SPIRType>(ids, btype);
  8298. }
  8299. uint32_t btype_id = ids + 0;
  8300. uint32_t left_nan_id = ids + 1;
  8301. uint32_t right_nan_id = ids + 2;
  8302. uint32_t tmp_id = ids + 3;
  8303. uint32_t mixed_first_id = ids + 4;
  8304. // Inherit precision qualifiers.
  8305. ir.meta[tmp_id] = ir.meta[id];
  8306. ir.meta[mixed_first_id] = ir.meta[id];
  8307. if (!is_legacy())
  8308. {
  8309. emit_unary_func_op(btype_id, left_nan_id, op0, "isnan");
  8310. emit_unary_func_op(btype_id, right_nan_id, op1, "isnan");
  8311. }
  8312. else if (expression_type(op0).vecsize > 1)
  8313. {
  8314. // If the number doesn't equal itself, it must be NaN
  8315. emit_binary_func_op(btype_id, left_nan_id, op0, op0, "notEqual");
  8316. emit_binary_func_op(btype_id, right_nan_id, op1, op1, "notEqual");
  8317. }
  8318. else
  8319. {
  8320. emit_binary_op(btype_id, left_nan_id, op0, op0, "!=");
  8321. emit_binary_op(btype_id, right_nan_id, op1, op1, "!=");
  8322. }
  8323. emit_binary_func_op(result_type, tmp_id, op0, op1, op == GLSLstd450NMin ? "min" : "max");
  8324. emit_mix_op(result_type, mixed_first_id, tmp_id, op1, left_nan_id);
  8325. emit_mix_op(result_type, id, mixed_first_id, op0, right_nan_id);
  8326. }
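// Illustrative summary (not original): emit_nminmax_op() builds NaN-aware min/max as
//   tmp = min(a, b); tmp = mix(tmp, b, isnan(a)); result = mix(tmp, a, isnan(b));
// so a NaN operand is replaced by the other operand, matching GLSLstd450 NMin/NMax semantics.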
  8327. void CompilerGLSL::emit_emulated_ahyper_op(uint32_t result_type, uint32_t id, uint32_t op0, GLSLstd450 op)
  8328. {
  8329. const char *one = backend.float_literal_suffix ? "1.0f" : "1.0";
  8330. std::string expr;
  8331. bool forward = should_forward(op0);
  8332. switch (op)
  8333. {
  8334. case GLSLstd450Asinh:
  8335. expr = join("log(", to_enclosed_expression(op0), " + sqrt(",
  8336. to_enclosed_expression(op0), " * ", to_enclosed_expression(op0), " + ", one, "))");
8337. // The shared emit_op() below handles all cases; emitting here as well would create the expression twice.
8338. break;
  8339. case GLSLstd450Acosh:
  8340. expr = join("log(", to_enclosed_expression(op0), " + sqrt(",
  8341. to_enclosed_expression(op0), " * ", to_enclosed_expression(op0), " - ", one, "))");
  8342. break;
  8343. case GLSLstd450Atanh:
  8344. expr = join("log((", one, " + ", to_enclosed_expression(op0), ") / "
  8345. "(", one, " - ", to_enclosed_expression(op0), ")) * 0.5",
  8346. backend.float_literal_suffix ? "f" : "");
  8347. break;
  8348. default:
  8349. SPIRV_CROSS_THROW("Invalid op.");
  8350. }
  8351. emit_op(result_type, id, expr, forward);
  8352. inherit_expression_dependencies(id, op0);
  8353. }
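// Illustrative summary of the identities used above:
//   asinh(x) = log(x + sqrt(x*x + 1)), acosh(x) = log(x + sqrt(x*x - 1)),
//   atanh(x) = 0.5 * log((1 + x) / (1 - x)).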
  8354. void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
  8355. uint32_t)
  8356. {
  8357. require_extension_internal("GL_AMD_shader_ballot");
  8358. enum AMDShaderBallot
  8359. {
  8360. SwizzleInvocationsAMD = 1,
  8361. SwizzleInvocationsMaskedAMD = 2,
  8362. WriteInvocationAMD = 3,
  8363. MbcntAMD = 4
  8364. };
  8365. auto op = static_cast<AMDShaderBallot>(eop);
  8366. switch (op)
  8367. {
  8368. case SwizzleInvocationsAMD:
  8369. emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsAMD");
  8370. register_control_dependent_expression(id);
  8371. break;
  8372. case SwizzleInvocationsMaskedAMD:
  8373. emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsMaskedAMD");
  8374. register_control_dependent_expression(id);
  8375. break;
  8376. case WriteInvocationAMD:
  8377. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "writeInvocationAMD");
  8378. register_control_dependent_expression(id);
  8379. break;
  8380. case MbcntAMD:
  8381. emit_unary_func_op(result_type, id, args[0], "mbcntAMD");
  8382. register_control_dependent_expression(id);
  8383. break;
  8384. default:
  8385. statement("// unimplemented SPV AMD shader ballot op ", eop);
  8386. break;
  8387. }
  8388. }
  8389. void CompilerGLSL::emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t result_type, uint32_t id, uint32_t eop,
  8390. const uint32_t *args, uint32_t)
  8391. {
  8392. require_extension_internal("GL_AMD_shader_explicit_vertex_parameter");
  8393. enum AMDShaderExplicitVertexParameter
  8394. {
  8395. InterpolateAtVertexAMD = 1
  8396. };
  8397. auto op = static_cast<AMDShaderExplicitVertexParameter>(eop);
  8398. switch (op)
  8399. {
  8400. case InterpolateAtVertexAMD:
  8401. emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtVertexAMD");
  8402. break;
  8403. default:
  8404. statement("// unimplemented SPV AMD shader explicit vertex parameter op ", eop);
  8405. break;
  8406. }
  8407. }
  8408. void CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop,
  8409. const uint32_t *args, uint32_t)
  8410. {
  8411. require_extension_internal("GL_AMD_shader_trinary_minmax");
  8412. enum AMDShaderTrinaryMinMax
  8413. {
  8414. FMin3AMD = 1,
  8415. UMin3AMD = 2,
  8416. SMin3AMD = 3,
  8417. FMax3AMD = 4,
  8418. UMax3AMD = 5,
  8419. SMax3AMD = 6,
  8420. FMid3AMD = 7,
  8421. UMid3AMD = 8,
  8422. SMid3AMD = 9
  8423. };
  8424. auto op = static_cast<AMDShaderTrinaryMinMax>(eop);
  8425. switch (op)
  8426. {
  8427. case FMin3AMD:
  8428. case UMin3AMD:
  8429. case SMin3AMD:
  8430. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "min3");
  8431. break;
  8432. case FMax3AMD:
  8433. case UMax3AMD:
  8434. case SMax3AMD:
  8435. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "max3");
  8436. break;
  8437. case FMid3AMD:
  8438. case UMid3AMD:
  8439. case SMid3AMD:
  8440. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "mid3");
  8441. break;
  8442. default:
  8443. statement("// unimplemented SPV AMD shader trinary minmax op ", eop);
  8444. break;
  8445. }
  8446. }
  8447. void CompilerGLSL::emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
  8448. uint32_t)
  8449. {
  8450. require_extension_internal("GL_AMD_gcn_shader");
  8451. enum AMDGCNShader
  8452. {
  8453. CubeFaceIndexAMD = 1,
  8454. CubeFaceCoordAMD = 2,
  8455. TimeAMD = 3
  8456. };
  8457. auto op = static_cast<AMDGCNShader>(eop);
  8458. switch (op)
  8459. {
  8460. case CubeFaceIndexAMD:
  8461. emit_unary_func_op(result_type, id, args[0], "cubeFaceIndexAMD");
  8462. break;
  8463. case CubeFaceCoordAMD:
  8464. emit_unary_func_op(result_type, id, args[0], "cubeFaceCoordAMD");
  8465. break;
  8466. case TimeAMD:
  8467. {
  8468. string expr = "timeAMD()";
  8469. emit_op(result_type, id, expr, true);
  8470. register_control_dependent_expression(id);
  8471. break;
  8472. }
  8473. default:
  8474. statement("// unimplemented SPV AMD gcn shader op ", eop);
  8475. break;
  8476. }
  8477. }
  8478. void CompilerGLSL::emit_subgroup_op(const Instruction &i)
  8479. {
  8480. const uint32_t *ops = stream(i);
  8481. auto op = static_cast<Op>(i.op);
  8482. if (!options.vulkan_semantics && !is_supported_subgroup_op_in_opengl(op, ops))
  8483. SPIRV_CROSS_THROW("This subgroup operation is only supported in Vulkan semantics.");
  8484. // If we need to do implicit bitcasts, make sure we do it with the correct type.
  8485. uint32_t integer_width = get_integer_width_for_instruction(i);
  8486. auto int_type = to_signed_basetype(integer_width);
  8487. auto uint_type = to_unsigned_basetype(integer_width);
  8488. if (options.vulkan_semantics)
  8489. {
  8490. auto &return_type = get<SPIRType>(ops[0]);
  8491. switch (return_type.basetype)
  8492. {
  8493. case SPIRType::SByte:
  8494. case SPIRType::UByte:
  8495. require_extension_internal("GL_EXT_shader_subgroup_extended_types_int8");
  8496. break;
  8497. case SPIRType::Short:
  8498. case SPIRType::UShort:
  8499. require_extension_internal("GL_EXT_shader_subgroup_extended_types_int16");
  8500. break;
  8501. case SPIRType::Half:
  8502. require_extension_internal("GL_EXT_shader_subgroup_extended_types_float16");
  8503. break;
  8504. case SPIRType::Int64:
  8505. case SPIRType::UInt64:
  8506. require_extension_internal("GL_EXT_shader_subgroup_extended_types_int64");
  8507. break;
  8508. default:
  8509. break;
  8510. }
  8511. }
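// Illustrative: e.g. a subgroup reduction whose result type is int16_t pulls in
// GL_EXT_shader_subgroup_extended_types_int16 via the check above.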
  8512. switch (op)
  8513. {
  8514. case OpGroupNonUniformElect:
  8515. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupElect);
  8516. break;
  8517. case OpGroupNonUniformBallotBitCount:
  8518. {
  8519. const GroupOperation operation = static_cast<GroupOperation>(ops[3]);
  8520. if (operation == GroupOperationReduce)
  8521. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitCount);
  8522. else if (operation == GroupOperationInclusiveScan || operation == GroupOperationExclusiveScan)
  8523. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout);
  8524. }
  8525. break;
  8526. case OpGroupNonUniformBallotBitExtract:
  8527. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitExtract);
  8528. break;
  8529. case OpGroupNonUniformInverseBallot:
  8530. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout);
  8531. break;
  8532. case OpGroupNonUniformBallot:
  8533. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallot);
  8534. break;
  8535. case OpGroupNonUniformBallotFindLSB:
  8536. case OpGroupNonUniformBallotFindMSB:
  8537. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotFindLSB_MSB);
  8538. break;
  8539. case OpGroupNonUniformBroadcast:
  8540. case OpGroupNonUniformBroadcastFirst:
  8541. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBroadcast_First);
  8542. break;
  8543. case OpGroupNonUniformShuffle:
  8544. case OpGroupNonUniformShuffleXor:
  8545. require_extension_internal("GL_KHR_shader_subgroup_shuffle");
  8546. break;
  8547. case OpGroupNonUniformShuffleUp:
  8548. case OpGroupNonUniformShuffleDown:
  8549. require_extension_internal("GL_KHR_shader_subgroup_shuffle_relative");
  8550. break;
  8551. case OpGroupNonUniformRotateKHR:
  8552. require_extension_internal("GL_KHR_shader_subgroup_rotate");
  8553. break;
  8554. case OpGroupNonUniformAll:
  8555. case OpGroupNonUniformAny:
  8556. case OpGroupNonUniformAllEqual:
  8557. {
  8558. const SPIRType &type = expression_type(ops[3]);
  8559. if (type.basetype == SPIRType::BaseType::Boolean && type.vecsize == 1u)
  8560. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAll_Any_AllEqualBool);
  8561. else
  8562. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAllEqualT);
  8563. }
  8564. break;
  8565. // clang-format off
  8566. #define GLSL_GROUP_OP(OP)\
  8567. case OpGroupNonUniform##OP:\
  8568. {\
  8569. auto operation = static_cast<GroupOperation>(ops[3]);\
  8570. if (operation == GroupOperationClusteredReduce)\
  8571. require_extension_internal("GL_KHR_shader_subgroup_clustered");\
  8572. else if (operation == GroupOperationReduce)\
  8573. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupArithmetic##OP##Reduce);\
  8574. else if (operation == GroupOperationExclusiveScan)\
  8575. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupArithmetic##OP##ExclusiveScan);\
  8576. else if (operation == GroupOperationInclusiveScan)\
  8577. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupArithmetic##OP##InclusiveScan);\
  8578. else\
  8579. SPIRV_CROSS_THROW("Invalid group operation.");\
  8580. break;\
  8581. }
  8582. GLSL_GROUP_OP(IAdd)
  8583. GLSL_GROUP_OP(FAdd)
  8584. GLSL_GROUP_OP(IMul)
  8585. GLSL_GROUP_OP(FMul)
  8586. #undef GLSL_GROUP_OP
  8587. // clang-format on
  8588. case OpGroupNonUniformFMin:
  8589. case OpGroupNonUniformFMax:
  8590. case OpGroupNonUniformSMin:
  8591. case OpGroupNonUniformSMax:
  8592. case OpGroupNonUniformUMin:
  8593. case OpGroupNonUniformUMax:
  8594. case OpGroupNonUniformBitwiseAnd:
  8595. case OpGroupNonUniformBitwiseOr:
  8596. case OpGroupNonUniformBitwiseXor:
  8597. case OpGroupNonUniformLogicalAnd:
  8598. case OpGroupNonUniformLogicalOr:
  8599. case OpGroupNonUniformLogicalXor:
  8600. {
  8601. auto operation = static_cast<GroupOperation>(ops[3]);
  8602. if (operation == GroupOperationClusteredReduce)
  8603. {
  8604. require_extension_internal("GL_KHR_shader_subgroup_clustered");
  8605. }
  8606. else if (operation == GroupOperationExclusiveScan || operation == GroupOperationInclusiveScan ||
  8607. operation == GroupOperationReduce)
  8608. {
  8609. require_extension_internal("GL_KHR_shader_subgroup_arithmetic");
  8610. }
  8611. else
  8612. SPIRV_CROSS_THROW("Invalid group operation.");
  8613. break;
  8614. }
  8615. case OpGroupNonUniformQuadSwap:
  8616. case OpGroupNonUniformQuadBroadcast:
  8617. require_extension_internal("GL_KHR_shader_subgroup_quad");
  8618. break;
  8619. case OpGroupNonUniformQuadAllKHR:
  8620. case OpGroupNonUniformQuadAnyKHR:
  8621. // Require both extensions to be enabled.
  8622. require_extension_internal("GL_KHR_shader_subgroup_vote");
  8623. require_extension_internal("GL_EXT_shader_quad_control");
  8624. break;
  8625. default:
  8626. SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
  8627. }
  8628. uint32_t result_type = ops[0];
  8629. uint32_t id = ops[1];
  8630. // These quad ops do not have a scope parameter.
  8631. if (op != OpGroupNonUniformQuadAllKHR && op != OpGroupNonUniformQuadAnyKHR)
  8632. {
  8633. auto scope = static_cast<Scope>(evaluate_constant_u32(ops[2]));
  8634. if (scope != ScopeSubgroup)
  8635. SPIRV_CROSS_THROW("Only subgroup scope is supported.");
  8636. }
  8637. switch (op)
  8638. {
  8639. case OpGroupNonUniformElect:
  8640. emit_op(result_type, id, "subgroupElect()", true);
  8641. break;
  8642. case OpGroupNonUniformBroadcast:
  8643. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBroadcast");
  8644. break;
  8645. case OpGroupNonUniformBroadcastFirst:
  8646. emit_unary_func_op(result_type, id, ops[3], "subgroupBroadcastFirst");
  8647. break;
  8648. case OpGroupNonUniformBallot:
  8649. emit_unary_func_op(result_type, id, ops[3], "subgroupBallot");
  8650. break;
  8651. case OpGroupNonUniformInverseBallot:
  8652. emit_unary_func_op(result_type, id, ops[3], "subgroupInverseBallot");
  8653. break;
  8654. case OpGroupNonUniformBallotBitExtract:
  8655. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBallotBitExtract");
  8656. break;
  8657. case OpGroupNonUniformBallotFindLSB:
  8658. emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindLSB");
  8659. break;
  8660. case OpGroupNonUniformBallotFindMSB:
  8661. emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindMSB");
  8662. break;
  8663. case OpGroupNonUniformBallotBitCount:
  8664. {
  8665. auto operation = static_cast<GroupOperation>(ops[3]);
  8666. if (operation == GroupOperationReduce)
  8667. emit_unary_func_op(result_type, id, ops[4], "subgroupBallotBitCount");
  8668. else if (operation == GroupOperationInclusiveScan)
  8669. emit_unary_func_op(result_type, id, ops[4], "subgroupBallotInclusiveBitCount");
  8670. else if (operation == GroupOperationExclusiveScan)
  8671. emit_unary_func_op(result_type, id, ops[4], "subgroupBallotExclusiveBitCount");
  8672. else
  8673. SPIRV_CROSS_THROW("Invalid BitCount operation.");
  8674. break;
  8675. }
  8676. case OpGroupNonUniformShuffle:
  8677. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffle");
  8678. break;
  8679. case OpGroupNonUniformShuffleXor:
  8680. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleXor");
  8681. break;
  8682. case OpGroupNonUniformShuffleUp:
  8683. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleUp");
  8684. break;
  8685. case OpGroupNonUniformShuffleDown:
  8686. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleDown");
  8687. break;
  8688. case OpGroupNonUniformRotateKHR:
  8689. if (i.length > 5)
  8690. emit_trinary_func_op(result_type, id, ops[3], ops[4], ops[5], "subgroupClusteredRotate");
  8691. else
  8692. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupRotate");
  8693. break;
  8694. case OpGroupNonUniformAll:
  8695. emit_unary_func_op(result_type, id, ops[3], "subgroupAll");
  8696. break;
  8697. case OpGroupNonUniformAny:
  8698. emit_unary_func_op(result_type, id, ops[3], "subgroupAny");
  8699. break;
  8700. case OpGroupNonUniformAllEqual:
  8701. emit_unary_func_op(result_type, id, ops[3], "subgroupAllEqual");
  8702. break;
  8703. // clang-format off
  8704. #define GLSL_GROUP_OP(op, glsl_op) \
  8705. case OpGroupNonUniform##op: \
  8706. { \
  8707. auto operation = static_cast<GroupOperation>(ops[3]); \
  8708. if (operation == GroupOperationReduce) \
  8709. emit_unary_func_op(result_type, id, ops[4], "subgroup" #glsl_op); \
  8710. else if (operation == GroupOperationInclusiveScan) \
  8711. emit_unary_func_op(result_type, id, ops[4], "subgroupInclusive" #glsl_op); \
  8712. else if (operation == GroupOperationExclusiveScan) \
  8713. emit_unary_func_op(result_type, id, ops[4], "subgroupExclusive" #glsl_op); \
  8714. else if (operation == GroupOperationClusteredReduce) \
  8715. emit_binary_func_op(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op); \
  8716. else \
  8717. SPIRV_CROSS_THROW("Invalid group operation."); \
  8718. break; \
  8719. }
  8720. #define GLSL_GROUP_OP_CAST(op, glsl_op, type) \
  8721. case OpGroupNonUniform##op: \
  8722. { \
  8723. auto operation = static_cast<GroupOperation>(ops[3]); \
  8724. if (operation == GroupOperationReduce) \
  8725. emit_unary_func_op_cast(result_type, id, ops[4], "subgroup" #glsl_op, type, type); \
  8726. else if (operation == GroupOperationInclusiveScan) \
  8727. emit_unary_func_op_cast(result_type, id, ops[4], "subgroupInclusive" #glsl_op, type, type); \
  8728. else if (operation == GroupOperationExclusiveScan) \
  8729. emit_unary_func_op_cast(result_type, id, ops[4], "subgroupExclusive" #glsl_op, type, type); \
  8730. else if (operation == GroupOperationClusteredReduce) \
  8731. emit_binary_func_op_cast_clustered(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op, type); \
  8732. else \
  8733. SPIRV_CROSS_THROW("Invalid group operation."); \
  8734. break; \
  8735. }
  8736. GLSL_GROUP_OP(FAdd, Add)
  8737. GLSL_GROUP_OP(FMul, Mul)
  8738. GLSL_GROUP_OP(FMin, Min)
  8739. GLSL_GROUP_OP(FMax, Max)
  8740. GLSL_GROUP_OP(IAdd, Add)
  8741. GLSL_GROUP_OP(IMul, Mul)
  8742. GLSL_GROUP_OP_CAST(SMin, Min, int_type)
  8743. GLSL_GROUP_OP_CAST(SMax, Max, int_type)
  8744. GLSL_GROUP_OP_CAST(UMin, Min, uint_type)
  8745. GLSL_GROUP_OP_CAST(UMax, Max, uint_type)
  8746. GLSL_GROUP_OP(BitwiseAnd, And)
  8747. GLSL_GROUP_OP(BitwiseOr, Or)
  8748. GLSL_GROUP_OP(BitwiseXor, Xor)
  8749. GLSL_GROUP_OP(LogicalAnd, And)
  8750. GLSL_GROUP_OP(LogicalOr, Or)
  8751. GLSL_GROUP_OP(LogicalXor, Xor)
  8752. #undef GLSL_GROUP_OP
  8753. #undef GLSL_GROUP_OP_CAST
  8754. // clang-format on
  8755. case OpGroupNonUniformQuadSwap:
  8756. {
  8757. uint32_t direction = evaluate_constant_u32(ops[4]);
  8758. if (direction == 0)
  8759. emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapHorizontal");
  8760. else if (direction == 1)
  8761. emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapVertical");
  8762. else if (direction == 2)
  8763. emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapDiagonal");
  8764. else
  8765. SPIRV_CROSS_THROW("Invalid quad swap direction.");
  8766. break;
  8767. }
  8768. case OpGroupNonUniformQuadBroadcast:
  8769. {
  8770. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupQuadBroadcast");
  8771. break;
  8772. }
  8773. case OpGroupNonUniformQuadAllKHR:
  8774. emit_unary_func_op(result_type, id, ops[2], "subgroupQuadAll");
  8775. break;
  8776. case OpGroupNonUniformQuadAnyKHR:
  8777. emit_unary_func_op(result_type, id, ops[2], "subgroupQuadAny");
  8778. break;
  8779. default:
  8780. SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
  8781. }
  8782. register_control_dependent_expression(id);
  8783. }
  8784. string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
  8785. {
  8786. // OpBitcast can deal with pointers.
  8787. if (out_type.pointer || in_type.pointer)
  8788. {
  8789. if (out_type.vecsize == 2 || in_type.vecsize == 2)
  8790. require_extension_internal("GL_EXT_buffer_reference_uvec2");
  8791. return type_to_glsl(out_type);
  8792. }
  8793. if (out_type.basetype == in_type.basetype)
  8794. return "";
  8795. assert(out_type.basetype != SPIRType::Boolean);
  8796. assert(in_type.basetype != SPIRType::Boolean);
  8797. bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type);
  8798. bool same_size_cast = out_type.width == in_type.width;
  8799. // Trivial bitcast case, casts between integers.
  8800. if (integral_cast && same_size_cast)
  8801. return type_to_glsl(out_type);
  8802. // Catch-all 8-bit arithmetic casts (GL_EXT_shader_explicit_arithmetic_types).
  8803. if (out_type.width == 8 && in_type.width >= 16 && integral_cast && in_type.vecsize == 1)
  8804. return "unpack8";
  8805. else if (in_type.width == 8 && out_type.width == 16 && integral_cast && out_type.vecsize == 1)
  8806. return "pack16";
  8807. else if (in_type.width == 8 && out_type.width == 32 && integral_cast && out_type.vecsize == 1)
  8808. return "pack32";
  8809. // Floating <-> Integer special casts. Just have to enumerate all cases. :(
  8810. // 16-bit, 32-bit and 64-bit floats.
  8811. if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float)
  8812. {
  8813. if (is_legacy_es())
  8814. SPIRV_CROSS_THROW("Float -> Uint bitcast not supported on legacy ESSL.");
  8815. else if (!options.es && options.version < 330)
  8816. require_extension_internal("GL_ARB_shader_bit_encoding");
  8817. return "floatBitsToUint";
  8818. }
  8819. else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float)
  8820. {
  8821. if (is_legacy_es())
  8822. SPIRV_CROSS_THROW("Float -> Int bitcast not supported on legacy ESSL.");
  8823. else if (!options.es && options.version < 330)
  8824. require_extension_internal("GL_ARB_shader_bit_encoding");
  8825. return "floatBitsToInt";
  8826. }
  8827. else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt)
  8828. {
  8829. if (is_legacy_es())
  8830. SPIRV_CROSS_THROW("Uint -> Float bitcast not supported on legacy ESSL.");
  8831. else if (!options.es && options.version < 330)
  8832. require_extension_internal("GL_ARB_shader_bit_encoding");
  8833. return "uintBitsToFloat";
  8834. }
  8835. else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int)
  8836. {
  8837. if (is_legacy_es())
  8838. SPIRV_CROSS_THROW("Int -> Float bitcast not supported on legacy ESSL.");
  8839. else if (!options.es && options.version < 330)
  8840. require_extension_internal("GL_ARB_shader_bit_encoding");
  8841. return "intBitsToFloat";
  8842. }
  8843. else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double)
  8844. return "doubleBitsToInt64";
  8845. else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double)
  8846. return "doubleBitsToUint64";
  8847. else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64)
  8848. return "int64BitsToDouble";
  8849. else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64)
  8850. return "uint64BitsToDouble";
  8851. else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Half)
  8852. return "float16BitsToInt16";
  8853. else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half)
  8854. return "float16BitsToUint16";
  8855. else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::Short)
  8856. return "int16BitsToFloat16";
  8857. else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort)
  8858. return "uint16BitsToFloat16";
  8859. // And finally, some even more special purpose casts.
  8860. if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UInt && in_type.vecsize == 2)
  8861. return "packUint2x32";
  8862. else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UInt64 && out_type.vecsize == 2)
  8863. return "unpackUint2x32";
  8864. else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
  8865. return "unpackFloat2x16";
  8866. else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2)
  8867. return "packFloat2x16";
  8868. else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Short && in_type.vecsize == 2)
  8869. return "packInt2x16";
  8870. else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int && in_type.vecsize == 1)
  8871. return "unpackInt2x16";
  8872. else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UShort && in_type.vecsize == 2)
  8873. return "packUint2x16";
  8874. else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
  8875. return "unpackUint2x16";
  8876. else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Short && in_type.vecsize == 4)
  8877. return "packInt4x16";
  8878. else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int64 && in_type.vecsize == 1)
  8879. return "unpackInt4x16";
  8880. else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UShort && in_type.vecsize == 4)
  8881. return "packUint4x16";
  8882. else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt64 && in_type.vecsize == 1)
  8883. return "unpackUint4x16";
  8884. else if (out_type.basetype == SPIRType::BFloat16 && in_type.basetype == SPIRType::UShort)
  8885. return "uintBitsToBFloat16EXT";
  8886. else if (out_type.basetype == SPIRType::BFloat16 && in_type.basetype == SPIRType::Short)
  8887. return "intBitsToBFloat16EXT";
  8888. else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::BFloat16)
  8889. return "bfloat16BitsToUintEXT";
  8890. else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::BFloat16)
  8891. return "bfloat16BitsToIntEXT";
  8892. else if (out_type.basetype == SPIRType::FloatE4M3 && in_type.basetype == SPIRType::UByte)
  8893. return "uintBitsToFloate4m3EXT";
  8894. else if (out_type.basetype == SPIRType::FloatE4M3 && in_type.basetype == SPIRType::SByte)
  8895. return "intBitsToFloate4m3EXT";
  8896. else if (out_type.basetype == SPIRType::UByte && in_type.basetype == SPIRType::FloatE4M3)
  8897. return "floate4m3BitsToUintEXT";
  8898. else if (out_type.basetype == SPIRType::SByte && in_type.basetype == SPIRType::FloatE4M3)
  8899. return "floate4m3BitsToIntEXT";
  8900. else if (out_type.basetype == SPIRType::FloatE5M2 && in_type.basetype == SPIRType::UByte)
  8901. return "uintBitsToFloate5m2EXT";
  8902. else if (out_type.basetype == SPIRType::FloatE5M2 && in_type.basetype == SPIRType::SByte)
  8903. return "intBitsToFloate5m2EXT";
  8904. else if (out_type.basetype == SPIRType::UByte && in_type.basetype == SPIRType::FloatE5M2)
  8905. return "floate5m2BitsToUintEXT";
  8906. else if (out_type.basetype == SPIRType::SByte && in_type.basetype == SPIRType::FloatE5M2)
  8907. return "floate5m2BitsToIntEXT";
  8908. return "";
  8909. }
  8910. string CompilerGLSL::bitcast_glsl(const SPIRType &result_type, uint32_t argument)
  8911. {
  8912. auto op = bitcast_glsl_op(result_type, expression_type(argument));
  8913. if (op.empty())
  8914. return to_enclosed_unpacked_expression(argument);
  8915. else
  8916. return join(op, "(", to_unpacked_expression(argument), ")");
  8917. }
  8918. std::string CompilerGLSL::bitcast_expression(SPIRType::BaseType target_type, uint32_t arg)
  8919. {
  8920. auto expr = to_expression(arg);
  8921. auto &src_type = expression_type(arg);
  8922. if (src_type.basetype != target_type)
  8923. {
  8924. auto target = src_type;
  8925. target.basetype = target_type;
  8926. expr = join(bitcast_glsl_op(target, src_type), "(", expr, ")");
  8927. }
  8928. return expr;
  8929. }
  8930. std::string CompilerGLSL::bitcast_expression(const SPIRType &target_type, SPIRType::BaseType expr_type,
  8931. const std::string &expr)
  8932. {
  8933. if (target_type.basetype == expr_type)
  8934. return expr;
  8935. auto src_type = target_type;
  8936. src_type.basetype = expr_type;
  8937. return join(bitcast_glsl_op(target_type, src_type), "(", expr, ")");
  8938. }
  8939. string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
  8940. {
  8941. switch (builtin)
  8942. {
  8943. case BuiltInPosition:
  8944. return "gl_Position";
  8945. case BuiltInPointSize:
  8946. return "gl_PointSize";
  8947. case BuiltInClipDistance:
  8948. {
  8949. if (options.es)
  8950. require_extension_internal("GL_EXT_clip_cull_distance");
  8951. return "gl_ClipDistance";
  8952. }
  8953. case BuiltInCullDistance:
  8954. {
  8955. if (options.es)
  8956. require_extension_internal("GL_EXT_clip_cull_distance");
  8957. return "gl_CullDistance";
  8958. }
  8959. case BuiltInVertexId:
  8960. if (options.vulkan_semantics)
  8961. SPIRV_CROSS_THROW("Cannot implement gl_VertexID in Vulkan GLSL. This shader was created "
  8962. "with GL semantics.");
  8963. return "gl_VertexID";
  8964. case BuiltInInstanceId:
  8965. if (options.vulkan_semantics)
  8966. {
  8967. auto model = get_entry_point().model;
  8968. switch (model)
  8969. {
  8970. case ExecutionModelIntersectionKHR:
  8971. case ExecutionModelAnyHitKHR:
  8972. case ExecutionModelClosestHitKHR:
  8973. // gl_InstanceID is allowed in these shaders.
  8974. break;
  8975. default:
  8976. SPIRV_CROSS_THROW("Cannot implement gl_InstanceID in Vulkan GLSL. This shader was "
  8977. "created with GL semantics.");
  8978. }
  8979. }
  8980. if (!options.es && options.version < 140)
  8981. {
  8982. require_extension_internal("GL_ARB_draw_instanced");
  8983. }
  8984. return "gl_InstanceID";
  8985. case BuiltInVertexIndex:
  8986. if (options.vulkan_semantics)
  8987. return "gl_VertexIndex";
  8988. else
  8989. return "gl_VertexID"; // gl_VertexID already has the base offset applied.
  8990. case BuiltInInstanceIndex:
  8991. if (options.vulkan_semantics)
  8992. return "gl_InstanceIndex";
  8993. if (!options.es && options.version < 140)
  8994. {
  8995. require_extension_internal("GL_ARB_draw_instanced");
  8996. }
  8997. if (options.vertex.support_nonzero_base_instance)
  8998. {
  8999. if (!options.vulkan_semantics)
  9000. {
  9001. // This is a soft-enable. We will opt-in to using gl_BaseInstanceARB if supported.
  9002. require_extension_internal("GL_ARB_shader_draw_parameters");
  9003. }
  9004. return "(gl_InstanceID + SPIRV_Cross_BaseInstance)"; // ... but not gl_InstanceID.
  9005. }
  9006. else
  9007. return "gl_InstanceID";
  9008. case BuiltInPrimitiveId:
  9009. if (storage == StorageClassInput && get_entry_point().model == ExecutionModelGeometry)
  9010. return "gl_PrimitiveIDIn";
  9011. else
  9012. return "gl_PrimitiveID";
  9013. case BuiltInInvocationId:
  9014. return "gl_InvocationID";
  9015. case BuiltInLayer:
  9016. {
  9017. auto model = get_execution_model();
  9018. if (model == ExecutionModelVertex || model == ExecutionModelTessellationEvaluation)
  9019. {
  9020. if (options.es)
  9021. require_extension_internal("GL_NV_viewport_array2");
  9022. else
  9023. require_extension_internal("GL_ARB_shader_viewport_layer_array");
  9024. }
  9025. return "gl_Layer";
  9026. }
  9027. case BuiltInViewportIndex:
  9028. return "gl_ViewportIndex";
  9029. case BuiltInTessLevelOuter:
  9030. return "gl_TessLevelOuter";
  9031. case BuiltInTessLevelInner:
  9032. return "gl_TessLevelInner";
  9033. case BuiltInTessCoord:
  9034. return "gl_TessCoord";
  9035. case BuiltInPatchVertices:
  9036. return "gl_PatchVerticesIn";
  9037. case BuiltInFragCoord:
  9038. return "gl_FragCoord";
  9039. case BuiltInPointCoord:
  9040. return "gl_PointCoord";
  9041. case BuiltInFrontFacing:
  9042. return "gl_FrontFacing";
  9043. case BuiltInFragDepth:
  9044. return "gl_FragDepth";
  9045. case BuiltInNumWorkgroups:
  9046. return "gl_NumWorkGroups";
  9047. case BuiltInWorkgroupSize:
  9048. return "gl_WorkGroupSize";
  9049. case BuiltInWorkgroupId:
  9050. return "gl_WorkGroupID";
  9051. case BuiltInLocalInvocationId:
  9052. return "gl_LocalInvocationID";
  9053. case BuiltInGlobalInvocationId:
  9054. return "gl_GlobalInvocationID";
  9055. case BuiltInLocalInvocationIndex:
  9056. return "gl_LocalInvocationIndex";
  9057. case BuiltInHelperInvocation:
  9058. return "gl_HelperInvocation";
  9059. case BuiltInBaseVertex:
  9060. if (options.es)
  9061. SPIRV_CROSS_THROW("BaseVertex not supported in ES profile.");
  9062. if (options.vulkan_semantics)
  9063. {
  9064. if (options.version < 460)
  9065. {
  9066. require_extension_internal("GL_ARB_shader_draw_parameters");
  9067. return "gl_BaseVertexARB";
  9068. }
  9069. return "gl_BaseVertex";
  9070. }
  9071. // On regular GL, this is soft-enabled and we emit ifdefs in code.
  9072. require_extension_internal("GL_ARB_shader_draw_parameters");
  9073. return "SPIRV_Cross_BaseVertex";
  9074. case BuiltInBaseInstance:
  9075. if (options.es)
  9076. SPIRV_CROSS_THROW("BaseInstance not supported in ES profile.");
  9077. if (options.vulkan_semantics)
  9078. {
  9079. if (options.version < 460)
  9080. {
  9081. require_extension_internal("GL_ARB_shader_draw_parameters");
  9082. return "gl_BaseInstanceARB";
  9083. }
  9084. return "gl_BaseInstance";
  9085. }
  9086. // On regular GL, this is soft-enabled and we emit ifdefs in code.
  9087. require_extension_internal("GL_ARB_shader_draw_parameters");
  9088. return "SPIRV_Cross_BaseInstance";
  9089. case BuiltInDrawIndex:
  9090. if (options.es)
  9091. SPIRV_CROSS_THROW("DrawIndex not supported in ES profile.");
  9092. if (options.vulkan_semantics)
  9093. {
  9094. if (options.version < 460)
  9095. {
  9096. require_extension_internal("GL_ARB_shader_draw_parameters");
  9097. return "gl_DrawIDARB";
  9098. }
  9099. return "gl_DrawID";
  9100. }
  9101. // On regular GL, this is soft-enabled and we emit ifdefs in code.
  9102. require_extension_internal("GL_ARB_shader_draw_parameters");
  9103. return "gl_DrawIDARB";
  9104. case BuiltInSampleId:
  9105. if (is_legacy())
  9106. SPIRV_CROSS_THROW("Sample variables not supported in legacy GLSL.");
  9107. else if (options.es && options.version < 320)
  9108. require_extension_internal("GL_OES_sample_variables");
  9109. else if (!options.es && options.version < 400)
  9110. require_extension_internal("GL_ARB_sample_shading");
  9111. return "gl_SampleID";
  9112. case BuiltInSampleMask:
  9113. if (is_legacy())
  9114. SPIRV_CROSS_THROW("Sample variables not supported in legacy GLSL.");
  9115. else if (options.es && options.version < 320)
  9116. require_extension_internal("GL_OES_sample_variables");
  9117. else if (!options.es && options.version < 400)
  9118. require_extension_internal("GL_ARB_sample_shading");
  9119. if (storage == StorageClassInput)
  9120. return "gl_SampleMaskIn";
  9121. else
  9122. return "gl_SampleMask";
  9123. case BuiltInSamplePosition:
  9124. if (is_legacy())
  9125. SPIRV_CROSS_THROW("Sample variables not supported in legacy GLSL.");
  9126. else if (options.es && options.version < 320)
  9127. require_extension_internal("GL_OES_sample_variables");
  9128. else if (!options.es && options.version < 400)
  9129. require_extension_internal("GL_ARB_sample_shading");
  9130. return "gl_SamplePosition";
  9131. case BuiltInViewIndex:
  9132. if (options.vulkan_semantics)
  9133. return "gl_ViewIndex";
  9134. else
  9135. return "gl_ViewID_OVR";
  9136. case BuiltInNumSubgroups:
  9137. request_subgroup_feature(ShaderSubgroupSupportHelper::NumSubgroups);
  9138. return "gl_NumSubgroups";
  9139. case BuiltInSubgroupId:
  9140. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupID);
  9141. return "gl_SubgroupID";
  9142. case BuiltInSubgroupSize:
  9143. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupSize);
  9144. return "gl_SubgroupSize";
  9145. case BuiltInSubgroupLocalInvocationId:
  9146. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInvocationID);
  9147. return "gl_SubgroupInvocationID";
  9148. case BuiltInSubgroupEqMask:
  9149. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
  9150. return "gl_SubgroupEqMask";
  9151. case BuiltInSubgroupGeMask:
  9152. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
  9153. return "gl_SubgroupGeMask";
  9154. case BuiltInSubgroupGtMask:
  9155. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
  9156. return "gl_SubgroupGtMask";
  9157. case BuiltInSubgroupLeMask:
  9158. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
  9159. return "gl_SubgroupLeMask";
  9160. case BuiltInSubgroupLtMask:
  9161. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
  9162. return "gl_SubgroupLtMask";
  9163. case BuiltInLaunchIdKHR:
  9164. return ray_tracing_is_khr ? "gl_LaunchIDEXT" : "gl_LaunchIDNV";
  9165. case BuiltInLaunchSizeKHR:
  9166. return ray_tracing_is_khr ? "gl_LaunchSizeEXT" : "gl_LaunchSizeNV";
  9167. case BuiltInWorldRayOriginKHR:
  9168. return ray_tracing_is_khr ? "gl_WorldRayOriginEXT" : "gl_WorldRayOriginNV";
  9169. case BuiltInWorldRayDirectionKHR:
  9170. return ray_tracing_is_khr ? "gl_WorldRayDirectionEXT" : "gl_WorldRayDirectionNV";
  9171. case BuiltInObjectRayOriginKHR:
  9172. return ray_tracing_is_khr ? "gl_ObjectRayOriginEXT" : "gl_ObjectRayOriginNV";
  9173. case BuiltInObjectRayDirectionKHR:
  9174. return ray_tracing_is_khr ? "gl_ObjectRayDirectionEXT" : "gl_ObjectRayDirectionNV";
  9175. case BuiltInRayTminKHR:
  9176. return ray_tracing_is_khr ? "gl_RayTminEXT" : "gl_RayTminNV";
  9177. case BuiltInRayTmaxKHR:
  9178. return ray_tracing_is_khr ? "gl_RayTmaxEXT" : "gl_RayTmaxNV";
  9179. case BuiltInInstanceCustomIndexKHR:
  9180. return ray_tracing_is_khr ? "gl_InstanceCustomIndexEXT" : "gl_InstanceCustomIndexNV";
  9181. case BuiltInObjectToWorldKHR:
  9182. return ray_tracing_is_khr ? "gl_ObjectToWorldEXT" : "gl_ObjectToWorldNV";
  9183. case BuiltInWorldToObjectKHR:
  9184. return ray_tracing_is_khr ? "gl_WorldToObjectEXT" : "gl_WorldToObjectNV";
  9185. case BuiltInHitTNV:
  9186. // gl_HitTEXT is an alias of RayTMax in KHR.
  9187. return "gl_HitTNV";
  9188. case BuiltInHitKindKHR:
  9189. return ray_tracing_is_khr ? "gl_HitKindEXT" : "gl_HitKindNV";
  9190. case BuiltInIncomingRayFlagsKHR:
  9191. return ray_tracing_is_khr ? "gl_IncomingRayFlagsEXT" : "gl_IncomingRayFlagsNV";
  9192. case BuiltInBaryCoordKHR:
  9193. {
  9194. if (options.es && options.version < 320)
  9195. SPIRV_CROSS_THROW("gl_BaryCoordEXT requires ESSL 320.");
  9196. else if (!options.es && options.version < 450)
  9197. SPIRV_CROSS_THROW("gl_BaryCoordEXT requires GLSL 450.");
  9198. if (barycentric_is_nv)
  9199. {
  9200. require_extension_internal("GL_NV_fragment_shader_barycentric");
  9201. return "gl_BaryCoordNV";
  9202. }
  9203. else
  9204. {
  9205. require_extension_internal("GL_EXT_fragment_shader_barycentric");
  9206. return "gl_BaryCoordEXT";
  9207. }
  9208. }
  9209. case BuiltInBaryCoordNoPerspNV:
  9210. {
  9211. if (options.es && options.version < 320)
  9212. SPIRV_CROSS_THROW("gl_BaryCoordNoPerspEXT requires ESSL 320.");
  9213. else if (!options.es && options.version < 450)
  9214. SPIRV_CROSS_THROW("gl_BaryCoordNoPerspEXT requires GLSL 450.");
  9215. if (barycentric_is_nv)
  9216. {
  9217. require_extension_internal("GL_NV_fragment_shader_barycentric");
  9218. return "gl_BaryCoordNoPerspNV";
  9219. }
  9220. else
  9221. {
  9222. require_extension_internal("GL_EXT_fragment_shader_barycentric");
  9223. return "gl_BaryCoordNoPerspEXT";
  9224. }
  9225. }
  9226. case BuiltInFragStencilRefEXT:
  9227. {
  9228. if (!options.es)
  9229. {
  9230. require_extension_internal("GL_ARB_shader_stencil_export");
  9231. return "gl_FragStencilRefARB";
  9232. }
  9233. else
  9234. SPIRV_CROSS_THROW("Stencil export not supported in GLES.");
  9235. }
  9236. case BuiltInPrimitiveShadingRateKHR:
  9237. {
  9238. if (!options.vulkan_semantics)
  9239. SPIRV_CROSS_THROW("Can only use PrimitiveShadingRateKHR in Vulkan GLSL.");
  9240. require_extension_internal("GL_EXT_fragment_shading_rate");
  9241. return "gl_PrimitiveShadingRateEXT";
  9242. }
  9243. case BuiltInShadingRateKHR:
  9244. {
  9245. if (!options.vulkan_semantics)
  9246. SPIRV_CROSS_THROW("Can only use ShadingRateKHR in Vulkan GLSL.");
  9247. require_extension_internal("GL_EXT_fragment_shading_rate");
  9248. return "gl_ShadingRateEXT";
  9249. }
  9250. case BuiltInDeviceIndex:
  9251. if (!options.vulkan_semantics)
  9252. SPIRV_CROSS_THROW("Need Vulkan semantics for device group support.");
  9253. require_extension_internal("GL_EXT_device_group");
  9254. return "gl_DeviceIndex";
  9255. case BuiltInFullyCoveredEXT:
  9256. if (!options.es)
  9257. require_extension_internal("GL_NV_conservative_raster_underestimation");
  9258. else
  9259. SPIRV_CROSS_THROW("Need desktop GL to use GL_NV_conservative_raster_underestimation.");
  9260. return "gl_FragFullyCoveredNV";
  9261. case BuiltInPrimitiveTriangleIndicesEXT:
  9262. return "gl_PrimitiveTriangleIndicesEXT";
  9263. case BuiltInPrimitiveLineIndicesEXT:
  9264. return "gl_PrimitiveLineIndicesEXT";
  9265. case BuiltInPrimitivePointIndicesEXT:
  9266. return "gl_PrimitivePointIndicesEXT";
  9267. case BuiltInCullPrimitiveEXT:
  9268. return "gl_CullPrimitiveEXT";
  9269. case BuiltInHitTriangleVertexPositionsKHR:
  9270. {
  9271. if (!options.vulkan_semantics)
  9272. SPIRV_CROSS_THROW("Need Vulkan semantics for EXT_ray_tracing_position_fetch.");
  9273. require_extension_internal("GL_EXT_ray_tracing_position_fetch");
  9274. return "gl_HitTriangleVertexPositionsEXT";
  9275. }
  9276. case BuiltInClusterIDNV:
  9277. {
  9278. if (!options.vulkan_semantics)
  9279. SPIRV_CROSS_THROW("Can only use ClusterIDNV in Vulkan GLSL.");
  9280. require_extension_internal("GL_NV_cluster_acceleration_structure");
  9281. return "gl_ClusterIDNV";
  9282. }
  9283. default:
  9284. return join("gl_BuiltIn_", convert_to_string(builtin));
  9285. }
  9286. }
  9287. const char *CompilerGLSL::index_to_swizzle(uint32_t index)
  9288. {
  9289. switch (index)
  9290. {
  9291. case 0:
  9292. return "x";
  9293. case 1:
  9294. return "y";
  9295. case 2:
  9296. return "z";
  9297. case 3:
  9298. return "w";
  9299. default:
  9300. return "x"; // Don't crash, but engage the "undefined behavior" described for out-of-bounds logical addressing in spec.
  9301. }
  9302. }
  9303. void CompilerGLSL::access_chain_internal_append_index(std::string &expr, uint32_t /*base*/, const SPIRType * /*type*/,
  9304. AccessChainFlags flags, bool &access_chain_is_arrayed,
  9305. uint32_t index)
  9306. {
  9307. bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
  9308. bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0;
  9309. bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
  9310. string idx_expr = index_is_literal ? convert_to_string(index) : to_unpacked_expression(index, register_expression_read);
  9311. // For the case where the base of an OpPtrAccessChain already ends in [n],
  9312. // we need to use the index as an offset to the existing index, otherwise,
  9313. // we can just use the index directly.
  9314. if (ptr_chain && access_chain_is_arrayed)
  9315. {
  9316. size_t split_pos = expr.find_last_of(']');
  9317. size_t enclose_split = expr.find_last_of(')');
  9318. // If we have already enclosed the expression, don't try to be clever, it will break.
  9319. if (split_pos > enclose_split || enclose_split == string::npos)
  9320. {
  9321. string expr_front = expr.substr(0, split_pos);
  9322. string expr_back = expr.substr(split_pos);
  9323. expr = expr_front + " + " + enclose_expression(idx_expr) + expr_back;
  9324. return;
  9325. }
  9326. }
  9327. expr += "[";
  9328. expr += idx_expr;
  9329. expr += "]";
  9330. }
  9331. bool CompilerGLSL::access_chain_needs_stage_io_builtin_translation(uint32_t)
  9332. {
  9333. return true;
  9334. }
  9335. string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count,
  9336. AccessChainFlags flags, AccessChainMeta *meta)
  9337. {
  9338. string expr;
  9339. bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
  9340. bool msb_is_id = (flags & ACCESS_CHAIN_LITERAL_MSB_FORCE_ID) != 0;
  9341. bool chain_only = (flags & ACCESS_CHAIN_CHAIN_ONLY_BIT) != 0;
  9342. bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0;
  9343. bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
  9344. bool flatten_member_reference = (flags & ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT) != 0;
  9345. if (!chain_only)
  9346. {
  9347. // We handle transpose explicitly, so don't resolve that here.
  9348. auto *e = maybe_get<SPIRExpression>(base);
  9349. bool old_transpose = e && e->need_transpose;
  9350. if (e)
  9351. e->need_transpose = false;
  9352. expr = to_enclosed_expression(base, register_expression_read);
  9353. if (e)
  9354. e->need_transpose = old_transpose;
  9355. }
  9356. // Start traversing type hierarchy at the proper non-pointer types,
  9357. // but keep type_id referencing the original pointer for use below.
  9358. uint32_t type_id = expression_type_id(base);
  9359. const auto *type = &get_pointee_type(type_id);
  9360. if (!backend.native_pointers)
  9361. {
  9362. if (ptr_chain)
  9363. SPIRV_CROSS_THROW("Backend does not support native pointers and does not support OpPtrAccessChain.");
  9364. // Wrapped buffer reference pointer types will need to poke into the internal "value" member before
  9365. // continuing the access chain.
  9366. if (should_dereference(base))
  9367. expr = dereference_expression(get<SPIRType>(type_id), expr);
  9368. }
  9369. else if (should_dereference(base) && type->basetype != SPIRType::Struct && !ptr_chain)
  9370. expr = join("(", dereference_expression(*type, expr), ")");
  9371. bool access_chain_is_arrayed = expr.find_first_of('[') != string::npos;
  9372. bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base);
  9373. bool is_packed = has_extended_decoration(base, SPIRVCrossDecorationPhysicalTypePacked);
  9374. uint32_t physical_type = get_extended_decoration(base, SPIRVCrossDecorationPhysicalTypeID);
  9375. bool is_invariant = has_decoration(base, DecorationInvariant);
  9376. bool relaxed_precision = has_decoration(base, DecorationRelaxedPrecision);
  9377. bool pending_array_enclose = false;
  9378. bool dimension_flatten = false;
  9379. bool access_meshlet_position_y = false;
  9380. bool chain_is_builtin = false;
  9381. BuiltIn chained_builtin = {};
  9382. if (auto *base_expr = maybe_get<SPIRExpression>(base))
  9383. {
  9384. access_meshlet_position_y = base_expr->access_meshlet_position_y;
  9385. }
  9386. // If we are translating access to a structured buffer, the first subscript '._m0' must be hidden
  9387. bool hide_first_subscript = count > 1 && is_user_type_structured(base);
  9388. const auto append_index = [&](uint32_t index, bool is_literal, bool is_ptr_chain = false) {
  9389. AccessChainFlags mod_flags = flags;
  9390. if (!is_literal)
  9391. mod_flags &= ~ACCESS_CHAIN_INDEX_IS_LITERAL_BIT;
  9392. if (!is_ptr_chain)
  9393. mod_flags &= ~ACCESS_CHAIN_PTR_CHAIN_BIT;
  9394. access_chain_internal_append_index(expr, base, type, mod_flags, access_chain_is_arrayed, index);
  9395. if (check_physical_type_cast(expr, type, physical_type))
  9396. physical_type = 0;
  9397. };
  9398. for (uint32_t i = 0; i < count; i++)
  9399. {
  9400. uint32_t index = indices[i];
  9401. bool is_literal = index_is_literal;
  9402. if (is_literal && msb_is_id && (index >> 31u) != 0u)
  9403. {
  9404. is_literal = false;
  9405. index &= 0x7fffffffu;
  9406. }
  9407. bool ptr_chain_array_entry = ptr_chain && i == 0 && is_array(*type);
  9408. if (ptr_chain_array_entry)
  9409. {
  9410. // This is highly unusual code, since normally we'd use plain AccessChain, but it's still allowed.
  9411. // We are considered to have a pointer to array and one element shifts by one array at a time.
  9412. // If we use normal array indexing, we'll first decay to pointer, and lose the array-ness,
  9413. // so we have to take pointer to array explicitly.
  9414. if (!should_dereference(base))
  9415. expr = enclose_expression(address_of_expression(expr));
  9416. }
  9417. if (ptr_chain && i == 0)
  9418. {
  9419. // Pointer chains
  9420. // If we are flattening multidimensional arrays, only create opening bracket on first
  9421. // array index.
  9422. if (options.flatten_multidimensional_arrays)
  9423. {
  9424. dimension_flatten = type->array.size() >= 1;
  9425. pending_array_enclose = dimension_flatten;
  9426. if (pending_array_enclose)
  9427. expr += "[";
  9428. }
  9429. if (options.flatten_multidimensional_arrays && dimension_flatten)
  9430. {
  9431. // If we are flattening multidimensional arrays, do manual stride computation.
  9432. if (is_literal)
  9433. expr += convert_to_string(index);
  9434. else
  9435. expr += to_enclosed_expression(index, register_expression_read);
  9436. for (auto j = uint32_t(type->array.size()); j; j--)
  9437. {
  9438. expr += " * ";
  9439. expr += enclose_expression(to_array_size(*type, j - 1));
  9440. }
  9441. if (type->array.empty())
  9442. pending_array_enclose = false;
  9443. else
  9444. expr += " + ";
  9445. if (!pending_array_enclose)
  9446. expr += "]";
  9447. }
  9448. else
  9449. {
  9450. if (flags & ACCESS_CHAIN_PTR_CHAIN_POINTER_ARITH_BIT)
  9451. {
  9452. SPIRType tmp_type(OpTypeInt);
  9453. tmp_type.basetype = SPIRType::UInt64;
  9454. tmp_type.width = 64;
  9455. tmp_type.vecsize = 1;
  9456. tmp_type.columns = 1;
  9457. TypeID ptr_type_id = expression_type_id(base);
  9458. const SPIRType &ptr_type = get<SPIRType>(ptr_type_id);
  9459. const SPIRType &pointee_type = get_pointee_type(ptr_type);
  9460. // This only runs in native pointer backends.
  9461. // Can replace reinterpret_cast with a backend string if ever needed.
  9462. // We expect this to count as a de-reference.
  9463. // This leaks some MSL details, but feels slightly overkill to
  9464. // add yet another virtual interface just for this.
  9465. auto intptr_expr = join("reinterpret_cast<", type_to_glsl(tmp_type), ">(", expr, ")");
  9466. intptr_expr += join(" + ", to_enclosed_unpacked_expression(index), " * ",
  9467. get_decoration(ptr_type_id, DecorationArrayStride));
  9468. if (flags & ACCESS_CHAIN_PTR_CHAIN_CAST_TO_SCALAR_BIT)
  9469. {
  9470. is_packed = true;
  9471. expr = join("*reinterpret_cast<device packed_", type_to_glsl(pointee_type),
  9472. " *>(", intptr_expr, ")");
  9473. }
  9474. else
  9475. {
  9476. expr = join("*reinterpret_cast<", type_to_glsl(ptr_type), ">(", intptr_expr, ")");
  9477. }
  9478. }
  9479. else
  9480. append_index(index, is_literal, true);
  9481. }
  9482. if (type->basetype == SPIRType::ControlPointArray)
  9483. {
  9484. type_id = type->parent_type;
  9485. type = &get<SPIRType>(type_id);
  9486. }
  9487. access_chain_is_arrayed = true;
  9488. // Explicitly enclose the expression if this is one of the weird pointer-to-array cases.
  9489. // We don't want any future indexing to add to this array dereference.
  9490. // Enclosing the expression blocks that and avoids any shenanigans with operand priority.
  9491. if (ptr_chain_array_entry)
  9492. expr = join("(", expr, ")");
  9493. }
  9494. // Arrays and OpTypeCooperativeVectorNV (aka fancy arrays)
  9495. else if (!type->array.empty() || type->op == OpTypeCooperativeVectorNV)
  9496. {
  9497. // If we are flattening multidimensional arrays, only create opening bracket on first
  9498. // array index.
  9499. if (options.flatten_multidimensional_arrays && !pending_array_enclose)
  9500. {
  9501. dimension_flatten = type->array.size() > 1;
  9502. pending_array_enclose = dimension_flatten;
  9503. if (pending_array_enclose)
  9504. expr += "[";
  9505. }
  9506. assert(type->parent_type);
  9507. auto *var = maybe_get<SPIRVariable>(base);
  9508. if (backend.force_gl_in_out_block && i == 0 && var && is_builtin_variable(*var) &&
  9509. !has_decoration(type->self, DecorationBlock))
  9510. {
  9511. // This deals with scenarios for tesc/geom where arrays of gl_Position[] are declared.
  9512. // Normally, these variables live in blocks when compiled from GLSL,
  9513. // but HLSL seems to just emit straight arrays here.
  9514. // We must pretend this access goes through gl_in/gl_out arrays
  9515. // to be able to access certain builtins as arrays.
  9516. // Similar concerns apply for mesh shaders where we have to redirect to gl_MeshVerticesEXT or MeshPrimitivesEXT.
  9517. auto builtin = ir.meta[base].decoration.builtin_type;
  9518. bool mesh_shader = get_execution_model() == ExecutionModelMeshEXT;
  9519. chain_is_builtin = true;
  9520. chained_builtin = builtin;
  9521. switch (builtin)
  9522. {
  9523. case BuiltInCullDistance:
  9524. case BuiltInClipDistance:
  9525. if (type->array.size() == 1) // Red herring. Only consider block IO for two-dimensional arrays here.
  9526. {
  9527. append_index(index, is_literal);
  9528. break;
  9529. }
  9530. // fallthrough
  9531. case BuiltInPosition:
  9532. case BuiltInPointSize:
  9533. if (mesh_shader)
  9534. expr = join("gl_MeshVerticesEXT[", to_expression(index, register_expression_read), "].", expr);
  9535. else if (var->storage == StorageClassInput)
  9536. expr = join("gl_in[", to_expression(index, register_expression_read), "].", expr);
  9537. else if (var->storage == StorageClassOutput)
  9538. expr = join("gl_out[", to_expression(index, register_expression_read), "].", expr);
  9539. else
  9540. append_index(index, is_literal);
  9541. break;
  9542. case BuiltInPrimitiveId:
  9543. case BuiltInLayer:
  9544. case BuiltInViewportIndex:
  9545. case BuiltInCullPrimitiveEXT:
  9546. case BuiltInPrimitiveShadingRateKHR:
  9547. if (mesh_shader)
  9548. expr = join("gl_MeshPrimitivesEXT[", to_expression(index, register_expression_read), "].", expr);
  9549. else
  9550. append_index(index, is_literal);
  9551. break;
  9552. default:
  9553. append_index(index, is_literal);
  9554. break;
  9555. }
  9556. }
  9557. else if (backend.force_merged_mesh_block && i == 0 && var &&
  9558. !is_builtin_variable(*var) && var->storage == StorageClassOutput)
  9559. {
  9560. if (is_per_primitive_variable(*var))
  9561. expr = join("gl_MeshPrimitivesEXT[", to_expression(index, register_expression_read), "].", expr);
  9562. else
  9563. expr = join("gl_MeshVerticesEXT[", to_expression(index, register_expression_read), "].", expr);
  9564. }
  9565. else if (options.flatten_multidimensional_arrays && dimension_flatten)
  9566. {
  9567. // If we are flattening multidimensional arrays, do manual stride computation.
  9568. auto &parent_type = get<SPIRType>(type->parent_type);
  9569. if (is_literal)
  9570. expr += convert_to_string(index);
  9571. else
  9572. expr += to_enclosed_expression(index, register_expression_read);
  9573. for (auto j = uint32_t(parent_type.array.size()); j; j--)
  9574. {
  9575. expr += " * ";
  9576. expr += enclose_expression(to_array_size(parent_type, j - 1));
  9577. }
  9578. if (parent_type.array.empty())
  9579. pending_array_enclose = false;
  9580. else
  9581. expr += " + ";
  9582. if (!pending_array_enclose)
  9583. expr += "]";
  9584. }
  9585. else if (index_is_literal || !builtin_translates_to_nonarray(BuiltIn(get_decoration(base, DecorationBuiltIn))))
  9586. {
  9587. // Some builtins are arrays in SPIR-V but not in other languages, e.g. gl_SampleMask[] is an array in SPIR-V but not in Metal.
  9588. // By throwing away the index, we imply the index was 0, which it must be for gl_SampleMask.
  9589. // For literal indices we are working on composites, so we ignore this since we have already converted to proper array.
  9590. append_index(index, is_literal);
  9591. }
  9592. if (var && has_decoration(var->self, DecorationBuiltIn) &&
  9593. get_decoration(var->self, DecorationBuiltIn) == BuiltInPosition &&
  9594. get_execution_model() == ExecutionModelMeshEXT)
  9595. {
  9596. access_meshlet_position_y = true;
  9597. }
  9598. type_id = type->parent_type;
  9599. type = &get<SPIRType>(type_id);
  9600. // If the physical type has an unnatural vecsize,
  9601. // we must assume it's a faked struct where the .data member
  9602. // is used for the real payload.
  9603. if (physical_type && (is_vector(*type) || is_scalar(*type)))
  9604. {
  9605. auto &phys = get<SPIRType>(physical_type);
  9606. if (phys.vecsize > 4)
  9607. expr += ".data";
  9608. }
  9609. access_chain_is_arrayed = true;
  9610. }
  9611. // For structs, the index refers to a constant, which indexes into the members, possibly through a redirection mapping.
  9612. // We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
  9613. else if (type->basetype == SPIRType::Struct)
  9614. {
  9615. if (!is_literal)
  9616. index = evaluate_constant_u32(index);
  9617. if (index < uint32_t(type->member_type_index_redirection.size()))
  9618. index = type->member_type_index_redirection[index];
  9619. if (index >= type->member_types.size())
  9620. SPIRV_CROSS_THROW("Member index is out of bounds!");
  9621. if (hide_first_subscript)
  9622. {
  9623. // First "._m0" subscript has been hidden, subsequent fields must be emitted even for structured buffers
  9624. hide_first_subscript = false;
  9625. }
  9626. else
  9627. {
  9628. BuiltIn builtin = BuiltInMax;
  9629. if (is_member_builtin(*type, index, &builtin) && access_chain_needs_stage_io_builtin_translation(base))
  9630. {
  9631. if (access_chain_is_arrayed)
  9632. {
  9633. expr += ".";
  9634. expr += builtin_to_glsl(builtin, type->storage);
  9635. }
  9636. else
  9637. expr = builtin_to_glsl(builtin, type->storage);
  9638. if (builtin == BuiltInPosition && get_execution_model() == ExecutionModelMeshEXT)
  9639. {
  9640. access_meshlet_position_y = true;
  9641. }
  9642. chain_is_builtin = true;
  9643. chained_builtin = builtin;
  9644. }
  9645. else
  9646. {
  9647. // If the member has a qualified name, use it as the entire chain
  9648. string qual_mbr_name = get_member_qualified_name(type_id, index);
  9649. if (!qual_mbr_name.empty())
  9650. expr = qual_mbr_name;
  9651. else if (flatten_member_reference)
  9652. expr += join("_", to_member_name(*type, index));
  9653. else
  9654. {
  9655. // Any pointer de-refences for values are handled in the first access chain.
  9656. // For pointer chains, the pointer-ness is resolved through an array access.
  9657. // The only time this is not true is when accessing array of SSBO/UBO.
  9658. // This case is explicitly handled.
  9659. expr += to_member_reference(base, *type, index, ptr_chain || i != 0);
  9660. }
  9661. }
  9662. }
  9663. if (has_member_decoration(type->self, index, DecorationInvariant))
  9664. is_invariant = true;
  9665. if (has_member_decoration(type->self, index, DecorationRelaxedPrecision))
  9666. relaxed_precision = true;
  9667. is_packed = member_is_packed_physical_type(*type, index);
  9668. if (member_is_remapped_physical_type(*type, index))
  9669. physical_type = get_extended_member_decoration(type->self, index, SPIRVCrossDecorationPhysicalTypeID);
  9670. else
  9671. physical_type = 0;
  9672. row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index);
  9673. type = &get<SPIRType>(type->member_types[index]);
  9674. }
  9675. // Matrix -> Vector
  9676. else if (type->columns > 1)
  9677. {
  9678. // If we have a row-major matrix here, we need to defer any transpose in case this access chain
  9679. // is used to store a column. We can resolve it right here and now if we access a scalar directly,
  9680. // by flipping indexing order of the matrix.
  9681. expr += "[";
  9682. if (is_literal)
  9683. expr += convert_to_string(index);
  9684. else
  9685. expr += to_unpacked_expression(index, register_expression_read);
  9686. expr += "]";
  9687. // If the physical type has an unnatural vecsize,
  9688. // we must assume it's a faked struct where the .data member
  9689. // is used for the real payload.
  9690. if (physical_type)
  9691. {
  9692. auto &phys = get<SPIRType>(physical_type);
  9693. if (phys.vecsize > 4 || phys.columns > 4)
  9694. expr += ".data";
  9695. }
  9696. type_id = type->parent_type;
  9697. type = &get<SPIRType>(type_id);
  9698. }
  9699. // Vector -> Scalar
  9700. else if (type->op == OpTypeCooperativeMatrixKHR || type->vecsize > 1)
  9701. {
  9702. string deferred_index;
  9703. if (row_major_matrix_needs_conversion)
  9704. {
  9705. // Flip indexing order.
  9706. auto column_index = expr.find_last_of('[');
  9707. if (column_index != string::npos)
  9708. {
  9709. deferred_index = expr.substr(column_index);
  9710. auto end_deferred_index = deferred_index.find_last_of(']');
  9711. if (end_deferred_index != string::npos && end_deferred_index + 1 != deferred_index.size())
  9712. {
  9713. // If we have any data member fixups, it must be transposed so that it refers to this index.
  9714. // E.g. [0].data followed by [1] would be shuffled to [1][0].data which is wrong,
  9715. // and needs to be [1].data[0] instead.
  9716. end_deferred_index++;
  9717. deferred_index = deferred_index.substr(end_deferred_index) +
  9718. deferred_index.substr(0, end_deferred_index);
  9719. }
  9720. expr.resize(column_index);
  9721. }
  9722. }
  9723. // Internally, access chain implementation can also be used on composites,
  9724. // ignore scalar access workarounds in this case.
  9725. StorageClass effective_storage = StorageClassGeneric;
  9726. bool ignore_potential_sliced_writes = false;
  9727. if ((flags & ACCESS_CHAIN_FORCE_COMPOSITE_BIT) == 0)
  9728. {
  9729. if (expression_type(base).pointer)
  9730. effective_storage = get_expression_effective_storage_class(base);
  9731. // Special consideration for control points.
  9732. // Control points can only be written by InvocationID, so there is no need
  9733. // to consider scalar access chains here.
  9734. // Cleans up some cases where it's very painful to determine the accurate storage class
  9735. // since blocks can be partially masked ...
  9736. auto *var = maybe_get_backing_variable(base);
  9737. if (var && var->storage == StorageClassOutput &&
  9738. get_execution_model() == ExecutionModelTessellationControl &&
  9739. !has_decoration(var->self, DecorationPatch))
  9740. {
  9741. ignore_potential_sliced_writes = true;
  9742. }
  9743. }
  9744. else
  9745. ignore_potential_sliced_writes = true;
  9746. if (!row_major_matrix_needs_conversion && !ignore_potential_sliced_writes)
  9747. {
  9748. // On some backends, we might not be able to safely access individual scalars in a vector.
  9749. // To work around this, we might have to cast the access chain reference to something which can,
  9750. // like a pointer to scalar, which we can then index into.
  9751. prepare_access_chain_for_scalar_access(expr, get<SPIRType>(type->parent_type), effective_storage,
  9752. is_packed);
  9753. }
  9754. if (is_literal)
  9755. {
  9756. bool out_of_bounds = index >= type->vecsize && type->op != OpTypeCooperativeMatrixKHR;
  9757. if (!is_packed && !row_major_matrix_needs_conversion && type->op != OpTypeCooperativeMatrixKHR)
  9758. {
  9759. expr += ".";
  9760. expr += index_to_swizzle(out_of_bounds ? 0 : index);
  9761. }
  9762. else
  9763. {
  9764. // For packed vectors, we can only access them as an array, not by swizzle.
  9765. expr += join("[", out_of_bounds ? 0 : index, "]");
  9766. }
  9767. }
  9768. else if (ir.ids[index].get_type() == TypeConstant && !is_packed && !row_major_matrix_needs_conversion)
  9769. {
  9770. auto &c = get<SPIRConstant>(index);
  9771. bool out_of_bounds = (c.scalar() >= type->vecsize);
  9772. if (c.specialization)
  9773. {
  9774. // If the index is a spec constant, we cannot turn extract into a swizzle.
  9775. expr += join("[", out_of_bounds ? "0" : to_expression(index), "]");
  9776. }
  9777. else
  9778. {
  9779. expr += ".";
  9780. expr += index_to_swizzle(out_of_bounds ? 0 : c.scalar());
  9781. }
  9782. }
  9783. else
  9784. {
  9785. expr += "[";
  9786. expr += to_unpacked_expression(index, register_expression_read);
  9787. expr += "]";
  9788. }
  9789. if (row_major_matrix_needs_conversion && !ignore_potential_sliced_writes)
  9790. {
  9791. if (prepare_access_chain_for_scalar_access(expr, get<SPIRType>(type->parent_type), effective_storage,
  9792. is_packed))
  9793. {
  9794. // We're in a pointer context now, so just remove any member dereference.
  9795. auto first_index = deferred_index.find_first_of('[');
  9796. if (first_index != string::npos && first_index != 0)
  9797. deferred_index = deferred_index.substr(first_index);
  9798. }
  9799. }
  9800. if (access_meshlet_position_y)
  9801. {
  9802. if (is_literal)
  9803. {
  9804. access_meshlet_position_y = index == 1;
  9805. }
  9806. else
  9807. {
  9808. const auto *c = maybe_get<SPIRConstant>(index);
  9809. if (c)
  9810. access_meshlet_position_y = c->scalar() == 1;
  9811. else
  9812. {
  9813. // We don't know, but we have to assume no.
  9814. // Flip Y in mesh shaders is an opt-in horrible hack, so we'll have to assume shaders try to behave.
  9815. access_meshlet_position_y = false;
  9816. }
  9817. }
  9818. }
  9819. expr += deferred_index;
  9820. row_major_matrix_needs_conversion = false;
  9821. is_packed = false;
  9822. physical_type = 0;
  9823. type_id = type->parent_type;
  9824. type = &get<SPIRType>(type_id);
  9825. }
  9826. else if (!backend.allow_truncated_access_chain)
  9827. SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
  9828. }
  9829. if (pending_array_enclose)
  9830. {
  9831. SPIRV_CROSS_THROW("Flattening of multidimensional arrays were enabled, "
  9832. "but the access chain was terminated in the middle of a multidimensional array. "
  9833. "This is not supported.");
  9834. }
  9835. if (meta)
  9836. {
  9837. meta->need_transpose = row_major_matrix_needs_conversion;
  9838. meta->storage_is_packed = is_packed;
  9839. meta->storage_is_invariant = is_invariant;
  9840. meta->storage_physical_type = physical_type;
  9841. meta->relaxed_precision = relaxed_precision;
  9842. meta->access_meshlet_position_y = access_meshlet_position_y;
  9843. meta->chain_is_builtin = chain_is_builtin;
  9844. meta->builtin = chained_builtin;
  9845. }
  9846. return expr;
  9847. }
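// Base-class stubs. Plain GLSL has no packed vectors or physically laid out pointer types,
// so the access chain expression never needs to be rewritten here. Backends that do support
// such types override these hooks and return true when they modify the expression.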
  9848. bool CompilerGLSL::check_physical_type_cast(std::string &, const SPIRType *, uint32_t)
  9849. {
  9850. return false;
  9851. }
  9852. bool CompilerGLSL::prepare_access_chain_for_scalar_access(std::string &, const SPIRType &, StorageClass, bool &)
  9853. {
  9854. return false;
  9855. }
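// Builds the name of a member when its parent struct has been flattened into separate variables.
// Illustrative example: member "color" of a flattened struct named "UBO" becomes "UBO_color",
// with repeated underscores sanitized since double underscores are reserved in GLSL.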
  9856. string CompilerGLSL::to_flattened_struct_member(const string &basename, const SPIRType &type, uint32_t index)
  9857. {
  9858. auto ret = join(basename, "_", to_member_name(type, index));
  9859. ParsedIR::sanitize_underscores(ret);
  9860. return ret;
  9861. }
  9862. uint32_t CompilerGLSL::get_physical_type_stride(const SPIRType &) const
  9863. {
  9864. SPIRV_CROSS_THROW("Invalid to call get_physical_type_stride on a backend without native pointer support.");
  9865. }
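// Top-level access chain emission. Three paths:
// 1) The base is a flattened buffer block (typically a UBO lowered to a plain vec4 array on
//    targets without buffer support): compute byte offsets and index the array manually.
// 2) The base is a struct that was flattened into individual variables: rebuild the
//    "base_member" style name from the chain.
// 3) Otherwise, defer to access_chain_internal() which emits a normal chained expression.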
  9866. string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type,
  9867. AccessChainMeta *meta, bool ptr_chain)
  9868. {
  9869. if (flattened_buffer_blocks.count(base))
  9870. {
  9871. uint32_t matrix_stride = 0;
  9872. uint32_t array_stride = 0;
  9873. bool need_transpose = false;
  9874. flattened_access_chain_offset(expression_type(base), indices, count, 0, 16, &need_transpose, &matrix_stride,
  9875. &array_stride, ptr_chain);
  9876. if (meta)
  9877. {
  9878. meta->need_transpose = target_type.columns > 1 && need_transpose;
  9879. meta->storage_is_packed = false;
  9880. }
  9881. return flattened_access_chain(base, indices, count, target_type, 0, matrix_stride, array_stride,
  9882. need_transpose);
  9883. }
  9884. else if (flattened_structs.count(base) && count > 0)
  9885. {
  9886. AccessChainFlags flags = ACCESS_CHAIN_CHAIN_ONLY_BIT | ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT;
  9887. if (ptr_chain)
  9888. flags |= ACCESS_CHAIN_PTR_CHAIN_BIT;
  9889. if (flattened_structs[base])
  9890. {
  9891. flags |= ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT;
  9892. if (meta)
  9893. meta->flattened_struct = target_type.basetype == SPIRType::Struct;
  9894. }
  9895. auto chain = access_chain_internal(base, indices, count, flags, nullptr).substr(1);
  9896. if (meta)
  9897. {
  9898. meta->need_transpose = false;
  9899. meta->storage_is_packed = false;
  9900. }
  9901. auto basename = to_flattened_access_chain_expression(base);
  9902. auto ret = join(basename, "_", chain);
  9903. ParsedIR::sanitize_underscores(ret);
  9904. return ret;
  9905. }
  9906. else
  9907. {
  9908. AccessChainFlags flags = ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT;
  9909. if (ptr_chain)
  9910. {
  9911. flags |= ACCESS_CHAIN_PTR_CHAIN_BIT;
  9912. // PtrAccessChain could get complicated.
  9913. TypeID type_id = expression_type_id(base);
  9914. if (backend.native_pointers && has_decoration(type_id, DecorationArrayStride))
  9915. {
  9916. // If there is a mismatch we have to go via 64-bit pointer arithmetic :'(
  9917. // Using packed hacks only gets us so far, and is not designed to deal with pointer to
  9918. // random values. It works for structs though.
  9919. auto &pointee_type = get_pointee_type(get<SPIRType>(type_id));
  9920. uint32_t physical_stride = get_physical_type_stride(pointee_type);
  9921. uint32_t requested_stride = get_decoration(type_id, DecorationArrayStride);
  9922. if (physical_stride != requested_stride)
  9923. {
  9924. flags |= ACCESS_CHAIN_PTR_CHAIN_POINTER_ARITH_BIT;
  9925. if (is_vector(pointee_type))
  9926. flags |= ACCESS_CHAIN_PTR_CHAIN_CAST_TO_SCALAR_BIT;
  9927. }
  9928. }
  9929. }
  9930. return access_chain_internal(base, indices, count, flags, meta);
  9931. }
  9932. }
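// Reconstructs a struct value from its flattened members by emitting a constructor call,
// e.g. a flattened struct Foo { vec4 a; vec2 b; } is loaded as "Foo(Foo_a, Foo_b)" (illustrative names).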
  9933. string CompilerGLSL::load_flattened_struct(const string &basename, const SPIRType &type)
  9934. {
  9935. auto expr = type_to_glsl_constructor(type);
  9936. expr += '(';
  9937. for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
  9938. {
  9939. if (i)
  9940. expr += ", ";
  9941. auto &member_type = get<SPIRType>(type.member_types[i]);
  9942. if (member_type.basetype == SPIRType::Struct)
  9943. expr += load_flattened_struct(to_flattened_struct_member(basename, type, i), member_type);
  9944. else
  9945. expr += to_flattened_struct_member(basename, type, i);
  9946. }
  9947. expr += ')';
  9948. return expr;
  9949. }
  9950. std::string CompilerGLSL::to_flattened_access_chain_expression(uint32_t id)
  9951. {
  9952. // Do not use to_expression as that will unflatten access chains.
  9953. string basename;
  9954. if (const auto *var = maybe_get<SPIRVariable>(id))
  9955. basename = to_name(var->self);
  9956. else if (const auto *expr = maybe_get<SPIRExpression>(id))
  9957. basename = expr->expression;
  9958. else
  9959. basename = to_expression(id);
  9960. return basename;
  9961. }
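// Stores a struct value into its flattened members by emitting one assignment per member,
// recursing into nested structs, e.g. "Foo_a = value.a;" then "Foo_b = value.b;" (illustrative names).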
  9962. void CompilerGLSL::store_flattened_struct(const string &basename, uint32_t rhs_id, const SPIRType &type,
  9963. const SmallVector<uint32_t> &indices)
  9964. {
  9965. SmallVector<uint32_t> sub_indices = indices;
  9966. sub_indices.push_back(0);
  9967. auto *member_type = &type;
  9968. for (auto &index : indices)
  9969. member_type = &get<SPIRType>(member_type->member_types[index]);
  9970. for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++)
  9971. {
  9972. sub_indices.back() = i;
  9973. auto lhs = join(basename, "_", to_member_name(*member_type, i));
  9974. ParsedIR::sanitize_underscores(lhs);
  9975. if (get<SPIRType>(member_type->member_types[i]).basetype == SPIRType::Struct)
  9976. {
  9977. store_flattened_struct(lhs, rhs_id, type, sub_indices);
  9978. }
  9979. else
  9980. {
  9981. auto rhs = to_expression(rhs_id) + to_multi_member_reference(type, sub_indices);
  9982. statement(lhs, " = ", rhs, ";");
  9983. }
  9984. }
  9985. }
  9986. void CompilerGLSL::store_flattened_struct(uint32_t lhs_id, uint32_t value)
  9987. {
  9988. auto &type = expression_type(lhs_id);
  9989. auto basename = to_flattened_access_chain_expression(lhs_id);
  9990. store_flattened_struct(basename, value, type, {});
  9991. }
  9992. std::string CompilerGLSL::flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count,
  9993. const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride,
  9994. uint32_t /* array_stride */, bool need_transpose)
  9995. {
  9996. if (!target_type.array.empty())
  9997. SPIRV_CROSS_THROW("Access chains that result in an array can not be flattened");
  9998. else if (target_type.basetype == SPIRType::Struct)
  9999. return flattened_access_chain_struct(base, indices, count, target_type, offset);
  10000. else if (target_type.columns > 1)
  10001. return flattened_access_chain_matrix(base, indices, count, target_type, offset, matrix_stride, need_transpose);
  10002. else
  10003. return flattened_access_chain_vector(base, indices, count, target_type, offset, matrix_stride, need_transpose);
  10004. }
  10005. std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count,
  10006. const SPIRType &target_type, uint32_t offset)
  10007. {
  10008. std::string expr;
  10009. if (backend.can_declare_struct_inline)
  10010. {
  10011. expr += type_to_glsl_constructor(target_type);
  10012. expr += "(";
  10013. }
  10014. else
  10015. expr += "{";
  10016. for (uint32_t i = 0; i < uint32_t(target_type.member_types.size()); ++i)
  10017. {
  10018. if (i != 0)
  10019. expr += ", ";
  10020. const SPIRType &member_type = get<SPIRType>(target_type.member_types[i]);
  10021. uint32_t member_offset = type_struct_member_offset(target_type, i);
  10022. // The access chain terminates at the struct, so we need to find matrix strides and row-major information
  10023. // ahead of time.
  10024. bool need_transpose = false;
  10025. bool relaxed = false;
  10026. uint32_t matrix_stride = 0;
  10027. if (member_type.columns > 1)
  10028. {
  10029. auto decorations = combined_decoration_for_member(target_type, i);
  10030. need_transpose = decorations.get(DecorationRowMajor);
  10031. relaxed = decorations.get(DecorationRelaxedPrecision);
  10032. matrix_stride = type_struct_member_matrix_stride(target_type, i);
  10033. }
  10034. auto tmp = flattened_access_chain(base, indices, count, member_type, offset + member_offset, matrix_stride,
  10035. 0 /* array_stride */, need_transpose);
  10036. // Cannot forward transpositions, so resolve them here.
  10037. if (need_transpose)
  10038. expr += convert_row_major_matrix(tmp, member_type, 0, false, relaxed);
  10039. else
  10040. expr += tmp;
  10041. }
  10042. expr += backend.can_declare_struct_inline ? ")" : "}";
  10043. return expr;
  10044. }
  10045. std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count,
  10046. const SPIRType &target_type, uint32_t offset,
  10047. uint32_t matrix_stride, bool need_transpose)
  10048. {
  10049. assert(matrix_stride);
  10050. SPIRType tmp_type = target_type;
  10051. if (need_transpose)
  10052. swap(tmp_type.vecsize, tmp_type.columns);
  10053. std::string expr;
  10054. expr += type_to_glsl_constructor(tmp_type);
  10055. expr += "(";
  10056. for (uint32_t i = 0; i < tmp_type.columns; i++)
  10057. {
  10058. if (i != 0)
  10059. expr += ", ";
  10060. expr += flattened_access_chain_vector(base, indices, count, tmp_type, offset + i * matrix_stride, matrix_stride,
  10061. /* need_transpose= */ false);
  10062. }
  10063. expr += ")";
  10064. return expr;
  10065. }
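// A flattened buffer block is addressed as an array of 4-component words, so a vector or scalar load
// becomes "buffer[word_index]" plus a swizzle selecting components within that word. The transposed
// path gathers one component at a time, strided by the matrix stride, to handle row-major layouts.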
  10066. std::string CompilerGLSL::flattened_access_chain_vector(uint32_t base, const uint32_t *indices, uint32_t count,
  10067. const SPIRType &target_type, uint32_t offset,
  10068. uint32_t matrix_stride, bool need_transpose)
  10069. {
  10070. auto result = flattened_access_chain_offset(expression_type(base), indices, count, offset, 16);
  10071. auto buffer_name = to_name(expression_type(base).self);
  10072. if (need_transpose)
  10073. {
  10074. std::string expr;
  10075. if (target_type.vecsize > 1)
  10076. {
  10077. expr += type_to_glsl_constructor(target_type);
  10078. expr += "(";
  10079. }
  10080. for (uint32_t i = 0; i < target_type.vecsize; ++i)
  10081. {
  10082. if (i != 0)
  10083. expr += ", ";
  10084. uint32_t component_offset = result.second + i * matrix_stride;
  10085. assert(component_offset % (target_type.width / 8) == 0);
  10086. uint32_t index = component_offset / (target_type.width / 8);
  10087. expr += buffer_name;
  10088. expr += "[";
  10089. expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
  10090. expr += convert_to_string(index / 4);
  10091. expr += "]";
  10092. expr += vector_swizzle(1, index % 4);
  10093. }
  10094. if (target_type.vecsize > 1)
  10095. {
  10096. expr += ")";
  10097. }
  10098. return expr;
  10099. }
  10100. else
  10101. {
  10102. assert(result.second % (target_type.width / 8) == 0);
  10103. uint32_t index = result.second / (target_type.width / 8);
  10104. std::string expr;
  10105. expr += buffer_name;
  10106. expr += "[";
  10107. expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
  10108. expr += convert_to_string(index / 4);
  10109. expr += "]";
  10110. expr += vector_swizzle(target_type.vecsize, index % 4);
  10111. return expr;
  10112. }
  10113. }
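// Walks an access chain and accumulates the offset into the flattened representation.
// Returns a pair of (dynamic index expression, constant byte offset). The dynamic part is a string of
// "expr * stride_in_words + " terms (possibly empty), measured in units of word_stride bytes
// (16 for the vec4-based flattening used above).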
  10114. std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(
  10115. const SPIRType &basetype, const uint32_t *indices, uint32_t count, uint32_t offset, uint32_t word_stride,
  10116. bool *need_transpose, uint32_t *out_matrix_stride, uint32_t *out_array_stride, bool ptr_chain)
  10117. {
  10118. // Start traversing type hierarchy at the proper non-pointer types.
  10119. const auto *type = &get_pointee_type(basetype);
  10120. std::string expr;
  10121. // Inherit matrix information in case we are access chaining a vector which might have come from a row major layout.
  10122. bool row_major_matrix_needs_conversion = need_transpose ? *need_transpose : false;
  10123. uint32_t matrix_stride = out_matrix_stride ? *out_matrix_stride : 0;
  10124. uint32_t array_stride = out_array_stride ? *out_array_stride : 0;
  10125. for (uint32_t i = 0; i < count; i++)
  10126. {
  10127. uint32_t index = indices[i];
  10128. // Pointers
  10129. if (ptr_chain && i == 0)
  10130. {
  10131. // Here, the pointer type will be decorated with an array stride.
  10132. array_stride = get_decoration(basetype.self, DecorationArrayStride);
  10133. if (!array_stride)
  10134. SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block.");
  10135. auto *constant = maybe_get<SPIRConstant>(index);
  10136. if (constant)
  10137. {
  10138. // Constant array access.
  10139. offset += constant->scalar() * array_stride;
  10140. }
  10141. else
  10142. {
  10143. // Dynamic array access.
  10144. if (array_stride % word_stride)
  10145. {
  10146. SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size "
  10147. "of a 4-component vector. "
  10148. "Likely culprit here is a float or vec2 array inside a push "
  10149. "constant block which is std430. "
  10150. "This cannot be flattened. Try using std140 layout instead.");
  10151. }
  10152. expr += to_enclosed_expression(index);
  10153. expr += " * ";
  10154. expr += convert_to_string(array_stride / word_stride);
  10155. expr += " + ";
  10156. }
  10157. }
  10158. // Arrays
  10159. else if (!type->array.empty())
  10160. {
  10161. auto *constant = maybe_get<SPIRConstant>(index);
  10162. if (constant)
  10163. {
  10164. // Constant array access.
  10165. offset += constant->scalar() * array_stride;
  10166. }
  10167. else
  10168. {
  10169. // Dynamic array access.
  10170. if (array_stride % word_stride)
  10171. {
  10172. SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size "
  10173. "of a 4-component vector. "
  10174. "Likely culprit here is a float or vec2 array inside a push "
  10175. "constant block which is std430. "
  10176. "This cannot be flattened. Try using std140 layout instead.");
  10177. }
  10178. expr += to_enclosed_expression(index, false);
  10179. expr += " * ";
  10180. expr += convert_to_string(array_stride / word_stride);
  10181. expr += " + ";
  10182. }
  10183. uint32_t parent_type = type->parent_type;
  10184. type = &get<SPIRType>(parent_type);
  10185. if (!type->array.empty())
  10186. array_stride = get_decoration(parent_type, DecorationArrayStride);
  10187. }
  10188. // For structs, the index refers to a constant, which indexes into the members.
  10189. // We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
  10190. else if (type->basetype == SPIRType::Struct)
  10191. {
  10192. index = evaluate_constant_u32(index);
  10193. if (index >= type->member_types.size())
  10194. SPIRV_CROSS_THROW("Member index is out of bounds!");
  10195. offset += type_struct_member_offset(*type, index);
  10196. auto &struct_type = *type;
  10197. type = &get<SPIRType>(type->member_types[index]);
  10198. if (type->columns > 1)
  10199. {
  10200. matrix_stride = type_struct_member_matrix_stride(struct_type, index);
  10201. row_major_matrix_needs_conversion =
  10202. combined_decoration_for_member(struct_type, index).get(DecorationRowMajor);
  10203. }
  10204. else
  10205. row_major_matrix_needs_conversion = false;
  10206. if (!type->array.empty())
  10207. array_stride = type_struct_member_array_stride(struct_type, index);
  10208. }
  10209. // Matrix -> Vector
  10210. else if (type->columns > 1)
  10211. {
  10212. auto *constant = maybe_get<SPIRConstant>(index);
  10213. if (constant)
  10214. {
  10215. index = evaluate_constant_u32(index);
  10216. offset += index * (row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride);
  10217. }
  10218. else
  10219. {
  10220. uint32_t indexing_stride = row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride;
  10221. // Dynamic array access.
  10222. if (indexing_stride % word_stride)
  10223. {
  10224. SPIRV_CROSS_THROW("Matrix stride for dynamic indexing must be divisible by the size of a "
  10225. "4-component vector. "
  10226. "Likely culprit here is a row-major matrix being accessed dynamically. "
  10227. "This cannot be flattened. Try using std140 layout instead.");
  10228. }
  10229. expr += to_enclosed_expression(index, false);
  10230. expr += " * ";
  10231. expr += convert_to_string(indexing_stride / word_stride);
  10232. expr += " + ";
  10233. }
  10234. type = &get<SPIRType>(type->parent_type);
  10235. }
  10236. // Vector -> Scalar
  10237. else if (type->vecsize > 1)
  10238. {
  10239. auto *constant = maybe_get<SPIRConstant>(index);
  10240. if (constant)
  10241. {
  10242. index = evaluate_constant_u32(index);
  10243. offset += index * (row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8));
  10244. }
  10245. else
  10246. {
  10247. uint32_t indexing_stride = row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8);
  10248. // Dynamic array access.
  10249. if (indexing_stride % word_stride)
  10250. {
  10251. SPIRV_CROSS_THROW("Stride for dynamic vector indexing must be divisible by the "
  10252. "size of a 4-component vector. "
  10253. "This cannot be flattened in legacy targets.");
  10254. }
  10255. expr += to_enclosed_expression(index, false);
  10256. expr += " * ";
  10257. expr += convert_to_string(indexing_stride / word_stride);
  10258. expr += " + ";
  10259. }
  10260. type = &get<SPIRType>(type->parent_type);
  10261. }
  10262. else
  10263. SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
  10264. }
  10265. if (need_transpose)
  10266. *need_transpose = row_major_matrix_needs_conversion;
  10267. if (out_matrix_stride)
  10268. *out_matrix_stride = matrix_stride;
  10269. if (out_array_stride)
  10270. *out_array_stride = array_stride;
  10271. return std::make_pair(expr, offset);
  10272. }
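// Determines whether a pointer-typed expression must be explicitly dereferenced when its value is
// consumed. Physical/BDA pointers passed as parameters and phi variables need it; access chains and
// plain handles do not.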
  10273. bool CompilerGLSL::should_dereference(uint32_t id)
  10274. {
  10275. const auto &type = expression_type(id);
  10276. // Non-pointer expressions don't need to be dereferenced.
  10277. if (!is_pointer(type))
  10278. return false;
  10279. // Handles shouldn't be dereferenced either.
  10280. if (!expression_is_lvalue(id))
  10281. return false;
  10282. // If id is a variable but not a phi variable, we should not dereference it.
10283. // BDA pointers passed around as parameters always remain pointers.
  10284. if (auto *var = maybe_get<SPIRVariable>(id))
  10285. return (var->parameter && is_physical_or_buffer_pointer(type)) || var->phi_variable;
  10286. if (auto *expr = maybe_get<SPIRExpression>(id))
  10287. {
  10288. // If id is an access chain, we should not dereference it.
  10289. if (expr->access_chain)
  10290. return false;
  10291. // If id is a forwarded copy of a variable pointer, we should not dereference it.
  10292. SPIRVariable *var = nullptr;
  10293. while (expr->loaded_from && expression_is_forwarded(expr->self))
  10294. {
  10295. auto &src_type = expression_type(expr->loaded_from);
  10296. // To be a copy, the pointer and its source expression must be the
  10297. // same type. Can't check type.self, because for some reason that's
  10298. // usually the base type with pointers stripped off. This check is
  10299. // complex enough that I've hoisted it out of the while condition.
  10300. if (src_type.pointer != type.pointer || src_type.pointer_depth != type.pointer_depth ||
  10301. src_type.parent_type != type.parent_type)
  10302. break;
  10303. if ((var = maybe_get<SPIRVariable>(expr->loaded_from)))
  10304. break;
  10305. if (!(expr = maybe_get<SPIRExpression>(expr->loaded_from)))
  10306. break;
  10307. }
  10308. return !var || var->phi_variable;
  10309. }
  10310. // Otherwise, we should dereference this pointer expression.
  10311. return true;
  10312. }
  10313. bool CompilerGLSL::should_dereference_caller_param(uint32_t id)
  10314. {
  10315. const auto &type = expression_type(id);
  10316. // BDA is always passed around as pointers. Similarly, we need to pass variable buffer pointers as pointers.
  10317. if (is_physical_or_buffer_pointer(type))
  10318. return false;
  10319. return should_dereference(id);
  10320. }
  10321. bool CompilerGLSL::should_forward(uint32_t id) const
  10322. {
  10323. // If id is a variable we will try to forward it regardless of force_temporary check below
  10324. // This is important because otherwise we'll get local sampler copies (highp sampler2D foo = bar) that are invalid in OpenGL GLSL
  10325. auto *var = maybe_get<SPIRVariable>(id);
  10326. if (var)
  10327. {
  10328. // Never forward volatile builtin variables, e.g. SPIR-V 1.6 HelperInvocation.
  10329. return !(has_decoration(id, DecorationBuiltIn) && has_decoration(id, DecorationVolatile));
  10330. }
  10331. // For debugging emit temporary variables for all expressions
  10332. if (options.force_temporary)
  10333. return false;
  10334. // If an expression carries enough dependencies we need to stop forwarding at some point,
  10335. // or we explode compilers. There are usually limits to how much we can nest expressions.
  10336. auto *expr = maybe_get<SPIRExpression>(id);
  10337. const uint32_t max_expression_dependencies = 64;
  10338. if (expr && expr->expression_dependencies.size() >= max_expression_dependencies)
  10339. return false;
  10340. if (expr && expr->loaded_from
  10341. && has_decoration(expr->loaded_from, DecorationBuiltIn)
  10342. && has_decoration(expr->loaded_from, DecorationVolatile))
  10343. {
  10344. // Never forward volatile builtin variables, e.g. SPIR-V 1.6 HelperInvocation.
  10345. return false;
  10346. }
  10347. // Immutable expression can always be forwarded.
  10348. if (is_immutable(id))
  10349. return true;
  10350. return false;
  10351. }
  10352. bool CompilerGLSL::should_suppress_usage_tracking(uint32_t id) const
  10353. {
  10354. // Used only by opcodes which don't do any real "work", they just swizzle data in some fashion.
  10355. return !expression_is_forwarded(id) || expression_suppresses_usage_tracking(id);
  10356. }
  10357. void CompilerGLSL::track_expression_read(uint32_t id)
  10358. {
  10359. switch (ir.ids[id].get_type())
  10360. {
  10361. case TypeExpression:
  10362. {
  10363. auto &e = get<SPIRExpression>(id);
  10364. for (auto implied_read : e.implied_read_expressions)
  10365. track_expression_read(implied_read);
  10366. break;
  10367. }
  10368. case TypeAccessChain:
  10369. {
  10370. auto &e = get<SPIRAccessChain>(id);
  10371. for (auto implied_read : e.implied_read_expressions)
  10372. track_expression_read(implied_read);
  10373. break;
  10374. }
  10375. default:
  10376. break;
  10377. }
  10378. // If we try to read a forwarded temporary more than once we will stamp out possibly complex code twice.
  10379. // In this case, it's better to just bind the complex expression to the temporary and read that temporary twice.
  10380. if (expression_is_forwarded(id) && !expression_suppresses_usage_tracking(id))
  10381. {
  10382. auto &v = expression_usage_counts[id];
  10383. v++;
  10384. // If we create an expression outside a loop,
  10385. // but access it inside a loop, we're implicitly reading it multiple times.
  10386. // If the expression in question is expensive, we should hoist it out to avoid relying on loop-invariant code motion
  10387. // working inside the backend compiler.
  10388. if (expression_read_implies_multiple_reads(id))
  10389. v++;
  10390. if (v >= 2)
  10391. {
  10392. //if (v == 2)
  10393. // fprintf(stderr, "ID %u was forced to temporary due to more than 1 expression use!\n", id);
  10394. // Force a recompile after this pass to avoid forwarding this variable.
  10395. force_temporary_and_recompile(id);
  10396. }
  10397. }
  10398. }
  10399. bool CompilerGLSL::args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure)
  10400. {
  10401. if (forced_temporaries.find(id) != end(forced_temporaries))
  10402. return false;
  10403. for (uint32_t i = 0; i < num_args; i++)
  10404. if (!should_forward(args[i]))
  10405. return false;
  10406. // We need to forward globals as well.
  10407. if (!pure)
  10408. {
  10409. for (auto global : global_variables)
  10410. if (!should_forward(global))
  10411. return false;
  10412. for (auto aliased : aliased_variables)
  10413. if (!should_forward(aliased))
  10414. return false;
  10415. }
  10416. return true;
  10417. }
  10418. void CompilerGLSL::register_impure_function_call()
  10419. {
  10420. // Impure functions can modify globals and aliased variables, so invalidate them as well.
  10421. for (auto global : global_variables)
  10422. flush_dependees(get<SPIRVariable>(global));
  10423. for (auto aliased : aliased_variables)
  10424. flush_dependees(get<SPIRVariable>(aliased));
  10425. }
  10426. void CompilerGLSL::register_call_out_argument(uint32_t id)
  10427. {
  10428. register_write(id);
  10429. auto *var = maybe_get<SPIRVariable>(id);
  10430. if (var)
  10431. flush_variable_declaration(var->self);
  10432. }
  10433. string CompilerGLSL::variable_decl_function_local(SPIRVariable &var)
  10434. {
  10435. // These variables are always function local,
  10436. // so make sure we emit the variable without storage qualifiers.
  10437. // Some backends will inject custom variables locally in a function
  10438. // with a storage qualifier which is not function-local.
  10439. auto old_storage = var.storage;
  10440. var.storage = StorageClassFunction;
  10441. auto expr = variable_decl(var);
  10442. var.storage = old_storage;
  10443. return expr;
  10444. }
  10445. void CompilerGLSL::emit_variable_temporary_copies(const SPIRVariable &var)
  10446. {
  10447. // Ensure that we declare phi-variable copies even if the original declaration isn't deferred
  10448. if (var.allocate_temporary_copy && !flushed_phi_variables.count(var.self))
  10449. {
  10450. auto &type = get<SPIRType>(var.basetype);
  10451. auto &flags = get_decoration_bitset(var.self);
  10452. statement(flags_to_qualifiers_glsl(type, var.self, flags), variable_decl(type, join("_", var.self, "_copy")), ";");
  10453. flushed_phi_variables.insert(var.self);
  10454. }
  10455. }
  10456. void CompilerGLSL::flush_variable_declaration(uint32_t id)
  10457. {
  10458. // Ensure that we declare phi-variable copies even if the original declaration isn't deferred
  10459. auto *var = maybe_get<SPIRVariable>(id);
  10460. if (var && var->deferred_declaration)
  10461. {
  10462. string initializer;
  10463. if (options.force_zero_initialized_variables &&
  10464. (var->storage == StorageClassFunction || var->storage == StorageClassGeneric ||
  10465. var->storage == StorageClassPrivate) &&
  10466. !var->initializer && type_can_zero_initialize(get_variable_data_type(*var)))
  10467. {
  10468. initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(*var)));
  10469. }
  10470. statement(variable_decl_function_local(*var), initializer, ";");
  10471. var->deferred_declaration = false;
  10472. }
  10473. if (var)
  10474. {
  10475. emit_variable_temporary_copies(*var);
  10476. }
  10477. }
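// Collapses chained swizzles where the trailing swizzle is an identity-style prefix (.x, .xy, .xyz, ...),
// e.g. "foobar.wyx.xy" becomes "foobar.wy" and "foo.xyz.xyz" becomes "foo.xyz".
// Returns true if the trailing swizzle was recognized as redundant.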
  10478. bool CompilerGLSL::remove_duplicate_swizzle(string &op)
  10479. {
  10480. auto pos = op.find_last_of('.');
  10481. if (pos == string::npos || pos == 0)
  10482. return false;
  10483. string final_swiz = op.substr(pos + 1, string::npos);
  10484. if (backend.swizzle_is_function)
  10485. {
  10486. if (final_swiz.size() < 2)
  10487. return false;
  10488. if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
  10489. final_swiz.erase(final_swiz.size() - 2, string::npos);
  10490. else
  10491. return false;
  10492. }
  10493. // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
  10494. // If so, and previous swizzle is of same length,
  10495. // we can drop the final swizzle altogether.
  10496. for (uint32_t i = 0; i < final_swiz.size(); i++)
  10497. {
  10498. static const char expected[] = { 'x', 'y', 'z', 'w' };
  10499. if (i >= 4 || final_swiz[i] != expected[i])
  10500. return false;
  10501. }
  10502. auto prevpos = op.find_last_of('.', pos - 1);
  10503. if (prevpos == string::npos)
  10504. return false;
  10505. prevpos++;
  10506. // Make sure there are only swizzles here ...
  10507. for (auto i = prevpos; i < pos; i++)
  10508. {
  10509. if (op[i] < 'w' || op[i] > 'z')
  10510. {
  10511. // If swizzles are foo.xyz() like in C++ backend for example, check for that.
  10512. if (backend.swizzle_is_function && i + 2 == pos && op[i] == '(' && op[i + 1] == ')')
  10513. break;
  10514. return false;
  10515. }
  10516. }
  10517. // If original swizzle is large enough, just carve out the components we need.
  10518. // E.g. foobar.wyx.xy will turn into foobar.wy.
  10519. if (pos - prevpos >= final_swiz.size())
  10520. {
  10521. op.erase(prevpos + final_swiz.size(), string::npos);
  10522. // Add back the function call ...
  10523. if (backend.swizzle_is_function)
  10524. op += "()";
  10525. }
  10526. return true;
  10527. }
  10528. // Optimizes away vector swizzles where we have something like
  10529. // vec3 foo;
  10530. // foo.xyz <-- swizzle expression does nothing.
10531. // This is a very common pattern after OpCompositeConstruct.
  10532. bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op)
  10533. {
  10534. auto pos = op.find_last_of('.');
  10535. if (pos == string::npos || pos == 0)
  10536. return false;
  10537. string final_swiz = op.substr(pos + 1, string::npos);
  10538. if (backend.swizzle_is_function)
  10539. {
  10540. if (final_swiz.size() < 2)
  10541. return false;
  10542. if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
  10543. final_swiz.erase(final_swiz.size() - 2, string::npos);
  10544. else
  10545. return false;
  10546. }
  10547. // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
  10548. // If so, and previous swizzle is of same length,
  10549. // we can drop the final swizzle altogether.
  10550. for (uint32_t i = 0; i < final_swiz.size(); i++)
  10551. {
  10552. static const char expected[] = { 'x', 'y', 'z', 'w' };
  10553. if (i >= 4 || final_swiz[i] != expected[i])
  10554. return false;
  10555. }
  10556. auto &type = expression_type(base);
  10557. // Sanity checking ...
  10558. assert(type.columns == 1 && type.array.empty());
  10559. if (type.vecsize == final_swiz.size())
  10560. op.erase(pos, string::npos);
  10561. return true;
  10562. }
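// Builds the argument list for a composite constructor, merging scalar extracts that come from the
// same base vector back into a single swizzle so that e.g. "v.x, v.y, v.z" can collapse into "v.xyz"
// or simply "v" when the swizzle turns out to be redundant.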
  10563. string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32_t *elems, uint32_t length)
  10564. {
  10565. ID base = 0;
  10566. string op;
  10567. string subop;
  10568. // Can only merge swizzles for vectors.
  10569. auto &type = get<SPIRType>(return_type);
  10570. bool can_apply_swizzle_opt = type.basetype != SPIRType::Struct && type.array.empty() && type.columns == 1 &&
  10571. type.op != OpTypeCooperativeMatrixKHR;
  10572. bool swizzle_optimization = false;
  10573. for (uint32_t i = 0; i < length; i++)
  10574. {
  10575. auto *e = maybe_get<SPIRExpression>(elems[i]);
  10576. // If we're merging another scalar which belongs to the same base
  10577. // object, just merge the swizzles to avoid triggering more than 1 expression read as much as possible!
  10578. if (can_apply_swizzle_opt && e && e->base_expression && e->base_expression == base)
  10579. {
  10580. // Only supposed to be used for vector swizzle -> scalar.
  10581. assert(!e->expression.empty() && e->expression.front() == '.');
  10582. subop += e->expression.substr(1, string::npos);
  10583. swizzle_optimization = true;
  10584. }
  10585. else
  10586. {
  10587. // We'll likely end up with duplicated swizzles, e.g.
  10588. // foobar.xyz.xyz from patterns like
  10589. // OpVectorShuffle
  10590. // OpCompositeExtract x 3
  10591. // OpCompositeConstruct 3x + other scalar.
  10592. // Just modify op in-place.
  10593. if (swizzle_optimization)
  10594. {
  10595. if (backend.swizzle_is_function)
  10596. subop += "()";
  10597. // Don't attempt to remove unity swizzling if we managed to remove duplicate swizzles.
  10598. // The base "foo" might be vec4, while foo.xyz is vec3 (OpVectorShuffle) and looks like a vec3 due to the .xyz tacked on.
  10599. // We only want to remove the swizzles if we're certain that the resulting base will be the same vecsize.
  10600. // Essentially, we can only remove one set of swizzles, since that's what we have control over ...
  10601. // Case 1:
  10602. // foo.yxz.xyz: Duplicate swizzle kicks in, giving foo.yxz, we are done.
  10603. // foo.yxz was the result of OpVectorShuffle and we don't know the type of foo.
  10604. // Case 2:
  10605. // foo.xyz: Duplicate swizzle won't kick in.
  10606. // If foo is vec3, we can remove xyz, giving just foo.
  10607. if (!remove_duplicate_swizzle(subop))
  10608. remove_unity_swizzle(base, subop);
  10609. // Strips away redundant parens if we created them during component extraction.
  10610. strip_enclosed_expression(subop);
  10611. swizzle_optimization = false;
  10612. op += subop;
  10613. }
  10614. else
  10615. op += subop;
  10616. if (i)
  10617. op += ", ";
  10618. bool uses_buffer_offset =
  10619. type.basetype == SPIRType::Struct && has_member_decoration(type.self, i, DecorationOffset);
  10620. subop = to_composite_constructor_expression(type, elems[i], uses_buffer_offset);
  10621. }
  10622. base = e ? e->base_expression : ID(0);
  10623. }
  10624. if (swizzle_optimization)
  10625. {
  10626. if (backend.swizzle_is_function)
  10627. subop += "()";
  10628. if (!remove_duplicate_swizzle(subop))
  10629. remove_unity_swizzle(base, subop);
  10630. // Strips away redundant parens if we created them during component extraction.
  10631. strip_enclosed_expression(subop);
  10632. }
  10633. op += subop;
  10634. return op;
  10635. }
  10636. bool CompilerGLSL::skip_argument(uint32_t id) const
  10637. {
  10638. if (!combined_image_samplers.empty() || !options.vulkan_semantics)
  10639. {
  10640. auto &type = expression_type(id);
  10641. if (type.basetype == SPIRType::Sampler || (type.basetype == SPIRType::Image && type.image.sampled == 1))
  10642. return true;
  10643. }
  10644. return false;
  10645. }
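// Rewrites "<lhs> = <lhs> op expr;" into the compound form, e.g. "a = a + b;" becomes "a += b;"
// and "i = i + 1;" becomes "i++;". Returns true if such a statement was emitted.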
  10646. bool CompilerGLSL::optimize_read_modify_write(const SPIRType &type, const string &lhs, const string &rhs)
  10647. {
  10648. // Do this with strings because we have a very clear pattern we can check for and it avoids
  10649. // adding lots of special cases to the code emission.
  10650. if (rhs.size() < lhs.size() + 3)
  10651. return false;
  10652. // Do not optimize matrices. They are a bit awkward to reason about in general
10653. // (in which order do the operations happen?), and it does not work on MSL anyway.
  10654. if (type.vecsize > 1 && type.columns > 1)
  10655. return false;
  10656. auto index = rhs.find(lhs);
  10657. if (index != 0)
  10658. return false;
  10659. // TODO: Shift operators, but it's not important for now.
  10660. auto op = rhs.find_first_of("+-/*%|&^", lhs.size() + 1);
  10661. if (op != lhs.size() + 1)
  10662. return false;
  10663. // Check that the op is followed by space. This excludes && and ||.
  10664. if (rhs[op + 1] != ' ')
  10665. return false;
  10666. char bop = rhs[op];
  10667. auto expr = rhs.substr(lhs.size() + 3);
  10668. // Avoids false positives where we get a = a * b + c.
  10669. // Normally, these expressions are always enclosed, but unexpected code paths may end up hitting this.
  10670. if (needs_enclose_expression(expr))
  10671. return false;
  10672. // Try to find increments and decrements. Makes it look neater as += 1, -= 1 is fairly rare to see in real code.
  10673. // Find some common patterns which are equivalent.
  10674. if ((bop == '+' || bop == '-') && (expr == "1" || expr == "uint(1)" || expr == "1u" || expr == "int(1u)"))
  10675. statement(lhs, bop, bop, ";");
  10676. else
  10677. statement(lhs, " ", bop, "= ", expr, ";");
  10678. return true;
  10679. }
  10680. void CompilerGLSL::register_control_dependent_expression(uint32_t expr)
  10681. {
  10682. if (forwarded_temporaries.find(expr) == end(forwarded_temporaries))
  10683. return;
  10684. assert(current_emitting_block);
  10685. current_emitting_block->invalidate_expressions.push_back(expr);
  10686. }
  10687. void CompilerGLSL::emit_block_instructions(SPIRBlock &block)
  10688. {
  10689. current_emitting_block = &block;
  10690. if (backend.requires_relaxed_precision_analysis)
  10691. {
  10692. // If PHI variables are consumed in unexpected precision contexts, copy them here.
  10693. for (size_t i = 0, n = block.phi_variables.size(); i < n; i++)
  10694. {
  10695. auto &phi = block.phi_variables[i];
  10696. // Ensure we only copy once. We know a-priori that this array will lay out
  10697. // the same function variables together.
  10698. if (i && block.phi_variables[i - 1].function_variable == phi.function_variable)
  10699. continue;
  10700. auto itr = temporary_to_mirror_precision_alias.find(phi.function_variable);
  10701. if (itr != temporary_to_mirror_precision_alias.end())
  10702. {
  10703. // Explicitly, we don't want to inherit RelaxedPrecision state in this CopyObject,
  10704. // so it helps to have handle_instruction_precision() on the outside of emit_instruction().
  10705. EmbeddedInstruction inst;
  10706. inst.op = OpCopyObject;
  10707. inst.length = 3;
  10708. inst.ops.push_back(expression_type_id(itr->first));
  10709. inst.ops.push_back(itr->second);
  10710. inst.ops.push_back(itr->first);
  10711. emit_instruction(inst);
  10712. }
  10713. }
  10714. }
  10715. for (auto &op : block.ops)
  10716. {
  10717. auto temporary_copy = handle_instruction_precision(op);
  10718. emit_instruction(op);
  10719. if (temporary_copy.dst_id)
  10720. {
  10721. // Explicitly, we don't want to inherit RelaxedPrecision state in this CopyObject,
  10722. // so it helps to have handle_instruction_precision() on the outside of emit_instruction().
  10723. EmbeddedInstruction inst;
  10724. inst.op = OpCopyObject;
  10725. inst.length = 3;
  10726. inst.ops.push_back(expression_type_id(temporary_copy.src_id));
  10727. inst.ops.push_back(temporary_copy.dst_id);
  10728. inst.ops.push_back(temporary_copy.src_id);
  10729. // Never attempt to hoist mirrored temporaries.
  10730. // They are hoisted in lock-step with their parents.
  10731. block_temporary_hoisting = true;
  10732. emit_instruction(inst);
  10733. block_temporary_hoisting = false;
  10734. }
  10735. }
  10736. current_emitting_block = nullptr;
  10737. }
  10738. void CompilerGLSL::disallow_forwarding_in_expression_chain(const SPIRExpression &expr)
  10739. {
  10740. // Allow trivially forwarded expressions like OpLoad or trivial shuffles,
  10741. // these will be marked as having suppressed usage tracking.
  10742. // Our only concern is to make sure arithmetic operations are done in similar ways.
  10743. if (forced_invariant_temporaries.count(expr.self) == 0)
  10744. {
  10745. if (!expression_suppresses_usage_tracking(expr.self))
  10746. force_temporary_and_recompile(expr.self);
  10747. forced_invariant_temporaries.insert(expr.self);
  10748. for (auto &dependent : expr.invariance_dependencies)
  10749. disallow_forwarding_in_expression_chain(get<SPIRExpression>(dependent));
  10750. }
  10751. }
  10752. void CompilerGLSL::handle_store_to_invariant_variable(uint32_t store_id, uint32_t value_id)
  10753. {
  10754. // Variables or access chains marked invariant are complicated. We will need to make sure the code-gen leading up to
  10755. // this variable is consistent. The failure case for SPIRV-Cross is when an expression is forced to a temporary
  10756. // in one translation unit, but not another, e.g. due to multiple use of an expression.
  10757. // This causes variance despite the output variable being marked invariant, so the solution here is to force all dependent
  10758. // expressions to be temporaries.
  10759. // It is uncertain if this is enough to support invariant in all possible cases, but it should be good enough
  10760. // for all reasonable uses of invariant.
  10761. if (!has_decoration(store_id, DecorationInvariant))
  10762. return;
  10763. auto *expr = maybe_get<SPIRExpression>(value_id);
  10764. if (!expr)
  10765. return;
  10766. disallow_forwarding_in_expression_chain(*expr);
  10767. }
  10768. void CompilerGLSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression)
  10769. {
  10770. auto rhs = to_pointer_expression(rhs_expression);
  10771. // Statements to OpStore may be empty if it is a struct with zero members. Just forward the store to /dev/null.
  10772. if (!rhs.empty())
  10773. {
  10774. handle_store_to_invariant_variable(lhs_expression, rhs_expression);
  10775. if (!unroll_array_to_complex_store(lhs_expression, rhs_expression))
  10776. {
  10777. auto lhs = to_dereferenced_expression(lhs_expression);
  10778. if (has_decoration(lhs_expression, DecorationNonUniform))
  10779. convert_non_uniform_expression(lhs, lhs_expression);
  10780. // We might need to cast in order to store to a builtin.
  10781. cast_to_variable_store(lhs_expression, rhs, expression_type(rhs_expression));
  10782. // Tries to optimize assignments like "<lhs> = <lhs> op expr".
  10783. // While this is purely cosmetic, this is important for legacy ESSL where loop
  10784. // variable increments must be in either i++ or i += const-expr.
  10785. // Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0.
  10786. if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs))
  10787. statement(lhs, " = ", rhs, ";");
  10788. }
  10789. register_write(lhs_expression);
  10790. }
  10791. }
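// Determines the integer bit-width an instruction actually operates on, based on its operands or
// result type, so that implicit bitcasts use matching signed/unsigned types. Falls back to 32.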
  10792. uint32_t CompilerGLSL::get_integer_width_for_instruction(const Instruction &instr) const
  10793. {
  10794. if (instr.length < 3)
  10795. return 32;
  10796. auto *ops = stream(instr);
  10797. switch (instr.op)
  10798. {
  10799. case OpSConvert:
  10800. case OpConvertSToF:
  10801. case OpUConvert:
  10802. case OpConvertUToF:
  10803. case OpIEqual:
  10804. case OpINotEqual:
  10805. case OpSLessThan:
  10806. case OpSLessThanEqual:
  10807. case OpSGreaterThan:
  10808. case OpSGreaterThanEqual:
  10809. case OpULessThan:
  10810. case OpULessThanEqual:
  10811. case OpUGreaterThan:
  10812. case OpUGreaterThanEqual:
  10813. return expression_type(ops[2]).width;
  10814. case OpSMulExtended:
  10815. case OpUMulExtended:
  10816. return get<SPIRType>(get<SPIRType>(ops[0]).member_types[0]).width;
  10817. default:
  10818. {
  10819. // We can look at result type which is more robust.
  10820. auto *type = maybe_get<SPIRType>(ops[0]);
  10821. if (type && type_is_integral(*type))
  10822. return type->width;
  10823. else
  10824. return 32;
  10825. }
  10826. }
  10827. }
  10828. uint32_t CompilerGLSL::get_integer_width_for_glsl_instruction(GLSLstd450 op, const uint32_t *ops, uint32_t length) const
  10829. {
  10830. if (length < 1)
  10831. return 32;
  10832. switch (op)
  10833. {
  10834. case GLSLstd450SAbs:
  10835. case GLSLstd450SSign:
  10836. case GLSLstd450UMin:
  10837. case GLSLstd450SMin:
  10838. case GLSLstd450UMax:
  10839. case GLSLstd450SMax:
  10840. case GLSLstd450UClamp:
  10841. case GLSLstd450SClamp:
  10842. case GLSLstd450FindSMsb:
  10843. case GLSLstd450FindUMsb:
  10844. return expression_type(ops[0]).width;
  10845. default:
  10846. {
  10847. // We don't need to care about other opcodes, just return 32.
  10848. return 32;
  10849. }
  10850. }
  10851. }
  10852. void CompilerGLSL::forward_relaxed_precision(uint32_t dst_id, const uint32_t *args, uint32_t length)
  10853. {
  10854. // Only GLSL supports RelaxedPrecision directly.
  10855. // We cannot implement this in HLSL or MSL because it is tied to the type system.
  10856. // In SPIR-V, everything must masquerade as 32-bit.
  10857. if (!backend.requires_relaxed_precision_analysis)
  10858. return;
  10859. auto input_precision = analyze_expression_precision(args, length);
  10860. // For expressions which are loaded or directly forwarded, we inherit mediump implicitly.
  10861. // For dst_id to be analyzed properly, it must inherit any relaxed precision decoration from src_id.
  10862. if (input_precision == Options::Mediump)
  10863. set_decoration(dst_id, DecorationRelaxedPrecision);
  10864. }
  10865. CompilerGLSL::Options::Precision CompilerGLSL::analyze_expression_precision(const uint32_t *args, uint32_t length) const
  10866. {
  10867. // Now, analyze the precision at which the arguments would run.
  10868. // GLSL rules are such that the precision used to evaluate an expression is equal to the highest precision
  10869. // for the inputs. Constants do not have inherent precision and do not contribute to this decision.
  10870. // If all inputs are constants, they inherit precision from outer expressions, including an l-value.
  10871. // In this case, we'll have to force a temporary for dst_id so that we can bind the constant expression with
  10872. // correct precision.
  10873. bool expression_has_highp = false;
  10874. bool expression_has_mediump = false;
  10875. for (uint32_t i = 0; i < length; i++)
  10876. {
  10877. uint32_t arg = args[i];
  10878. auto handle_type = ir.ids[arg].get_type();
  10879. if (handle_type == TypeConstant || handle_type == TypeConstantOp || handle_type == TypeUndef)
  10880. continue;
  10881. if (has_decoration(arg, DecorationRelaxedPrecision))
  10882. expression_has_mediump = true;
  10883. else
  10884. expression_has_highp = true;
  10885. }
  10886. if (expression_has_highp)
  10887. return Options::Highp;
  10888. else if (expression_has_mediump)
  10889. return Options::Mediump;
  10890. else
  10891. return Options::DontCare;
  10892. }
  10893. void CompilerGLSL::analyze_precision_requirements(uint32_t type_id, uint32_t dst_id, uint32_t *args, uint32_t length)
  10894. {
  10895. if (!backend.requires_relaxed_precision_analysis)
  10896. return;
  10897. auto &type = get<SPIRType>(type_id);
  10898. // RelaxedPrecision only applies to 32-bit values.
  10899. if (type.basetype != SPIRType::Float && type.basetype != SPIRType::Int && type.basetype != SPIRType::UInt)
  10900. return;
  10901. bool operation_is_highp = !has_decoration(dst_id, DecorationRelaxedPrecision);
  10902. auto input_precision = analyze_expression_precision(args, length);
  10903. if (input_precision == Options::DontCare)
  10904. {
  10905. consume_temporary_in_precision_context(type_id, dst_id, input_precision);
  10906. return;
  10907. }
  10908. // In SPIR-V and GLSL, the semantics are flipped for how relaxed precision is determined.
  10909. // In SPIR-V, the operation itself marks RelaxedPrecision, meaning that inputs can be truncated to 16-bit.
  10910. // However, if the expression is not, inputs must be expanded to 32-bit first,
  10911. // since the operation must run at high precision.
  10912. // This is the awkward part, because if we have mediump inputs, or expressions which derived from mediump,
  10913. // we might have to forcefully bind the source IDs to highp temporaries. This is done by clearing decorations
  10914. // and forcing temporaries. Similarly for mediump operations. We bind highp expressions to mediump variables.
  10915. if ((operation_is_highp && input_precision == Options::Mediump) ||
  10916. (!operation_is_highp && input_precision == Options::Highp))
  10917. {
  10918. auto precision = operation_is_highp ? Options::Highp : Options::Mediump;
  10919. for (uint32_t i = 0; i < length; i++)
  10920. {
  10921. // Rewrites the opcode so that we consume an ID in correct precision context.
10922. // This is pretty hacky, but it's the most straightforward way of implementing this without adding
  10923. // lots of extra passes to rewrite all code blocks.
  10924. args[i] = consume_temporary_in_precision_context(expression_type_id(args[i]), args[i], precision);
  10925. }
  10926. }
  10927. }
  10928. // This is probably not exhaustive ...
  10929. static bool opcode_is_precision_sensitive_operation(Op op)
  10930. {
  10931. switch (op)
  10932. {
  10933. case OpFAdd:
  10934. case OpFSub:
  10935. case OpFMul:
  10936. case OpFNegate:
  10937. case OpIAdd:
  10938. case OpISub:
  10939. case OpIMul:
  10940. case OpSNegate:
  10941. case OpFMod:
  10942. case OpFDiv:
  10943. case OpFRem:
  10944. case OpSMod:
  10945. case OpSDiv:
  10946. case OpSRem:
  10947. case OpUMod:
  10948. case OpUDiv:
  10949. case OpVectorTimesMatrix:
  10950. case OpMatrixTimesVector:
  10951. case OpMatrixTimesMatrix:
  10952. case OpDPdx:
  10953. case OpDPdy:
  10954. case OpDPdxCoarse:
  10955. case OpDPdyCoarse:
  10956. case OpDPdxFine:
  10957. case OpDPdyFine:
  10958. case OpFwidth:
  10959. case OpFwidthCoarse:
  10960. case OpFwidthFine:
  10961. case OpVectorTimesScalar:
  10962. case OpMatrixTimesScalar:
  10963. case OpOuterProduct:
  10964. case OpFConvert:
  10965. case OpSConvert:
  10966. case OpUConvert:
  10967. case OpConvertSToF:
  10968. case OpConvertUToF:
  10969. case OpConvertFToU:
  10970. case OpConvertFToS:
  10971. return true;
  10972. default:
  10973. return false;
  10974. }
  10975. }
  10976. // Instructions which just load data but don't do any arithmetic operation should just inherit the decoration.
10977. // SPIR-V doesn't require this, but it's somewhat implied it has to work this way; relaxed precision is only
  10978. // relevant when operating on the IDs, not when shuffling things around.
  10979. static bool opcode_is_precision_forwarding_instruction(Op op, uint32_t &arg_count)
  10980. {
  10981. switch (op)
  10982. {
  10983. case OpLoad:
  10984. case OpAccessChain:
  10985. case OpInBoundsAccessChain:
  10986. case OpCompositeExtract:
  10987. case OpVectorExtractDynamic:
  10988. case OpSampledImage:
  10989. case OpImage:
  10990. case OpCopyObject:
  10991. case OpImageRead:
  10992. case OpImageFetch:
  10993. case OpImageSampleImplicitLod:
  10994. case OpImageSampleProjImplicitLod:
  10995. case OpImageSampleDrefImplicitLod:
  10996. case OpImageSampleProjDrefImplicitLod:
  10997. case OpImageSampleExplicitLod:
  10998. case OpImageSampleProjExplicitLod:
  10999. case OpImageSampleDrefExplicitLod:
  11000. case OpImageSampleProjDrefExplicitLod:
  11001. case OpImageGather:
  11002. case OpImageDrefGather:
  11003. case OpImageSparseRead:
  11004. case OpImageSparseFetch:
  11005. case OpImageSparseSampleImplicitLod:
  11006. case OpImageSparseSampleProjImplicitLod:
  11007. case OpImageSparseSampleDrefImplicitLod:
  11008. case OpImageSparseSampleProjDrefImplicitLod:
  11009. case OpImageSparseSampleExplicitLod:
  11010. case OpImageSparseSampleProjExplicitLod:
  11011. case OpImageSparseSampleDrefExplicitLod:
  11012. case OpImageSparseSampleProjDrefExplicitLod:
  11013. case OpImageSparseGather:
  11014. case OpImageSparseDrefGather:
  11015. arg_count = 1;
  11016. return true;
  11017. case OpVectorShuffle:
  11018. arg_count = 2;
  11019. return true;
  11020. case OpCompositeConstruct:
  11021. return true;
  11022. default:
  11023. break;
  11024. }
  11025. return false;
  11026. }
  11027. CompilerGLSL::TemporaryCopy CompilerGLSL::handle_instruction_precision(const Instruction &instruction)
  11028. {
  11029. auto ops = stream_mutable(instruction);
  11030. auto opcode = static_cast<Op>(instruction.op);
  11031. uint32_t length = instruction.length;
  11032. if (backend.requires_relaxed_precision_analysis)
  11033. {
  11034. if (length > 2)
  11035. {
  11036. uint32_t forwarding_length = length - 2;
  11037. if (opcode_is_precision_sensitive_operation(opcode))
  11038. analyze_precision_requirements(ops[0], ops[1], &ops[2], forwarding_length);
  11039. else if (opcode == OpExtInst && length >= 5 && get<SPIRExtension>(ops[2]).ext == SPIRExtension::GLSL)
  11040. analyze_precision_requirements(ops[0], ops[1], &ops[4], forwarding_length - 2);
  11041. else if (opcode_is_precision_forwarding_instruction(opcode, forwarding_length))
  11042. forward_relaxed_precision(ops[1], &ops[2], forwarding_length);
  11043. }
  11044. uint32_t result_type = 0, result_id = 0;
  11045. if (instruction_to_result_type(result_type, result_id, opcode, ops, length))
  11046. {
  11047. auto itr = temporary_to_mirror_precision_alias.find(ops[1]);
  11048. if (itr != temporary_to_mirror_precision_alias.end())
  11049. return { itr->second, itr->first };
  11050. }
  11051. }
  11052. return {};
  11053. }
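// Splits an expression of the form "ptr_expr[index_expr]" into its pointer and index parts by scanning
// backwards to the matching bracket, e.g. "shared_data[base + i]" -> ("shared_data", "base + i")
// (illustrative expression).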
  11054. static pair<string, string> split_coopmat_pointer(const string &expr)
  11055. {
  11056. auto ptr_expr = expr;
  11057. string index_expr;
  11058. if (ptr_expr.back() != ']')
  11059. SPIRV_CROSS_THROW("Access chain for coopmat must be indexed into an array.");
  11060. // Strip the access chain.
  11061. ptr_expr.pop_back();
  11062. uint32_t counter = 1;
  11063. while (counter && !ptr_expr.empty())
  11064. {
  11065. if (ptr_expr.back() == ']')
  11066. counter++;
  11067. else if (ptr_expr.back() == '[')
  11068. counter--;
  11069. ptr_expr.pop_back();
  11070. }
  11071. if (ptr_expr.empty())
  11072. SPIRV_CROSS_THROW("Invalid pointer expression for coopmat.");
  11073. index_expr = expr.substr(ptr_expr.size() + 1, expr.size() - (ptr_expr.size() + 1) - 1);
  11074. return { std::move(ptr_expr), std::move(index_expr) };
  11075. }
  11076. void CompilerGLSL::emit_instruction(const Instruction &instruction)
  11077. {
  11078. auto ops = stream(instruction);
  11079. auto opcode = static_cast<Op>(instruction.op);
  11080. uint32_t length = instruction.length;
  11081. #define GLSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op)
  11082. #define GLSL_BOP_CAST(op, type) \
  11083. emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, \
  11084. opcode_is_sign_invariant(opcode), implicit_integer_promotion)
  11085. #define GLSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op)
  11086. #define GLSL_UOP_CAST(op) emit_unary_op_cast(ops[0], ops[1], ops[2], #op)
  11087. #define GLSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op)
  11088. #define GLSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op)
  11089. #define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
  11090. #define GLSL_BFOP_CAST(op, type) \
  11091. emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
  11093. #define GLSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op)
  11094. // If we need to do implicit bitcasts, make sure we do it with the correct type.
  11095. uint32_t integer_width = get_integer_width_for_instruction(instruction);
  11096. auto int_type = to_signed_basetype(integer_width);
  11097. auto uint_type = to_unsigned_basetype(integer_width);
  11098. // Handle C implicit integer promotion rules.
  11099. // If we get implicit promotion to int, need to make sure we cast by value to intended return type,
  11100. // otherwise, future sign-dependent operations and bitcasts will break.
  11101. bool implicit_integer_promotion = integer_width < 32 && backend.implicit_c_integer_promotion_rules &&
  11102. opcode_can_promote_integer_implicitly(opcode) &&
  11103. get<SPIRType>(ops[0]).vecsize == 1;
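// Illustrative example (assumption: a C-like backend such as MSL sets
// implicit_c_integer_promotion_rules): adding two 16-bit scalars promotes the result
// to int, so the emitted expression is wrapped in a narrowing cast back to the declared
// result type, e.g. "ushort(a + b)", to keep later sign-dependent ops and bitcasts valid.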
  11104. opcode = get_remapped_spirv_op(opcode);
  11105. switch (opcode)
  11106. {
  11107. // Dealing with memory
  11108. case OpLoad:
  11109. {
  11110. uint32_t result_type = ops[0];
  11111. uint32_t id = ops[1];
  11112. uint32_t ptr = ops[2];
  11113. flush_variable_declaration(ptr);
  11114. // If we're loading from memory that cannot be changed by the shader,
  11115. // just forward the expression directly to avoid needless temporaries.
  11116. // If an expression is mutable and forwardable, we speculate that it is immutable.
  11117. bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries);
  11118. // If loading a non-native row-major matrix, mark the expression as need_transpose.
  11119. bool need_transpose = false;
  11120. bool old_need_transpose = false;
  11121. auto *ptr_expression = maybe_get<SPIRExpression>(ptr);
  11122. if (forward)
  11123. {
  11124. // If we're forwarding the load, we're also going to forward transpose state, so don't transpose while
  11125. // taking the expression.
  11126. if (ptr_expression && ptr_expression->need_transpose)
  11127. {
  11128. old_need_transpose = true;
  11129. ptr_expression->need_transpose = false;
  11130. need_transpose = true;
  11131. }
  11132. else if (is_non_native_row_major_matrix(ptr))
  11133. need_transpose = true;
  11134. }
  11135. // If we are forwarding this load,
  11136. // don't register the read to access chain here, defer that to when we actually use the expression,
  11137. // using the add_implied_read_expression mechanism.
  11138. string expr;
  11139. bool is_packed = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked);
  11140. bool is_remapped = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID);
  11141. if (forward || (!is_packed && !is_remapped))
  11142. {
  11143. // For the simple case, we do not need to deal with repacking.
  11144. expr = to_dereferenced_expression(ptr, false);
  11145. }
  11146. else
  11147. {
  11148. // If we are not forwarding the expression, we need to unpack and resolve any physical type remapping here before
  11149. // storing the expression to a temporary.
  11150. expr = to_unpacked_expression(ptr);
  11151. }
  11152. auto &type = get<SPIRType>(result_type);
  11153. auto &expr_type = expression_type(ptr);
  11154. // If the expression has more vector components than the result type, insert
  11155. // a swizzle. This shouldn't happen normally on valid SPIR-V, but it might
  11156. // happen with e.g. the MSL backend replacing the type of an input variable.
  11157. if (expr_type.vecsize > type.vecsize)
  11158. expr = enclose_expression(expr + vector_swizzle(type.vecsize, 0));
  11159. if (forward && ptr_expression)
  11160. ptr_expression->need_transpose = old_need_transpose;
  11161. // We might need to cast in order to load from a builtin.
  11162. cast_from_variable_load(ptr, expr, type);
  11163. if (forward && ptr_expression)
  11164. ptr_expression->need_transpose = false;
  11165. // We might be trying to load a gl_Position[N], where we should be
  11166. // doing float4[](gl_in[i].gl_Position, ...) instead.
  11167. // Similar workarounds are required for input arrays in tessellation.
  11168. // Also, loading from gl_SampleMask array needs special unroll.
  11169. unroll_array_from_complex_load(id, ptr, expr);
  11170. if (!type_is_opaque_value(type) && has_decoration(ptr, DecorationNonUniform))
  11171. {
  11172. // If we're loading something non-opaque, we need to handle non-uniform descriptor access.
  11173. convert_non_uniform_expression(expr, ptr);
  11174. }
  11175. if (forward && ptr_expression)
  11176. ptr_expression->need_transpose = old_need_transpose;
  11177. bool flattened = ptr_expression && flattened_buffer_blocks.count(ptr_expression->loaded_from) != 0;
  11178. if (backend.needs_row_major_load_workaround && !is_non_native_row_major_matrix(ptr) && !flattened)
  11179. rewrite_load_for_wrapped_row_major(expr, result_type, ptr);
// By default, suppress usage tracking since using the same expression multiple times does not imply any extra work.
  11181. // However, if we try to load a complex, composite object from a flattened buffer,
  11182. // we should avoid emitting the same code over and over and lower the result to a temporary.
  11183. bool usage_tracking = flattened && (type.basetype == SPIRType::Struct || (type.columns > 1));
  11184. SPIRExpression *e = nullptr;
  11185. if (!forward && expression_is_non_value_type_array(ptr))
  11186. {
  11187. // Complicated load case where we need to make a copy of ptr, but we cannot, because
  11188. // it is an array, and our backend does not support arrays as value types.
  11189. // Emit the temporary, and copy it explicitly.
  11190. e = &emit_uninitialized_temporary_expression(result_type, id);
  11191. emit_array_copy(nullptr, id, ptr, StorageClassFunction, get_expression_effective_storage_class(ptr));
  11192. }
  11193. else
  11194. e = &emit_op(result_type, id, expr, forward, !usage_tracking);
  11195. e->need_transpose = need_transpose;
  11196. register_read(id, ptr, forward);
  11197. if (forward)
  11198. {
  11199. // Pass through whether the result is of a packed type and the physical type ID.
  11200. if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked))
  11201. set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
  11202. if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID))
  11203. {
  11204. set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID,
  11205. get_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID));
  11206. }
  11207. }
  11208. else
  11209. {
  11210. // This might have been set on an earlier compilation iteration, force it to be unset.
  11211. unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
  11212. unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
  11213. }
  11214. inherit_expression_dependencies(id, ptr);
  11215. if (forward)
  11216. add_implied_read_expression(*e, ptr);
  11217. break;
  11218. }
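// Illustrative example (invented names): when the load is forwarded, later uses simply
// re-emit the pointer expression (e.g. "ubo.color") and no temporary appears; when it is
// not forwarded, a statement such as "vec4 _23 = ubo.color;" is emitted and _23 is used instead.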
  11219. case OpInBoundsAccessChain:
  11220. case OpAccessChain:
  11221. case OpPtrAccessChain:
  11222. {
  11223. auto *var = maybe_get<SPIRVariable>(ops[2]);
  11224. if (var)
  11225. flush_variable_declaration(var->self);
  11226. // If the base is immutable, the access chain pointer must also be.
  11227. // If an expression is mutable and forwardable, we speculate that it is immutable.
  11228. AccessChainMeta meta;
  11229. bool ptr_chain = opcode == OpPtrAccessChain;
  11230. auto &target_type = get<SPIRType>(ops[0]);
  11231. auto e = access_chain(ops[2], &ops[3], length - 3, target_type, &meta, ptr_chain);
  11232. // If the base is flattened UBO of struct type, the expression has to be a composite.
  11233. // In that case, backends which do not support inline syntax need it to be bound to a temporary.
  11234. // Otherwise, invalid expressions like ({UBO[0].xyz, UBO[0].w, UBO[1]}).member are emitted.
  11235. bool requires_temporary = false;
  11236. if (flattened_buffer_blocks.count(ops[2]) && target_type.basetype == SPIRType::Struct)
  11237. requires_temporary = !backend.can_declare_struct_inline;
  11238. auto &expr = requires_temporary ?
  11239. emit_op(ops[0], ops[1], std::move(e), false) :
  11240. set<SPIRExpression>(ops[1], std::move(e), ops[0], should_forward(ops[2]));
  11241. auto *backing_variable = maybe_get_backing_variable(ops[2]);
  11242. expr.loaded_from = backing_variable ? backing_variable->self : ID(ops[2]);
  11243. expr.need_transpose = meta.need_transpose;
  11244. expr.access_chain = true;
  11245. expr.access_meshlet_position_y = meta.access_meshlet_position_y;
// Mark the result as being packed. Some platforms handle packed vectors differently from non-packed ones.
  11247. if (meta.storage_is_packed)
  11248. set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypePacked);
  11249. if (meta.storage_physical_type != 0)
  11250. set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type);
  11251. if (meta.storage_is_invariant)
  11252. set_decoration(ops[1], DecorationInvariant);
  11253. if (meta.flattened_struct)
  11254. flattened_structs[ops[1]] = true;
  11255. if (meta.relaxed_precision && backend.requires_relaxed_precision_analysis)
  11256. set_decoration(ops[1], DecorationRelaxedPrecision);
  11257. if (meta.chain_is_builtin)
  11258. set_decoration(ops[1], DecorationBuiltIn, meta.builtin);
  11259. // If we have some expression dependencies in our access chain, this access chain is technically a forwarded
  11260. // temporary which could be subject to invalidation.
// Need to assume we're forwarded while calling inherit_expression_dependencies.
  11262. forwarded_temporaries.insert(ops[1]);
  11263. // The access chain itself is never forced to a temporary, but its dependencies might.
  11264. suppressed_usage_tracking.insert(ops[1]);
  11265. for (uint32_t i = 2; i < length; i++)
  11266. {
  11267. inherit_expression_dependencies(ops[1], ops[i]);
  11268. add_implied_read_expression(expr, ops[i]);
  11269. }
  11270. // If we have no dependencies after all, i.e., all indices in the access chain are immutable temporaries,
  11271. // we're not forwarded after all.
  11272. if (expr.expression_dependencies.empty())
  11273. forwarded_temporaries.erase(ops[1]);
  11274. break;
  11275. }
  11276. case OpStore:
  11277. {
  11278. auto *var = maybe_get<SPIRVariable>(ops[0]);
  11279. if (var && var->statically_assigned)
  11280. var->static_expression = ops[1];
  11281. else if (var && var->loop_variable && !var->loop_variable_enable)
  11282. var->static_expression = ops[1];
  11283. else if (var && var->remapped_variable && var->static_expression)
  11284. {
  11285. // Skip the write.
  11286. }
  11287. else if (flattened_structs.count(ops[0]))
  11288. {
  11289. store_flattened_struct(ops[0], ops[1]);
  11290. register_write(ops[0]);
  11291. }
  11292. else
  11293. {
  11294. emit_store_statement(ops[0], ops[1]);
  11295. }
  11296. // Storing a pointer results in a variable pointer, so we must conservatively assume
  11297. // we can write through it.
  11298. if (expression_type(ops[1]).pointer)
  11299. register_write(ops[1]);
  11300. break;
  11301. }
  11302. case OpArrayLength:
  11303. {
  11304. uint32_t result_type = ops[0];
  11305. uint32_t id = ops[1];
  11306. auto e = access_chain_internal(ops[2], &ops[3], length - 3, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
  11307. if (has_decoration(ops[2], DecorationNonUniform))
  11308. convert_non_uniform_expression(e, ops[2]);
  11309. set<SPIRExpression>(id, join(type_to_glsl(get<SPIRType>(result_type)), "(", e, ".length())"), result_type,
  11310. true);
  11311. break;
  11312. }
  11313. // Function calls
  11314. case OpFunctionCall:
  11315. {
  11316. uint32_t result_type = ops[0];
  11317. uint32_t id = ops[1];
  11318. uint32_t func = ops[2];
  11319. const auto *arg = &ops[3];
  11320. length -= 3;
  11321. auto &callee = get<SPIRFunction>(func);
  11322. auto &return_type = get<SPIRType>(callee.return_type);
  11323. bool pure = function_is_pure(callee);
  11324. bool control_dependent = function_is_control_dependent(callee);
  11325. bool callee_has_out_variables = false;
  11326. bool emit_return_value_as_argument = false;
  11327. // Invalidate out variables passed to functions since they can be OpStore'd to.
  11328. for (uint32_t i = 0; i < length; i++)
  11329. {
  11330. if (callee.arguments[i].write_count)
  11331. {
  11332. register_call_out_argument(arg[i]);
  11333. callee_has_out_variables = true;
  11334. }
  11335. flush_variable_declaration(arg[i]);
  11336. }
  11337. if (!return_type.array.empty() && !backend.can_return_array)
  11338. {
  11339. callee_has_out_variables = true;
  11340. emit_return_value_as_argument = true;
  11341. }
  11342. if (!pure)
  11343. register_impure_function_call();
  11344. string funexpr;
  11345. SmallVector<string> arglist;
  11346. funexpr += to_name(func) + "(";
  11347. if (emit_return_value_as_argument)
  11348. {
  11349. statement(type_to_glsl(return_type), " ", to_name(id), type_to_array_glsl(return_type, 0), ";");
  11350. arglist.push_back(to_name(id));
  11351. }
  11352. for (uint32_t i = 0; i < length; i++)
  11353. {
  11354. // Do not pass in separate images or samplers if we're remapping
  11355. // to combined image samplers.
  11356. if (skip_argument(arg[i]))
  11357. continue;
  11358. arglist.push_back(to_func_call_arg(callee.arguments[i], arg[i]));
  11359. }
  11360. for (auto &combined : callee.combined_parameters)
  11361. {
  11362. auto image_id = combined.global_image ? combined.image_id : VariableID(arg[combined.image_id]);
  11363. auto sampler_id = combined.global_sampler ? combined.sampler_id : VariableID(arg[combined.sampler_id]);
  11364. arglist.push_back(to_combined_image_sampler(image_id, sampler_id));
  11365. }
  11366. append_global_func_args(callee, length, arglist);
  11367. funexpr += merge(arglist);
  11368. funexpr += ")";
  11369. // Check for function call constraints.
  11370. check_function_call_constraints(arg, length);
  11371. if (return_type.basetype != SPIRType::Void)
  11372. {
  11373. // If the function actually writes to an out variable,
  11374. // take the conservative route and do not forward.
  11375. // The problem is that we might not read the function
  11376. // result (and emit the function) before an out variable
// is read (a common case when the return value is ignored!).
// In order to avoid tracking invalid variables,
  11379. // just avoid the forwarding problem altogether.
  11380. bool forward = args_will_forward(id, arg, length, pure) && !callee_has_out_variables && pure &&
  11381. (forced_temporaries.find(id) == end(forced_temporaries));
  11382. if (emit_return_value_as_argument)
  11383. {
  11384. statement(funexpr, ";");
  11385. set<SPIRExpression>(id, to_name(id), result_type, true);
  11386. }
  11387. else
  11388. emit_op(result_type, id, funexpr, forward);
  11389. // Function calls are implicit loads from all variables in question.
  11390. // Set dependencies for them.
  11391. for (uint32_t i = 0; i < length; i++)
  11392. register_read(id, arg[i], forward);
  11393. // If we're going to forward the temporary result,
  11394. // put dependencies on every variable that must not change.
  11395. if (forward)
  11396. register_global_read_dependencies(callee, id);
  11397. }
  11398. else
  11399. statement(funexpr, ";");
  11400. if (control_dependent)
  11401. register_control_dependent_expression(id);
  11402. break;
  11403. }
  11404. // Composite munging
  11405. case OpCompositeConstruct:
  11406. {
  11407. uint32_t result_type = ops[0];
  11408. uint32_t id = ops[1];
  11409. const auto *const elems = &ops[2];
  11410. length -= 2;
  11411. bool forward = true;
  11412. for (uint32_t i = 0; i < length; i++)
  11413. forward = forward && should_forward(elems[i]);
  11414. auto &out_type = get<SPIRType>(result_type);
  11415. auto *in_type = length > 0 ? &expression_type(elems[0]) : nullptr;
  11416. // Only splat if we have vector constructors.
  11417. // Arrays and structs must be initialized properly in full.
  11418. bool composite = !out_type.array.empty() || out_type.basetype == SPIRType::Struct;
  11419. bool splat = false;
  11420. bool swizzle_splat = false;
  11421. if (in_type)
  11422. {
  11423. splat = in_type->vecsize == 1 && in_type->columns == 1 && !composite && backend.use_constructor_splatting;
  11424. swizzle_splat = in_type->vecsize == 1 && in_type->columns == 1 && backend.can_swizzle_scalar;
  11425. if (ir.ids[elems[0]].get_type() == TypeConstant && !type_is_floating_point(*in_type))
  11426. {
  11427. // Cannot swizzle literal integers as a special case.
  11428. swizzle_splat = false;
  11429. }
  11430. }
  11431. if (splat || swizzle_splat)
  11432. {
  11433. uint32_t input = elems[0];
  11434. for (uint32_t i = 0; i < length; i++)
  11435. {
  11436. if (input != elems[i])
  11437. {
  11438. splat = false;
  11439. swizzle_splat = false;
  11440. }
  11441. }
  11442. }
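// Illustrative example (invented operand x): with constructor splatting a construct of
// four identical scalars becomes "vec4(x)" instead of "vec4(x, x, x, x)"; with swizzle
// splatting the scalar may instead be expanded as "x.xxxx" via remap_swizzle() below.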
  11443. if (out_type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline)
  11444. forward = false;
  11445. if (!out_type.array.empty() && !backend.can_declare_arrays_inline)
  11446. forward = false;
  11447. if (type_is_empty(out_type) && !backend.supports_empty_struct)
  11448. forward = false;
  11449. string constructor_op;
  11450. if (backend.use_initializer_list && composite)
  11451. {
bool needs_trailing_bracket = false;
  11453. // Only use this path if we are building composites.
  11454. // This path cannot be used for arithmetic.
  11455. if (backend.use_typed_initializer_list && out_type.basetype == SPIRType::Struct && out_type.array.empty())
  11456. constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type));
  11457. else if (backend.use_typed_initializer_list && backend.array_is_value_type && !out_type.array.empty())
  11458. {
  11459. // MSL path. Array constructor is baked into type here, do not use _constructor variant.
  11460. constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type)) + "(";
needs_trailing_bracket = true;
  11462. }
  11463. constructor_op += "{ ";
  11464. if (type_is_empty(out_type) && !backend.supports_empty_struct)
  11465. constructor_op += "0";
  11466. else if (splat)
  11467. constructor_op += to_unpacked_expression(elems[0]);
  11468. else
  11469. constructor_op += build_composite_combiner(result_type, elems, length);
  11470. constructor_op += " }";
if (needs_trailing_bracket)
  11472. constructor_op += ")";
  11473. }
  11474. else if (swizzle_splat && !composite)
  11475. {
  11476. constructor_op = remap_swizzle(get<SPIRType>(result_type), 1, to_unpacked_expression(elems[0]));
  11477. }
  11478. else
  11479. {
  11480. constructor_op = type_to_glsl_constructor(get<SPIRType>(result_type)) + "(";
  11481. if (type_is_empty(out_type) && !backend.supports_empty_struct)
  11482. constructor_op += "0";
  11483. else if (splat)
  11484. constructor_op += to_unpacked_expression(elems[0]);
  11485. else
  11486. constructor_op += build_composite_combiner(result_type, elems, length);
  11487. constructor_op += ")";
  11488. }
  11489. if (!constructor_op.empty())
  11490. {
  11491. emit_op(result_type, id, constructor_op, forward);
  11492. for (uint32_t i = 0; i < length; i++)
  11493. inherit_expression_dependencies(id, elems[i]);
  11494. }
  11495. break;
  11496. }
  11497. case OpVectorInsertDynamic:
  11498. {
  11499. uint32_t result_type = ops[0];
  11500. uint32_t id = ops[1];
  11501. uint32_t vec = ops[2];
  11502. uint32_t comp = ops[3];
  11503. uint32_t index = ops[4];
  11504. flush_variable_declaration(vec);
  11505. // Make a copy, then use access chain to store the variable.
  11506. statement(declare_temporary(result_type, id), to_expression(vec), ";");
  11507. set<SPIRExpression>(id, to_name(id), result_type, true);
  11508. auto chain = access_chain_internal(id, &index, 1, 0, nullptr);
  11509. statement(chain, " = ", to_unpacked_expression(comp), ";");
  11510. break;
  11511. }
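// Illustrative example (invented names): the statements above emit roughly
//   vec4 _20 = v;
//   _20[i] = comp;
// after which _20 stands in for the result id.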
  11512. case OpVectorExtractDynamic:
  11513. {
  11514. uint32_t result_type = ops[0];
  11515. uint32_t id = ops[1];
  11516. auto expr = access_chain_internal(ops[2], &ops[3], 1, 0, nullptr);
  11517. emit_op(result_type, id, expr, should_forward(ops[2]));
  11518. inherit_expression_dependencies(id, ops[2]);
  11519. inherit_expression_dependencies(id, ops[3]);
  11520. break;
  11521. }
  11522. case OpCompositeExtract:
  11523. {
  11524. uint32_t result_type = ops[0];
  11525. uint32_t id = ops[1];
  11526. length -= 3;
  11527. auto &type = get<SPIRType>(result_type);
  11528. // We can only split the expression here if our expression is forwarded as a temporary.
  11529. bool allow_base_expression = forced_temporaries.find(id) == end(forced_temporaries);
  11530. // Do not allow base expression for struct members. We risk doing "swizzle" optimizations in this case.
  11531. auto &composite_type = expression_type(ops[2]);
  11532. bool composite_type_is_complex = composite_type.basetype == SPIRType::Struct || !composite_type.array.empty();
  11533. if (composite_type_is_complex)
  11534. allow_base_expression = false;
  11535. if (composite_type.op == OpTypeCooperativeMatrixKHR)
  11536. allow_base_expression = false;
  11537. // Packed expressions or physical ID mapped expressions cannot be split up.
  11538. if (has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypePacked) ||
  11539. has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypeID))
  11540. allow_base_expression = false;
  11541. // Cannot use base expression for row-major matrix row-extraction since we need to interleave access pattern
  11542. // into the base expression.
  11543. if (is_non_native_row_major_matrix(ops[2]))
  11544. allow_base_expression = false;
  11545. AccessChainMeta meta;
  11546. SPIRExpression *e = nullptr;
  11547. auto *c = maybe_get<SPIRConstant>(ops[2]);
  11548. if (c && !c->specialization && !composite_type_is_complex)
  11549. {
  11550. auto expr = to_extract_constant_composite_expression(result_type, *c, ops + 3, length);
  11551. e = &emit_op(result_type, id, expr, true, true);
  11552. }
  11553. else if (allow_base_expression && should_forward(ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1)
  11554. {
  11555. // Only apply this optimization if result is scalar.
  11556. // We want to split the access chain from the base.
  11557. // This is so we can later combine different CompositeExtract results
  11558. // with CompositeConstruct without emitting code like
  11559. //
  11560. // vec3 temp = texture(...).xyz
  11561. // vec4(temp.x, temp.y, temp.z, 1.0).
  11562. //
  11563. // when we actually wanted to emit this
  11564. // vec4(texture(...).xyz, 1.0).
  11565. //
  11566. // Including the base will prevent this and would trigger multiple reads
  11567. // from expression causing it to be forced to an actual temporary in GLSL.
  11568. auto expr = access_chain_internal(ops[2], &ops[3], length,
  11569. ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_CHAIN_ONLY_BIT |
  11570. ACCESS_CHAIN_FORCE_COMPOSITE_BIT, &meta);
  11571. e = &emit_op(result_type, id, expr, true, should_suppress_usage_tracking(ops[2]));
  11572. inherit_expression_dependencies(id, ops[2]);
  11573. e->base_expression = ops[2];
  11574. if (meta.relaxed_precision && backend.requires_relaxed_precision_analysis)
  11575. set_decoration(ops[1], DecorationRelaxedPrecision);
  11576. }
  11577. else
  11578. {
  11579. auto expr = access_chain_internal(ops[2], &ops[3], length,
  11580. ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_FORCE_COMPOSITE_BIT, &meta);
  11581. e = &emit_op(result_type, id, expr, should_forward(ops[2]), should_suppress_usage_tracking(ops[2]));
  11582. inherit_expression_dependencies(id, ops[2]);
  11583. }
  11584. // Pass through some meta information to the loaded expression.
  11585. // We can still end up loading a buffer type to a variable, then CompositeExtract from it
  11586. // instead of loading everything through an access chain.
  11587. e->need_transpose = meta.need_transpose;
  11588. if (meta.storage_is_packed)
  11589. set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
  11590. if (meta.storage_physical_type != 0)
  11591. set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type);
  11592. if (meta.storage_is_invariant)
  11593. set_decoration(id, DecorationInvariant);
  11594. break;
  11595. }
  11596. case OpCompositeInsert:
  11597. {
  11598. uint32_t result_type = ops[0];
  11599. uint32_t id = ops[1];
  11600. uint32_t obj = ops[2];
  11601. uint32_t composite = ops[3];
  11602. const auto *elems = &ops[4];
  11603. length -= 4;
  11604. flush_variable_declaration(composite);
  11605. // CompositeInsert requires a copy + modification, but this is very awkward code in HLL.
  11606. // Speculate that the input composite is no longer used, and we can modify it in-place.
  11607. // There are various scenarios where this is not possible to satisfy.
  11608. bool can_modify_in_place = true;
  11609. forced_temporaries.insert(id);
  11610. // Cannot safely RMW PHI variables since they have no way to be invalidated,
  11611. // forcing temporaries is not going to help.
  11612. // This is similar for Constant and Undef inputs.
  11613. // The only safe thing to RMW is SPIRExpression.
  11614. // If the expression has already been used (i.e. used in a continue block), we have to keep using
  11615. // that loop variable, since we won't be able to override the expression after the fact.
  11616. // If the composite is hoisted, we might never be able to properly invalidate any usage
  11617. // of that composite in a subsequent loop iteration.
  11618. if (invalid_expressions.count(composite) ||
  11619. block_composite_insert_overwrite.count(composite) ||
  11620. hoisted_temporaries.count(id) || hoisted_temporaries.count(composite) ||
  11621. maybe_get<SPIRExpression>(composite) == nullptr)
  11622. {
  11623. can_modify_in_place = false;
  11624. }
  11625. else if (backend.requires_relaxed_precision_analysis &&
  11626. has_decoration(composite, DecorationRelaxedPrecision) !=
  11627. has_decoration(id, DecorationRelaxedPrecision) &&
  11628. get<SPIRType>(result_type).basetype != SPIRType::Struct)
  11629. {
  11630. // Similarly, if precision does not match for input and output,
  11631. // we cannot alias them. If we write a composite into a relaxed precision
  11632. // ID, we might get a false truncation.
  11633. can_modify_in_place = false;
  11634. }
  11635. if (can_modify_in_place)
  11636. {
  11637. // Have to make sure the modified SSA value is bound to a temporary so we can modify it in-place.
  11638. if (!forced_temporaries.count(composite))
  11639. force_temporary_and_recompile(composite);
  11640. auto chain = access_chain_internal(composite, elems, length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
  11641. statement(chain, " = ", to_unpacked_expression(obj), ";");
  11642. set<SPIRExpression>(id, to_expression(composite), result_type, true);
  11643. invalid_expressions.insert(composite);
  11644. composite_insert_overwritten.insert(composite);
  11645. }
  11646. else
  11647. {
  11648. if (maybe_get<SPIRUndef>(composite) != nullptr)
  11649. {
  11650. emit_uninitialized_temporary_expression(result_type, id);
  11651. }
  11652. else
  11653. {
  11654. // Make a copy, then use access chain to store the variable.
  11655. statement(declare_temporary(result_type, id), to_expression(composite), ";");
  11656. set<SPIRExpression>(id, to_name(id), result_type, true);
  11657. }
  11658. auto chain = access_chain_internal(id, elems, length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
  11659. statement(chain, " = ", to_unpacked_expression(obj), ";");
  11660. }
  11661. break;
  11662. }
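// Illustrative contrast (invented names): when in-place modification is allowed, the
// composite's temporary is patched directly, e.g. "_30.y = obj;", and the result id
// aliases _30; otherwise a copy is made first, e.g. "vec4 _31 = _30;" followed by "_31.y = obj;".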
  11663. case OpCopyMemory:
  11664. {
  11665. uint32_t lhs = ops[0];
  11666. uint32_t rhs = ops[1];
  11667. if (lhs != rhs)
  11668. {
  11669. uint32_t &tmp_id = extra_sub_expressions[instruction.offset | EXTRA_SUB_EXPRESSION_TYPE_STREAM_OFFSET];
  11670. if (!tmp_id)
  11671. tmp_id = ir.increase_bound_by(1);
  11672. uint32_t tmp_type_id = expression_type(rhs).parent_type;
  11673. EmbeddedInstruction fake_load, fake_store;
  11674. fake_load.op = OpLoad;
  11675. fake_load.length = 3;
  11676. fake_load.ops.push_back(tmp_type_id);
  11677. fake_load.ops.push_back(tmp_id);
  11678. fake_load.ops.push_back(rhs);
  11679. fake_store.op = OpStore;
  11680. fake_store.length = 2;
  11681. fake_store.ops.push_back(lhs);
  11682. fake_store.ops.push_back(tmp_id);
  11683. // Load and Store do a *lot* of workarounds, and we'd like to reuse them as much as possible.
  11684. // Synthesize a fake Load and Store pair for CopyMemory.
  11685. emit_instruction(fake_load);
  11686. emit_instruction(fake_store);
  11687. }
  11688. break;
  11689. }
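// Note (added comment): in SPIR-V terms, OpCopyMemory %dst %src is treated here as if it
// were "%tmp = OpLoad %src" followed by "OpStore %dst %tmp", which is why the synthetic
// instructions above can reuse the full OpLoad/OpStore handling.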
  11690. case OpCopyLogical:
  11691. {
  11692. // This is used for copying object of different types, arrays and structs.
  11693. // We need to unroll the copy, element-by-element.
  11694. uint32_t result_type = ops[0];
  11695. uint32_t id = ops[1];
  11696. uint32_t rhs = ops[2];
  11697. emit_uninitialized_temporary_expression(result_type, id);
  11698. emit_copy_logical_type(id, result_type, rhs, expression_type_id(rhs), {});
  11699. break;
  11700. }
  11701. case OpCopyObject:
  11702. {
  11703. uint32_t result_type = ops[0];
  11704. uint32_t id = ops[1];
  11705. uint32_t rhs = ops[2];
  11706. bool pointer = get<SPIRType>(result_type).pointer;
  11707. auto *chain = maybe_get<SPIRAccessChain>(rhs);
  11708. auto *imgsamp = maybe_get<SPIRCombinedImageSampler>(rhs);
  11709. if (chain)
  11710. {
  11711. // Cannot lower to a SPIRExpression, just copy the object.
  11712. auto &e = set<SPIRAccessChain>(id, *chain);
  11713. e.self = id;
  11714. }
  11715. else if (imgsamp)
  11716. {
  11717. // Cannot lower to a SPIRExpression, just copy the object.
  11718. // GLSL does not currently use this type and will never get here, but MSL does.
  11719. // Handled here instead of CompilerMSL for better integration and general handling,
  11720. // and in case GLSL or other subclasses require it in the future.
  11721. auto &e = set<SPIRCombinedImageSampler>(id, *imgsamp);
  11722. e.self = id;
  11723. }
  11724. else if (expression_is_lvalue(rhs) && !pointer)
  11725. {
  11726. // Need a copy.
  11727. // For pointer types, we copy the pointer itself.
  11728. emit_op(result_type, id, to_unpacked_expression(rhs), false);
  11729. }
  11730. else
  11731. {
  11732. // RHS expression is immutable, so just forward it.
// Copying these things really makes no sense, but
// it seems to be allowed anyway.
  11735. auto &e = emit_op(result_type, id, to_expression(rhs), true, true);
  11736. if (pointer)
  11737. {
  11738. auto *var = maybe_get_backing_variable(rhs);
  11739. e.loaded_from = var ? var->self : ID(0);
  11740. }
  11741. // If we're copying an access chain, need to inherit the read expressions.
  11742. auto *rhs_expr = maybe_get<SPIRExpression>(rhs);
  11743. if (rhs_expr)
  11744. {
  11745. e.implied_read_expressions = rhs_expr->implied_read_expressions;
  11746. e.expression_dependencies = rhs_expr->expression_dependencies;
  11747. }
  11748. }
  11749. break;
  11750. }
  11751. case OpVectorShuffle:
  11752. {
  11753. uint32_t result_type = ops[0];
  11754. uint32_t id = ops[1];
  11755. uint32_t vec0 = ops[2];
  11756. uint32_t vec1 = ops[3];
  11757. const auto *elems = &ops[4];
  11758. length -= 4;
  11759. auto &type0 = expression_type(vec0);
  11760. // If we have the undefined swizzle index -1, we need to swizzle in undefined data,
  11761. // or in our case, T(0).
  11762. bool shuffle = false;
  11763. for (uint32_t i = 0; i < length; i++)
  11764. if (elems[i] >= type0.vecsize || elems[i] == 0xffffffffu)
  11765. shuffle = true;
  11766. // Cannot use swizzles with packed expressions, force shuffle path.
  11767. if (!shuffle && has_extended_decoration(vec0, SPIRVCrossDecorationPhysicalTypePacked))
  11768. shuffle = true;
  11769. string expr;
  11770. bool should_fwd, trivial_forward;
  11771. if (shuffle)
  11772. {
  11773. should_fwd = should_forward(vec0) && should_forward(vec1);
  11774. trivial_forward = should_suppress_usage_tracking(vec0) && should_suppress_usage_tracking(vec1);
  11775. // Constructor style and shuffling from two different vectors.
  11776. SmallVector<string> args;
  11777. for (uint32_t i = 0; i < length; i++)
  11778. {
  11779. if (elems[i] == 0xffffffffu)
  11780. {
  11781. // Use a constant 0 here.
  11782. // We could use the first component or similar, but then we risk propagating
  11783. // a value we might not need, and bog down codegen.
  11784. SPIRConstant c;
  11785. c.constant_type = type0.parent_type;
  11786. assert(type0.parent_type != ID(0));
  11787. args.push_back(constant_expression(c));
  11788. }
  11789. else if (elems[i] >= type0.vecsize)
  11790. args.push_back(to_extract_component_expression(vec1, elems[i] - type0.vecsize));
  11791. else
  11792. args.push_back(to_extract_component_expression(vec0, elems[i]));
  11793. }
  11794. expr += join(type_to_glsl_constructor(get<SPIRType>(result_type)), "(", merge(args), ")");
  11795. }
  11796. else
  11797. {
  11798. should_fwd = should_forward(vec0);
  11799. trivial_forward = should_suppress_usage_tracking(vec0);
  11800. // We only source from first vector, so can use swizzle.
  11801. // If the vector is packed, unpack it before applying a swizzle (needed for MSL)
  11802. expr += to_enclosed_unpacked_expression(vec0);
  11803. expr += ".";
  11804. for (uint32_t i = 0; i < length; i++)
  11805. {
  11806. assert(elems[i] != 0xffffffffu);
  11807. expr += index_to_swizzle(elems[i]);
  11808. }
  11809. if (backend.swizzle_is_function && length > 1)
  11810. expr += "()";
  11811. }
  11812. // A shuffle is trivial in that it doesn't actually *do* anything.
  11813. // We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed.
  11814. emit_op(result_type, id, expr, should_fwd, trivial_forward);
  11815. inherit_expression_dependencies(id, vec0);
  11816. if (vec0 != vec1)
  11817. inherit_expression_dependencies(id, vec1);
  11818. break;
  11819. }
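// Illustrative example (invented operands): a shuffle that mixes both vectors is emitted
// in constructor form, e.g. "vec4(a.y, b.x, 0.0, a.w)" (with 0.0 filling an undefined
// index), while one that only reads the first vector collapses to a swizzle such as "a.yxzw".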
  11820. // ALU
  11821. case OpIsNan:
  11822. if (!is_legacy())
  11823. GLSL_UFOP(isnan);
  11824. else
  11825. {
  11826. // Check if the number doesn't equal itself
  11827. auto &type = get<SPIRType>(ops[0]);
  11828. if (type.vecsize > 1)
  11829. emit_binary_func_op(ops[0], ops[1], ops[2], ops[2], "notEqual");
  11830. else
  11831. emit_binary_op(ops[0], ops[1], ops[2], ops[2], "!=");
  11832. }
  11833. break;
  11834. case OpIsInf:
  11835. if (!is_legacy())
  11836. GLSL_UFOP(isinf);
  11837. else
  11838. {
  11839. // inf * 2 == inf by IEEE 754 rules, note this also applies to 0.0
  11840. // This is more reliable than checking if product with zero is NaN
  11841. uint32_t result_type = ops[0];
  11842. uint32_t result_id = ops[1];
  11843. uint32_t operand = ops[2];
  11844. auto &type = get<SPIRType>(result_type);
  11845. std::string expr;
  11846. if (type.vecsize > 1)
  11847. {
  11848. expr = type_to_glsl_constructor(type);
  11849. expr += '(';
  11850. for (uint32_t i = 0; i < type.vecsize; i++)
  11851. {
  11852. auto comp = to_extract_component_expression(operand, i);
  11853. expr += join(comp, " != 0.0 && 2.0 * ", comp, " == ", comp);
  11854. if (i + 1 < type.vecsize)
  11855. expr += ", ";
  11856. }
  11857. expr += ')';
  11858. }
  11859. else
  11860. {
  11861. // Register an extra read to force writing out a temporary
  11862. auto oper = to_enclosed_expression(operand);
  11863. track_expression_read(operand);
  11864. expr += join(oper, " != 0.0 && 2.0 * ", oper, " == ", oper);
  11865. }
  11866. emit_op(result_type, result_id, expr, should_forward(operand));
  11867. inherit_expression_dependencies(result_id, operand);
  11868. }
  11869. break;
  11870. case OpSNegate:
  11871. if (implicit_integer_promotion || expression_type_id(ops[2]) != ops[0])
  11872. GLSL_UOP_CAST(-);
  11873. else
  11874. GLSL_UOP(-);
  11875. break;
  11876. case OpFNegate:
  11877. GLSL_UOP(-);
  11878. break;
  11879. case OpIAdd:
  11880. {
  11881. // For simple arith ops, prefer the output type if there's a mismatch to avoid extra bitcasts.
  11882. auto type = get<SPIRType>(ops[0]).basetype;
  11883. GLSL_BOP_CAST(+, type);
  11884. break;
  11885. }
  11886. case OpFAdd:
  11887. GLSL_BOP(+);
  11888. break;
  11889. case OpISub:
  11890. {
  11891. auto type = get<SPIRType>(ops[0]).basetype;
  11892. GLSL_BOP_CAST(-, type);
  11893. break;
  11894. }
  11895. case OpFSub:
  11896. GLSL_BOP(-);
  11897. break;
  11898. case OpIMul:
  11899. {
  11900. auto type = get<SPIRType>(ops[0]).basetype;
  11901. GLSL_BOP_CAST(*, type);
  11902. break;
  11903. }
  11904. case OpVectorTimesMatrix:
  11905. case OpMatrixTimesVector:
  11906. {
  11907. // If the matrix needs transpose, just flip the multiply order.
  11908. auto *e = maybe_get<SPIRExpression>(ops[opcode == OpMatrixTimesVector ? 2 : 3]);
  11909. if (e && e->need_transpose)
  11910. {
  11911. e->need_transpose = false;
  11912. string expr;
  11913. if (opcode == OpMatrixTimesVector)
  11914. expr = join(to_enclosed_unpacked_expression(ops[3]), " * ",
  11915. enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])));
  11916. else
  11917. expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ",
  11918. to_enclosed_unpacked_expression(ops[2]));
  11919. bool forward = should_forward(ops[2]) && should_forward(ops[3]);
  11920. emit_op(ops[0], ops[1], expr, forward);
  11921. e->need_transpose = true;
  11922. inherit_expression_dependencies(ops[1], ops[2]);
  11923. inherit_expression_dependencies(ops[1], ops[3]);
  11924. }
  11925. else
  11926. GLSL_BOP(*);
  11927. break;
  11928. }
  11929. case OpMatrixTimesMatrix:
  11930. {
  11931. auto *a = maybe_get<SPIRExpression>(ops[2]);
  11932. auto *b = maybe_get<SPIRExpression>(ops[3]);
  11933. // If both matrices need transpose, we can multiply in flipped order and tag the expression as transposed.
  11934. // a^T * b^T = (b * a)^T.
  11935. if (a && b && a->need_transpose && b->need_transpose)
  11936. {
  11937. a->need_transpose = false;
  11938. b->need_transpose = false;
  11939. auto expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ",
  11940. enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])));
  11941. bool forward = should_forward(ops[2]) && should_forward(ops[3]);
  11942. emit_transposed_op(ops[0], ops[1], expr, forward);
  11943. a->need_transpose = true;
  11944. b->need_transpose = true;
  11945. inherit_expression_dependencies(ops[1], ops[2]);
  11946. inherit_expression_dependencies(ops[1], ops[3]);
  11947. }
  11948. else
  11949. GLSL_BOP(*);
  11950. break;
  11951. }
  11952. case OpMatrixTimesScalar:
  11953. {
  11954. auto *a = maybe_get<SPIRExpression>(ops[2]);
  11955. // If the matrix need transpose, just mark the result as needing so.
  11956. if (a && a->need_transpose)
  11957. {
  11958. a->need_transpose = false;
  11959. auto expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])), " * ",
  11960. to_enclosed_unpacked_expression(ops[3]));
  11961. bool forward = should_forward(ops[2]) && should_forward(ops[3]);
  11962. emit_transposed_op(ops[0], ops[1], expr, forward);
  11963. a->need_transpose = true;
  11964. inherit_expression_dependencies(ops[1], ops[2]);
  11965. inherit_expression_dependencies(ops[1], ops[3]);
  11966. }
  11967. else
  11968. GLSL_BOP(*);
  11969. break;
  11970. }
  11971. case OpFMul:
  11972. case OpVectorTimesScalar:
  11973. GLSL_BOP(*);
  11974. break;
  11975. case OpOuterProduct:
  11976. if (options.version < 120) // Matches GLSL 1.10 / ESSL 1.00
  11977. {
  11978. uint32_t result_type = ops[0];
  11979. uint32_t id = ops[1];
  11980. uint32_t a = ops[2];
  11981. uint32_t b = ops[3];
  11982. auto &type = get<SPIRType>(result_type);
  11983. string expr = type_to_glsl_constructor(type);
  11984. expr += "(";
  11985. for (uint32_t col = 0; col < type.columns; col++)
  11986. {
  11987. expr += to_enclosed_expression(a);
  11988. expr += " * ";
  11989. expr += to_extract_component_expression(b, col);
  11990. if (col + 1 < type.columns)
  11991. expr += ", ";
  11992. }
  11993. expr += ")";
  11994. emit_op(result_type, id, expr, should_forward(a) && should_forward(b));
  11995. inherit_expression_dependencies(id, a);
  11996. inherit_expression_dependencies(id, b);
  11997. }
  11998. else
  11999. GLSL_BFOP(outerProduct);
  12000. break;
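// Illustrative example (invented operands): on targets without outerProduct(), the
// matrix is built column by column, e.g. "mat3(a * b.x, a * b.y, a * b.z)".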
  12001. case OpDot:
  12002. GLSL_BFOP(dot);
  12003. break;
  12004. case OpTranspose:
  12005. if (options.version < 120) // Matches GLSL 1.10 / ESSL 1.00
  12006. {
  12007. // transpose() is not available, so instead, flip need_transpose,
  12008. // which can later be turned into an emulated transpose op by
  12009. // convert_row_major_matrix(), if necessary.
  12010. uint32_t result_type = ops[0];
  12011. uint32_t result_id = ops[1];
  12012. uint32_t input = ops[2];
  12013. // Force need_transpose to false temporarily to prevent
  12014. // to_expression() from doing the transpose.
  12015. bool need_transpose = false;
  12016. auto *input_e = maybe_get<SPIRExpression>(input);
  12017. if (input_e)
  12018. swap(need_transpose, input_e->need_transpose);
  12019. bool forward = should_forward(input);
  12020. auto &e = emit_op(result_type, result_id, to_expression(input), forward);
  12021. e.need_transpose = !need_transpose;
  12022. // Restore the old need_transpose flag.
  12023. if (input_e)
  12024. input_e->need_transpose = need_transpose;
  12025. }
  12026. else
  12027. GLSL_UFOP(transpose);
  12028. break;
  12029. case OpSRem:
  12030. {
  12031. uint32_t result_type = ops[0];
  12032. uint32_t result_id = ops[1];
  12033. uint32_t op0 = ops[2];
  12034. uint32_t op1 = ops[3];
  12035. auto &out_type = get<SPIRType>(result_type);
  12036. bool forward = should_forward(op0) && should_forward(op1);
  12037. string cast_op0, cast_op1;
  12038. auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, int_type, op0, op1, false);
  12039. // Needs special handling.
  12040. auto expr = join(cast_op0, " - ", cast_op1, " * ", "(", cast_op0, " / ", cast_op1, ")");
  12041. if (implicit_integer_promotion)
  12042. {
  12043. expr = join(type_to_glsl(get<SPIRType>(result_type)), '(', expr, ')');
  12044. }
  12045. else if (out_type.basetype != int_type)
  12046. {
  12047. expected_type.basetype = int_type;
  12048. expr = join(bitcast_glsl_op(out_type, expected_type), '(', expr, ')');
  12049. }
  12050. emit_op(result_type, result_id, expr, forward);
  12051. inherit_expression_dependencies(result_id, op0);
  12052. inherit_expression_dependencies(result_id, op1);
  12053. break;
  12054. }
  12055. case OpSDiv:
  12056. GLSL_BOP_CAST(/, int_type);
  12057. break;
  12058. case OpUDiv:
  12059. GLSL_BOP_CAST(/, uint_type);
  12060. break;
  12061. case OpIAddCarry:
  12062. case OpISubBorrow:
  12063. {
  12064. if (options.es && options.version < 310)
  12065. SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310.");
  12066. else if (!options.es && options.version < 400)
  12067. SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400.");
  12068. uint32_t result_type = ops[0];
  12069. uint32_t result_id = ops[1];
  12070. uint32_t op0 = ops[2];
  12071. uint32_t op1 = ops[3];
  12072. auto &type = get<SPIRType>(result_type);
  12073. emit_uninitialized_temporary_expression(result_type, result_id);
  12074. const char *op = opcode == OpIAddCarry ? "uaddCarry" : "usubBorrow";
  12075. statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", op, "(", to_expression(op0), ", ",
  12076. to_expression(op1), ", ", to_expression(result_id), ".", to_member_name(type, 1), ");");
  12077. break;
  12078. }
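// Illustrative example (member names depend on the generated struct, typically _m0/_m1):
// OpIAddCarry becomes roughly "_25._m0 = uaddCarry(a, b, _25._m1);", with the carry-out
// written to the second struct member.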
  12079. case OpUMulExtended:
  12080. case OpSMulExtended:
  12081. {
  12082. if (options.es && options.version < 310)
  12083. SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310.");
  12084. else if (!options.es && options.version < 400)
  12085. SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 4000.");
  12086. uint32_t result_type = ops[0];
  12087. uint32_t result_id = ops[1];
  12088. uint32_t op0 = ops[2];
  12089. uint32_t op1 = ops[3];
  12090. auto &type = get<SPIRType>(result_type);
  12091. emit_uninitialized_temporary_expression(result_type, result_id);
  12092. const char *op = opcode == OpUMulExtended ? "umulExtended" : "imulExtended";
  12093. statement(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(result_id), ".",
  12094. to_member_name(type, 1), ", ", to_expression(result_id), ".", to_member_name(type, 0), ");");
  12095. break;
  12096. }
  12097. case OpFDiv:
  12098. GLSL_BOP(/);
  12099. break;
  12100. case OpShiftRightLogical:
  12101. GLSL_BOP_CAST(>>, uint_type);
  12102. break;
  12103. case OpShiftRightArithmetic:
  12104. GLSL_BOP_CAST(>>, int_type);
  12105. break;
  12106. case OpShiftLeftLogical:
  12107. {
  12108. auto type = get<SPIRType>(ops[0]).basetype;
  12109. GLSL_BOP_CAST(<<, type);
  12110. break;
  12111. }
  12112. case OpBitwiseOr:
  12113. {
  12114. auto type = get<SPIRType>(ops[0]).basetype;
  12115. GLSL_BOP_CAST(|, type);
  12116. break;
  12117. }
  12118. case OpBitwiseXor:
  12119. {
  12120. auto type = get<SPIRType>(ops[0]).basetype;
  12121. GLSL_BOP_CAST(^, type);
  12122. break;
  12123. }
  12124. case OpBitwiseAnd:
  12125. {
  12126. auto type = get<SPIRType>(ops[0]).basetype;
  12127. GLSL_BOP_CAST(&, type);
  12128. break;
  12129. }
  12130. case OpNot:
  12131. if (implicit_integer_promotion || expression_type_id(ops[2]) != ops[0])
  12132. GLSL_UOP_CAST(~);
  12133. else
  12134. GLSL_UOP(~);
  12135. break;
  12136. case OpUMod:
  12137. GLSL_BOP_CAST(%, uint_type);
  12138. break;
  12139. case OpSMod:
  12140. GLSL_BOP_CAST(%, int_type);
  12141. break;
  12142. case OpFMod:
  12143. GLSL_BFOP(mod);
  12144. break;
  12145. case OpFRem:
  12146. {
  12147. uint32_t result_type = ops[0];
  12148. uint32_t result_id = ops[1];
  12149. uint32_t op0 = ops[2];
  12150. uint32_t op1 = ops[3];
  12151. // Needs special handling.
  12152. bool forward = should_forward(op0) && should_forward(op1);
  12153. std::string expr;
  12154. if (!is_legacy())
  12155. {
  12156. expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "trunc(",
  12157. to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")");
  12158. }
  12159. else
  12160. {
  12161. // Legacy GLSL has no trunc, emulate by casting to int and back
  12162. auto &op0_type = expression_type(op0);
  12163. auto via_type = op0_type;
  12164. via_type.basetype = SPIRType::Int;
  12165. expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ",
  12166. type_to_glsl(op0_type), "(", type_to_glsl(via_type), "(",
  12167. to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), "))");
  12168. }
  12169. emit_op(result_type, result_id, expr, forward);
  12170. inherit_expression_dependencies(result_id, op0);
  12171. inherit_expression_dependencies(result_id, op1);
  12172. break;
  12173. }
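// Illustrative example (invented operands): on non-legacy targets OpFRem becomes
// "a - b * trunc(a / b)"; legacy GLSL has no trunc(), so the truncation is emulated with
// a round trip through int, e.g. "a - b * float(int(a / b))".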
  12174. // Relational
  12175. case OpAny:
  12176. GLSL_UFOP(any);
  12177. break;
  12178. case OpAll:
  12179. GLSL_UFOP(all);
  12180. break;
  12181. case OpSelect:
  12182. emit_mix_op(ops[0], ops[1], ops[4], ops[3], ops[2]);
  12183. break;
  12184. case OpLogicalOr:
  12185. {
  12186. // No vector variant in GLSL for logical OR.
  12187. auto result_type = ops[0];
  12188. auto id = ops[1];
  12189. auto &type = get<SPIRType>(result_type);
  12190. if (type.vecsize > 1)
  12191. emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||", false, SPIRType::Unknown);
  12192. else
  12193. GLSL_BOP(||);
  12194. break;
  12195. }
  12196. case OpLogicalAnd:
  12197. {
  12198. // No vector variant in GLSL for logical AND.
  12199. auto result_type = ops[0];
  12200. auto id = ops[1];
  12201. auto &type = get<SPIRType>(result_type);
  12202. if (type.vecsize > 1)
  12203. emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&", false, SPIRType::Unknown);
  12204. else
  12205. GLSL_BOP(&&);
  12206. break;
  12207. }
  12208. case OpLogicalNot:
  12209. {
  12210. auto &type = get<SPIRType>(ops[0]);
  12211. if (type.vecsize > 1)
  12212. GLSL_UFOP(not );
  12213. else
  12214. GLSL_UOP(!);
  12215. break;
  12216. }
  12217. case OpIEqual:
  12218. {
  12219. if (expression_type(ops[2]).vecsize > 1)
  12220. GLSL_BFOP_CAST(equal, int_type);
  12221. else
  12222. GLSL_BOP_CAST(==, int_type);
  12223. break;
  12224. }
  12225. case OpLogicalEqual:
  12226. case OpFOrdEqual:
  12227. {
  12228. if (expression_type(ops[2]).vecsize > 1)
  12229. GLSL_BFOP(equal);
  12230. else
  12231. GLSL_BOP(==);
  12232. break;
  12233. }
  12234. case OpINotEqual:
  12235. {
  12236. if (expression_type(ops[2]).vecsize > 1)
  12237. GLSL_BFOP_CAST(notEqual, int_type);
  12238. else
  12239. GLSL_BOP_CAST(!=, int_type);
  12240. break;
  12241. }
  12242. case OpLogicalNotEqual:
  12243. case OpFOrdNotEqual:
  12244. case OpFUnordNotEqual:
  12245. {
  12246. // GLSL is fuzzy on what to do with ordered vs unordered not equal.
  12247. // glslang started emitting UnorderedNotEqual some time ago to harmonize with IEEE,
  12248. // but this means we have no easy way of implementing ordered not equal.
  12249. if (expression_type(ops[2]).vecsize > 1)
  12250. GLSL_BFOP(notEqual);
  12251. else
  12252. GLSL_BOP(!=);
  12253. break;
  12254. }
  12255. case OpUGreaterThan:
  12256. case OpSGreaterThan:
  12257. {
  12258. auto type = opcode == OpUGreaterThan ? uint_type : int_type;
  12259. if (expression_type(ops[2]).vecsize > 1)
  12260. GLSL_BFOP_CAST(greaterThan, type);
  12261. else
  12262. GLSL_BOP_CAST(>, type);
  12263. break;
  12264. }
  12265. case OpFOrdGreaterThan:
  12266. {
  12267. if (expression_type(ops[2]).vecsize > 1)
  12268. GLSL_BFOP(greaterThan);
  12269. else
  12270. GLSL_BOP(>);
  12271. break;
  12272. }
  12273. case OpUGreaterThanEqual:
  12274. case OpSGreaterThanEqual:
  12275. {
  12276. auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type;
  12277. if (expression_type(ops[2]).vecsize > 1)
  12278. GLSL_BFOP_CAST(greaterThanEqual, type);
  12279. else
  12280. GLSL_BOP_CAST(>=, type);
  12281. break;
  12282. }
  12283. case OpFOrdGreaterThanEqual:
  12284. {
  12285. if (expression_type(ops[2]).vecsize > 1)
  12286. GLSL_BFOP(greaterThanEqual);
  12287. else
  12288. GLSL_BOP(>=);
  12289. break;
  12290. }
  12291. case OpULessThan:
  12292. case OpSLessThan:
  12293. {
  12294. auto type = opcode == OpULessThan ? uint_type : int_type;
  12295. if (expression_type(ops[2]).vecsize > 1)
  12296. GLSL_BFOP_CAST(lessThan, type);
  12297. else
  12298. GLSL_BOP_CAST(<, type);
  12299. break;
  12300. }
  12301. case OpFOrdLessThan:
  12302. {
  12303. if (expression_type(ops[2]).vecsize > 1)
  12304. GLSL_BFOP(lessThan);
  12305. else
  12306. GLSL_BOP(<);
  12307. break;
  12308. }
  12309. case OpULessThanEqual:
  12310. case OpSLessThanEqual:
  12311. {
  12312. auto type = opcode == OpULessThanEqual ? uint_type : int_type;
  12313. if (expression_type(ops[2]).vecsize > 1)
  12314. GLSL_BFOP_CAST(lessThanEqual, type);
  12315. else
  12316. GLSL_BOP_CAST(<=, type);
  12317. break;
  12318. }
  12319. case OpFOrdLessThanEqual:
  12320. {
  12321. if (expression_type(ops[2]).vecsize > 1)
  12322. GLSL_BFOP(lessThanEqual);
  12323. else
  12324. GLSL_BOP(<=);
  12325. break;
  12326. }
  12327. // Conversion
  12328. case OpSConvert:
  12329. case OpConvertSToF:
  12330. case OpUConvert:
  12331. case OpConvertUToF:
  12332. {
  12333. auto input_type = opcode == OpSConvert || opcode == OpConvertSToF ? int_type : uint_type;
  12334. uint32_t result_type = ops[0];
  12335. uint32_t id = ops[1];
  12336. auto &type = get<SPIRType>(result_type);
  12337. auto &arg_type = expression_type(ops[2]);
  12338. auto func = type_to_glsl_constructor(type);
  12339. if (arg_type.width < type.width || type_is_floating_point(type))
  12340. emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), input_type, type.basetype);
  12341. else
  12342. emit_unary_func_op(result_type, id, ops[2], func.c_str());
  12343. break;
  12344. }
  12345. case OpConvertFToU:
  12346. case OpConvertFToS:
  12347. {
  12348. // Cast to expected arithmetic type, then potentially bitcast away to desired signedness.
  12349. uint32_t result_type = ops[0];
  12350. uint32_t id = ops[1];
  12351. auto &type = get<SPIRType>(result_type);
  12352. auto expected_type = type;
  12353. auto &float_type = expression_type(ops[2]);
  12354. expected_type.basetype =
  12355. opcode == OpConvertFToS ? to_signed_basetype(type.width) : to_unsigned_basetype(type.width);
  12356. auto func = type_to_glsl_constructor(expected_type);
  12357. emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), float_type.basetype, expected_type.basetype);
  12358. break;
  12359. }
  12360. case OpCooperativeMatrixConvertNV:
  12361. if (!options.vulkan_semantics)
  12362. SPIRV_CROSS_THROW("CooperativeMatrixConvertNV requires vulkan semantics.");
  12363. require_extension_internal("GL_NV_cooperative_matrix2");
  12364. // fallthrough
  12365. case OpFConvert:
  12366. {
  12367. uint32_t result_type = ops[0];
  12368. uint32_t id = ops[1];
  12369. auto &type = get<SPIRType>(result_type);
  12370. if (type.op == OpTypeCooperativeMatrixKHR && opcode == OpFConvert)
  12371. {
  12372. auto &expr_type = expression_type(ops[2]);
  12373. if (get<SPIRConstant>(type.ext.cooperative.use_id).scalar() !=
  12374. get<SPIRConstant>(expr_type.ext.cooperative.use_id).scalar())
  12375. {
  12376. // Somewhat questionable with spec constant uses.
  12377. if (!options.vulkan_semantics)
  12378. SPIRV_CROSS_THROW("NV_cooperative_matrix2 requires vulkan semantics.");
  12379. require_extension_internal("GL_NV_cooperative_matrix2");
  12380. }
  12381. }
  12382. if ((type.basetype == SPIRType::FloatE4M3 || type.basetype == SPIRType::FloatE5M2) &&
  12383. has_decoration(id, DecorationSaturatedToLargestFloat8NormalConversionEXT))
  12384. {
  12385. emit_uninitialized_temporary_expression(result_type, id);
  12386. statement("saturatedConvertEXT(", to_expression(id), ", ", to_unpacked_expression(ops[2]), ");");
  12387. }
  12388. else
  12389. {
  12390. auto func = type_to_glsl_constructor(type);
  12391. emit_unary_func_op(result_type, id, ops[2], func.c_str());
  12392. }
  12393. break;
  12394. }
  12395. case OpBitcast:
  12396. {
  12397. uint32_t result_type = ops[0];
  12398. uint32_t id = ops[1];
  12399. uint32_t arg = ops[2];
  12400. if (!emit_complex_bitcast(result_type, id, arg))
  12401. {
  12402. auto op = bitcast_glsl_op(get<SPIRType>(result_type), expression_type(arg));
  12403. emit_unary_func_op(result_type, id, arg, op.c_str());
  12404. }
  12405. break;
  12406. }
  12407. case OpQuantizeToF16:
  12408. {
  12409. uint32_t result_type = ops[0];
  12410. uint32_t id = ops[1];
  12411. uint32_t arg = ops[2];
  12412. string op;
  12413. auto &type = get<SPIRType>(result_type);
  12414. switch (type.vecsize)
  12415. {
  12416. case 1:
  12417. op = join("unpackHalf2x16(packHalf2x16(vec2(", to_expression(arg), "))).x");
  12418. break;
  12419. case 2:
  12420. op = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), "))");
  12421. break;
  12422. case 3:
  12423. {
  12424. auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))");
  12425. auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zz)).x");
  12426. op = join("vec3(", op0, ", ", op1, ")");
  12427. break;
  12428. }
  12429. case 4:
  12430. {
  12431. auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))");
  12432. auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zw))");
  12433. op = join("vec4(", op0, ", ", op1, ")");
  12434. break;
  12435. }
  12436. default:
  12437. SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16.");
  12438. }
  12439. emit_op(result_type, id, op, should_forward(arg));
  12440. inherit_expression_dependencies(id, arg);
  12441. break;
  12442. }
  12443. // Derivatives
  12444. case OpDPdx:
  12445. GLSL_UFOP(dFdx);
  12446. if (is_legacy_es())
  12447. require_extension_internal("GL_OES_standard_derivatives");
  12448. register_control_dependent_expression(ops[1]);
  12449. break;
  12450. case OpDPdy:
  12451. GLSL_UFOP(dFdy);
  12452. if (is_legacy_es())
  12453. require_extension_internal("GL_OES_standard_derivatives");
  12454. register_control_dependent_expression(ops[1]);
  12455. break;
  12456. case OpDPdxFine:
  12457. GLSL_UFOP(dFdxFine);
  12458. if (options.es)
  12459. {
  12460. SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
  12461. }
  12462. if (options.version < 450)
  12463. require_extension_internal("GL_ARB_derivative_control");
  12464. register_control_dependent_expression(ops[1]);
  12465. break;
  12466. case OpDPdyFine:
  12467. GLSL_UFOP(dFdyFine);
  12468. if (options.es)
  12469. {
  12470. SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
  12471. }
  12472. if (options.version < 450)
  12473. require_extension_internal("GL_ARB_derivative_control");
  12474. register_control_dependent_expression(ops[1]);
  12475. break;
  12476. case OpDPdxCoarse:
  12477. if (options.es)
  12478. {
  12479. SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
  12480. }
  12481. GLSL_UFOP(dFdxCoarse);
  12482. if (options.version < 450)
  12483. require_extension_internal("GL_ARB_derivative_control");
  12484. register_control_dependent_expression(ops[1]);
  12485. break;
  12486. case OpDPdyCoarse:
  12487. GLSL_UFOP(dFdyCoarse);
  12488. if (options.es)
  12489. {
  12490. SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
  12491. }
  12492. if (options.version < 450)
  12493. require_extension_internal("GL_ARB_derivative_control");
  12494. register_control_dependent_expression(ops[1]);
  12495. break;
  12496. case OpFwidth:
  12497. GLSL_UFOP(fwidth);
  12498. if (is_legacy_es())
  12499. require_extension_internal("GL_OES_standard_derivatives");
  12500. register_control_dependent_expression(ops[1]);
  12501. break;
  12502. case OpFwidthCoarse:
  12503. GLSL_UFOP(fwidthCoarse);
  12504. if (options.es)
  12505. {
  12506. SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
  12507. }
  12508. if (options.version < 450)
  12509. require_extension_internal("GL_ARB_derivative_control");
  12510. register_control_dependent_expression(ops[1]);
  12511. break;
  12512. case OpFwidthFine:
  12513. GLSL_UFOP(fwidthFine);
  12514. if (options.es)
  12515. {
  12516. SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
  12517. }
  12518. if (options.version < 450)
  12519. require_extension_internal("GL_ARB_derivative_control");
  12520. register_control_dependent_expression(ops[1]);
  12521. break;
  12522. // Bitfield
  12523. case OpBitFieldInsert:
  12524. {
  12525. emit_bitfield_insert_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], "bitfieldInsert", SPIRType::Int);
  12526. break;
  12527. }
  12528. case OpBitFieldSExtract:
  12529. {
  12530. emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", int_type, int_type,
  12531. SPIRType::Int, SPIRType::Int);
  12532. break;
  12533. }
  12534. case OpBitFieldUExtract:
  12535. {
  12536. emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", uint_type, uint_type,
  12537. SPIRType::Int, SPIRType::Int);
  12538. break;
  12539. }
  12540. case OpBitReverse:
  12541. // BitReverse does not have issues with sign since result type must match input type.
  12542. GLSL_UFOP(bitfieldReverse);
  12543. break;
  12544. case OpBitCount:
  12545. {
  12546. auto basetype = expression_type(ops[2]).basetype;
  12547. emit_unary_func_op_cast(ops[0], ops[1], ops[2], "bitCount", basetype, int_type);
  12548. break;
  12549. }
  12550. // Atomics
  12551. case OpAtomicExchange:
  12552. {
  12553. uint32_t result_type = ops[0];
  12554. uint32_t id = ops[1];
  12555. uint32_t ptr = ops[2];
  12556. // Ignore semantics for now, probably only relevant to CL.
  12557. uint32_t val = ops[5];
  12558. const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange";
  12559. emit_atomic_func_op(result_type, id, ptr, val, op);
  12560. break;
  12561. }
  12562. case OpAtomicCompareExchange:
  12563. {
  12564. uint32_t result_type = ops[0];
  12565. uint32_t id = ops[1];
  12566. uint32_t ptr = ops[2];
  12567. uint32_t val = ops[6];
  12568. uint32_t comp = ops[7];
  12569. const char *op = check_atomic_image(ptr) ? "imageAtomicCompSwap" : "atomicCompSwap";
  12570. emit_atomic_func_op(result_type, id, ptr, comp, val, op);
  12571. break;
  12572. }
  12573. case OpAtomicLoad:
  12574. {
12575. // In plain GLSL, we have no atomic loads, so emulate this by atomically adding 0 and hoping the compiler figures it out.
  12576. // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL.
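// For illustration (hypothetical names): an OpAtomicLoad of a uint is emitted as
//     uint _33 = atomicAdd(ssbo.counter, 0u);
// expressing the load as a read-modify-write that adds zero.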
  12577. auto &type = expression_type(ops[2]);
  12578. forced_temporaries.insert(ops[1]);
  12579. bool atomic_image = check_atomic_image(ops[2]);
  12580. bool unsigned_type = (type.basetype == SPIRType::UInt) ||
  12581. (atomic_image && get<SPIRType>(type.image.type).basetype == SPIRType::UInt);
  12582. const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd";
  12583. const char *increment = unsigned_type ? "0u" : "0";
  12584. emit_op(ops[0], ops[1],
  12585. join(op, "(",
  12586. to_atomic_ptr_expression(ops[2]), ", ", increment, ")"), false);
  12587. flush_all_atomic_capable_variables();
  12588. if (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64)
  12589. require_extension_internal("GL_EXT_shader_atomic_int64");
  12590. break;
  12591. }
  12592. case OpAtomicStore:
  12593. {
  12594. // In plain GLSL, we have no atomic stores, so emulate this with an atomic exchange where we don't consume the result.
  12595. // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL.
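// For illustration (hypothetical names): the store is emitted as
//     atomicExchange(ssbo.value, newValue);
// with the returned previous value simply discarded.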
  12596. uint32_t ptr = ops[0];
  12597. // Ignore semantics for now, probably only relevant to CL.
  12598. uint32_t val = ops[3];
  12599. const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange";
  12600. statement(op, "(", to_atomic_ptr_expression(ptr), ", ", to_expression(val), ");");
  12601. flush_all_atomic_capable_variables();
  12602. auto &type = expression_type(ptr);
  12603. if (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64)
  12604. require_extension_internal("GL_EXT_shader_atomic_int64");
  12605. break;
  12606. }
  12607. case OpAtomicIIncrement:
  12608. case OpAtomicIDecrement:
  12609. {
  12610. forced_temporaries.insert(ops[1]);
  12611. auto &type = expression_type(ops[2]);
  12612. if (type.storage == StorageClassAtomicCounter)
  12613. {
  12614. // Legacy GLSL stuff, not sure if this is relevant to support.
  12615. if (opcode == OpAtomicIIncrement)
  12616. GLSL_UFOP(atomicCounterIncrement);
  12617. else
  12618. GLSL_UFOP(atomicCounterDecrement);
  12619. }
  12620. else
  12621. {
  12622. bool atomic_image = check_atomic_image(ops[2]);
  12623. bool unsigned_type = (type.basetype == SPIRType::UInt) ||
  12624. (atomic_image && get<SPIRType>(type.image.type).basetype == SPIRType::UInt);
  12625. const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd";
  12626. const char *increment = nullptr;
  12627. if (opcode == OpAtomicIIncrement && unsigned_type)
  12628. increment = "1u";
  12629. else if (opcode == OpAtomicIIncrement)
  12630. increment = "1";
  12631. else if (unsigned_type)
  12632. increment = "uint(-1)";
  12633. else
  12634. increment = "-1";
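// For illustration (hypothetical names): an unsigned OpAtomicIIncrement becomes
//     uint _40 = atomicAdd(ssbo.counter, 1u);
// and the corresponding decrement uses uint(-1) as the addend.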
  12635. emit_op(ops[0], ops[1],
  12636. join(op, "(", to_atomic_ptr_expression(ops[2]), ", ", increment, ")"), false);
  12637. if (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64)
  12638. require_extension_internal("GL_EXT_shader_atomic_int64");
  12639. }
  12640. flush_all_atomic_capable_variables();
  12641. break;
  12642. }
  12643. case OpAtomicIAdd:
  12644. case OpAtomicFAddEXT:
  12645. {
  12646. const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
  12647. emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
  12648. break;
  12649. }
  12650. case OpAtomicISub:
  12651. {
  12652. const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
  12653. forced_temporaries.insert(ops[1]);
  12654. auto expr = join(op, "(", to_atomic_ptr_expression(ops[2]), ", -", to_enclosed_expression(ops[5]), ")");
  12655. emit_op(ops[0], ops[1], expr, should_forward(ops[2]) && should_forward(ops[5]));
  12656. flush_all_atomic_capable_variables();
  12657. auto &type = get<SPIRType>(ops[0]);
  12658. if (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64)
  12659. require_extension_internal("GL_EXT_shader_atomic_int64");
  12660. break;
  12661. }
  12662. case OpAtomicSMin:
  12663. case OpAtomicUMin:
  12664. {
  12665. const char *op = check_atomic_image(ops[2]) ? "imageAtomicMin" : "atomicMin";
  12666. emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
  12667. break;
  12668. }
  12669. case OpAtomicSMax:
  12670. case OpAtomicUMax:
  12671. {
  12672. const char *op = check_atomic_image(ops[2]) ? "imageAtomicMax" : "atomicMax";
  12673. emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
  12674. break;
  12675. }
  12676. case OpAtomicAnd:
  12677. {
  12678. const char *op = check_atomic_image(ops[2]) ? "imageAtomicAnd" : "atomicAnd";
  12679. emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
  12680. break;
  12681. }
  12682. case OpAtomicOr:
  12683. {
  12684. const char *op = check_atomic_image(ops[2]) ? "imageAtomicOr" : "atomicOr";
  12685. emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
  12686. break;
  12687. }
  12688. case OpAtomicXor:
  12689. {
  12690. const char *op = check_atomic_image(ops[2]) ? "imageAtomicXor" : "atomicXor";
  12691. emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
  12692. break;
  12693. }
  12694. // Geometry shaders
  12695. case OpEmitVertex:
  12696. statement("EmitVertex();");
  12697. break;
  12698. case OpEndPrimitive:
  12699. statement("EndPrimitive();");
  12700. break;
  12701. case OpEmitStreamVertex:
  12702. {
  12703. if (options.es)
  12704. SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES.");
  12705. else if (!options.es && options.version < 400)
  12706. SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400.");
  12707. auto stream_expr = to_expression(ops[0]);
  12708. if (expression_type(ops[0]).basetype != SPIRType::Int)
  12709. stream_expr = join("int(", stream_expr, ")");
  12710. statement("EmitStreamVertex(", stream_expr, ");");
  12711. break;
  12712. }
  12713. case OpEndStreamPrimitive:
  12714. {
  12715. if (options.es)
  12716. SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES.");
  12717. else if (!options.es && options.version < 400)
  12718. SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400.");
  12719. auto stream_expr = to_expression(ops[0]);
  12720. if (expression_type(ops[0]).basetype != SPIRType::Int)
  12721. stream_expr = join("int(", stream_expr, ")");
  12722. statement("EndStreamPrimitive(", stream_expr, ");");
  12723. break;
  12724. }
  12725. // Textures
  12726. case OpImageSampleExplicitLod:
  12727. case OpImageSampleProjExplicitLod:
  12728. case OpImageSampleDrefExplicitLod:
  12729. case OpImageSampleProjDrefExplicitLod:
  12730. case OpImageSampleImplicitLod:
  12731. case OpImageSampleProjImplicitLod:
  12732. case OpImageSampleDrefImplicitLod:
  12733. case OpImageSampleProjDrefImplicitLod:
  12734. case OpImageFetch:
  12735. case OpImageGather:
  12736. case OpImageDrefGather:
12737. // Gets a bit hairy, so move this to a separate function.
  12738. emit_texture_op(instruction, false);
  12739. break;
  12740. case OpImageSparseSampleExplicitLod:
  12741. case OpImageSparseSampleProjExplicitLod:
  12742. case OpImageSparseSampleDrefExplicitLod:
  12743. case OpImageSparseSampleProjDrefExplicitLod:
  12744. case OpImageSparseSampleImplicitLod:
  12745. case OpImageSparseSampleProjImplicitLod:
  12746. case OpImageSparseSampleDrefImplicitLod:
  12747. case OpImageSparseSampleProjDrefImplicitLod:
  12748. case OpImageSparseFetch:
  12749. case OpImageSparseGather:
  12750. case OpImageSparseDrefGather:
12751. // Gets a bit hairy, so move this to a separate function.
  12752. emit_texture_op(instruction, true);
  12753. break;
  12754. case OpImageSparseTexelsResident:
  12755. if (options.es)
  12756. SPIRV_CROSS_THROW("Sparse feedback is not supported in GLSL.");
  12757. require_extension_internal("GL_ARB_sparse_texture2");
  12758. emit_unary_func_op_cast(ops[0], ops[1], ops[2], "sparseTexelsResidentARB", int_type, SPIRType::Boolean);
  12759. break;
  12760. case OpImage:
  12761. {
  12762. uint32_t result_type = ops[0];
  12763. uint32_t id = ops[1];
  12764. // Suppress usage tracking.
  12765. auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true);
  12766. // When using the image, we need to know which variable it is actually loaded from.
  12767. auto *var = maybe_get_backing_variable(ops[2]);
  12768. e.loaded_from = var ? var->self : ID(0);
  12769. break;
  12770. }
  12771. case OpImageQueryLod:
  12772. {
  12773. const char *op = nullptr;
  12774. if (!options.es && options.version < 400)
  12775. {
  12776. require_extension_internal("GL_ARB_texture_query_lod");
  12777. // For some reason, the ARB spec is all-caps.
  12778. op = "textureQueryLOD";
  12779. }
  12780. else if (options.es)
  12781. {
  12782. if (options.version < 300)
  12783. SPIRV_CROSS_THROW("textureQueryLod not supported in legacy ES");
  12784. require_extension_internal("GL_EXT_texture_query_lod");
  12785. op = "textureQueryLOD";
  12786. }
  12787. else
  12788. op = "textureQueryLod";
  12789. auto sampler_expr = to_expression(ops[2]);
  12790. if (has_decoration(ops[2], DecorationNonUniform))
  12791. {
  12792. if (maybe_get_backing_variable(ops[2]))
  12793. convert_non_uniform_expression(sampler_expr, ops[2]);
  12794. else if (*backend.nonuniform_qualifier != '\0')
  12795. sampler_expr = join(backend.nonuniform_qualifier, "(", sampler_expr, ")");
  12796. }
  12797. bool forward = should_forward(ops[3]);
  12798. emit_op(ops[0], ops[1],
  12799. join(op, "(", sampler_expr, ", ", to_unpacked_expression(ops[3]), ")"),
  12800. forward);
  12801. inherit_expression_dependencies(ops[1], ops[2]);
  12802. inherit_expression_dependencies(ops[1], ops[3]);
  12803. register_control_dependent_expression(ops[1]);
  12804. break;
  12805. }
  12806. case OpImageQueryLevels:
  12807. {
  12808. uint32_t result_type = ops[0];
  12809. uint32_t id = ops[1];
  12810. if (!options.es && options.version < 430)
  12811. require_extension_internal("GL_ARB_texture_query_levels");
  12812. if (options.es)
  12813. SPIRV_CROSS_THROW("textureQueryLevels not supported in ES profile.");
  12814. auto expr = join("textureQueryLevels(", convert_separate_image_to_expression(ops[2]), ")");
  12815. auto &restype = get<SPIRType>(ops[0]);
  12816. expr = bitcast_expression(restype, SPIRType::Int, expr);
  12817. emit_op(result_type, id, expr, true);
  12818. break;
  12819. }
  12820. case OpImageQuerySamples:
  12821. {
  12822. auto &type = expression_type(ops[2]);
  12823. uint32_t result_type = ops[0];
  12824. uint32_t id = ops[1];
  12825. if (options.es)
  12826. SPIRV_CROSS_THROW("textureSamples and imageSamples not supported in ES profile.");
  12827. else if (options.version < 450)
  12828. require_extension_internal("GL_ARB_texture_query_samples");
  12829. string expr;
  12830. if (type.image.sampled == 2)
  12831. expr = join("imageSamples(", to_non_uniform_aware_expression(ops[2]), ")");
  12832. else
  12833. expr = join("textureSamples(", convert_separate_image_to_expression(ops[2]), ")");
  12834. auto &restype = get<SPIRType>(ops[0]);
  12835. expr = bitcast_expression(restype, SPIRType::Int, expr);
  12836. emit_op(result_type, id, expr, true);
  12837. break;
  12838. }
  12839. case OpSampledImage:
  12840. {
  12841. uint32_t result_type = ops[0];
  12842. uint32_t id = ops[1];
  12843. emit_sampled_image_op(result_type, id, ops[2], ops[3]);
  12844. inherit_expression_dependencies(id, ops[2]);
  12845. inherit_expression_dependencies(id, ops[3]);
  12846. break;
  12847. }
  12848. case OpImageQuerySizeLod:
  12849. {
  12850. uint32_t result_type = ops[0];
  12851. uint32_t id = ops[1];
  12852. uint32_t img = ops[2];
  12853. auto &type = expression_type(img);
  12854. auto &imgtype = get<SPIRType>(type.self);
  12855. std::string fname = "textureSize";
  12856. if (is_legacy_desktop())
  12857. {
  12858. fname = legacy_tex_op(fname, imgtype, img);
  12859. }
  12860. else if (is_legacy_es())
  12861. SPIRV_CROSS_THROW("textureSize is not supported in ESSL 100.");
  12862. auto expr = join(fname, "(", convert_separate_image_to_expression(img), ", ",
  12863. bitcast_expression(SPIRType::Int, ops[3]), ")");
  12864. // ES needs to emulate 1D images as 2D.
  12865. if (type.image.dim == Dim1D && options.es)
  12866. expr = join(expr, ".x");
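// For illustration (hypothetical names): on ES a 1D query is emitted against the 2D emulation and
// reduced to its first component, e.g. textureSize(uTex, 0).x.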
  12867. auto &restype = get<SPIRType>(ops[0]);
  12868. expr = bitcast_expression(restype, SPIRType::Int, expr);
  12869. emit_op(result_type, id, expr, true);
  12870. break;
  12871. }
  12872. // Image load/store
  12873. case OpImageRead:
  12874. case OpImageSparseRead:
  12875. {
  12876. // We added Nonreadable speculatively to the OpImage variable due to glslangValidator
  12877. // not adding the proper qualifiers.
  12878. // If it turns out we need to read the image after all, remove the qualifier and recompile.
  12879. auto *var = maybe_get_backing_variable(ops[2]);
  12880. if (var)
  12881. {
  12882. auto &flags = get_decoration_bitset(var->self);
  12883. if (flags.get(DecorationNonReadable))
  12884. {
  12885. unset_decoration(var->self, DecorationNonReadable);
  12886. force_recompile();
  12887. }
  12888. }
  12889. uint32_t result_type = ops[0];
  12890. uint32_t id = ops[1];
  12891. bool pure;
  12892. string imgexpr;
  12893. auto &type = expression_type(ops[2]);
  12894. if (var && var->remapped_variable) // Remapped input, just read as-is without any op-code
  12895. {
  12896. if (type.image.ms)
  12897. SPIRV_CROSS_THROW("Trying to remap multisampled image to variable, this is not possible.");
  12898. auto itr =
  12899. find_if(begin(pls_inputs), end(pls_inputs), [var](const PlsRemap &pls) { return pls.id == var->self; });
  12900. if (itr == end(pls_inputs))
  12901. {
  12902. // For non-PLS inputs, we rely on subpass type remapping information to get it right
  12903. // since ImageRead always returns 4-component vectors and the backing type is opaque.
  12904. if (!var->remapped_components)
  12905. SPIRV_CROSS_THROW("subpassInput was remapped, but remap_components is not set correctly.");
  12906. imgexpr = remap_swizzle(get<SPIRType>(result_type), var->remapped_components, to_expression(ops[2]));
  12907. }
  12908. else
  12909. {
12910. // A PLS input can have a different number of components than the SPIR-V type expects, so swizzle to
12911. // the appropriate vector size.
  12912. uint32_t components = pls_format_to_components(itr->format);
  12913. imgexpr = remap_swizzle(get<SPIRType>(result_type), components, to_expression(ops[2]));
  12914. }
  12915. pure = true;
  12916. }
  12917. else if (type.image.dim == DimSubpassData)
  12918. {
  12919. if (var && subpass_input_is_framebuffer_fetch(var->self))
  12920. {
  12921. imgexpr = to_expression(var->self);
  12922. }
  12923. else if (options.vulkan_semantics)
  12924. {
  12925. // With Vulkan semantics, use the proper Vulkan GLSL construct.
  12926. if (type.image.ms)
  12927. {
  12928. uint32_t operands = ops[4];
  12929. if (operands != ImageOperandsSampleMask || length != 6)
  12930. SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
  12931. "operand mask was used.");
  12932. uint32_t samples = ops[5];
  12933. imgexpr = join("subpassLoad(", to_non_uniform_aware_expression(ops[2]), ", ", to_expression(samples), ")");
  12934. }
  12935. else
  12936. imgexpr = join("subpassLoad(", to_non_uniform_aware_expression(ops[2]), ")");
  12937. }
  12938. else
  12939. {
  12940. if (type.image.ms)
  12941. {
  12942. uint32_t operands = ops[4];
  12943. if (operands != ImageOperandsSampleMask || length != 6)
  12944. SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
  12945. "operand mask was used.");
  12946. uint32_t samples = ops[5];
  12947. imgexpr = join("texelFetch(", to_non_uniform_aware_expression(ops[2]), ", ivec2(gl_FragCoord.xy), ",
  12948. to_expression(samples), ")");
  12949. }
  12950. else
  12951. {
  12952. // Implement subpass loads via texture barrier style sampling.
  12953. imgexpr = join("texelFetch(", to_non_uniform_aware_expression(ops[2]), ", ivec2(gl_FragCoord.xy), 0)");
  12954. }
  12955. }
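// For illustration (hypothetical names): without Vulkan semantics, a non-multisampled subpass load is
// lowered to a texel fetch at the current fragment position, e.g.
//     vec4 _55 = texelFetch(uSubpass0, ivec2(gl_FragCoord.xy), 0);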
  12956. imgexpr = remap_swizzle(get<SPIRType>(result_type), 4, imgexpr);
  12957. pure = true;
  12958. }
  12959. else
  12960. {
  12961. bool sparse = opcode == OpImageSparseRead;
  12962. uint32_t sparse_code_id = 0;
  12963. uint32_t sparse_texel_id = 0;
  12964. if (sparse)
  12965. emit_sparse_feedback_temporaries(ops[0], ops[1], sparse_code_id, sparse_texel_id);
  12966. // imageLoad only accepts int coords, not uint.
  12967. auto coord_expr = to_expression(ops[3]);
  12968. auto target_coord_type = expression_type(ops[3]);
  12969. target_coord_type.basetype = SPIRType::Int;
  12970. coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr);
  12971. // ES needs to emulate 1D images as 2D.
  12972. if (type.image.dim == Dim1D && options.es)
  12973. coord_expr = join("ivec2(", coord_expr, ", 0)");
  12974. // Plain image load/store.
  12975. if (sparse)
  12976. {
  12977. if (type.image.ms)
  12978. {
  12979. uint32_t operands = ops[4];
  12980. if (operands != ImageOperandsSampleMask || length != 6)
  12981. SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
  12982. "operand mask was used.");
  12983. uint32_t samples = ops[5];
  12984. statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_non_uniform_aware_expression(ops[2]), ", ",
  12985. coord_expr, ", ", to_expression(samples), ", ", to_expression(sparse_texel_id), ");");
  12986. }
  12987. else
  12988. {
  12989. statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_non_uniform_aware_expression(ops[2]), ", ",
  12990. coord_expr, ", ", to_expression(sparse_texel_id), ");");
  12991. }
  12992. imgexpr = join(type_to_glsl(get<SPIRType>(result_type)), "(", to_expression(sparse_code_id), ", ",
  12993. to_expression(sparse_texel_id), ")");
  12994. }
  12995. else
  12996. {
  12997. if (type.image.ms)
  12998. {
  12999. uint32_t operands = ops[4];
  13000. if (operands != ImageOperandsSampleMask || length != 6)
  13001. SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
  13002. "operand mask was used.");
  13003. uint32_t samples = ops[5];
  13004. imgexpr =
  13005. join("imageLoad(", to_non_uniform_aware_expression(ops[2]), ", ", coord_expr, ", ", to_expression(samples), ")");
  13006. }
  13007. else
  13008. imgexpr = join("imageLoad(", to_non_uniform_aware_expression(ops[2]), ", ", coord_expr, ")");
  13009. }
  13010. if (!sparse)
  13011. imgexpr = remap_swizzle(get<SPIRType>(result_type), 4, imgexpr);
  13012. pure = false;
  13013. }
  13014. if (var)
  13015. {
  13016. bool forward = forced_temporaries.find(id) == end(forced_temporaries);
  13017. auto &e = emit_op(result_type, id, imgexpr, forward);
  13018. // We only need to track dependencies if we're reading from image load/store.
  13019. if (!pure)
  13020. {
  13021. e.loaded_from = var->self;
  13022. if (forward)
  13023. var->dependees.push_back(id);
  13024. }
  13025. }
  13026. else
  13027. emit_op(result_type, id, imgexpr, false);
  13028. inherit_expression_dependencies(id, ops[2]);
  13029. if (type.image.ms)
  13030. inherit_expression_dependencies(id, ops[5]);
  13031. break;
  13032. }
  13033. case OpImageTexelPointer:
  13034. {
  13035. uint32_t result_type = ops[0];
  13036. uint32_t id = ops[1];
  13037. auto coord_expr = to_expression(ops[3]);
  13038. auto target_coord_type = expression_type(ops[3]);
  13039. target_coord_type.basetype = SPIRType::Int;
  13040. coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr);
  13041. auto expr = join(to_expression(ops[2]), ", ", coord_expr);
  13042. auto &e = set<SPIRExpression>(id, expr, result_type, true);
  13043. // When using the pointer, we need to know which variable it is actually loaded from.
  13044. auto *var = maybe_get_backing_variable(ops[2]);
  13045. e.loaded_from = var ? var->self : ID(0);
  13046. inherit_expression_dependencies(id, ops[3]);
  13047. break;
  13048. }
  13049. case OpImageWrite:
  13050. {
  13051. // We added Nonwritable speculatively to the OpImage variable due to glslangValidator
  13052. // not adding the proper qualifiers.
  13053. // If it turns out we need to write to the image after all, remove the qualifier and recompile.
  13054. auto *var = maybe_get_backing_variable(ops[0]);
  13055. if (var)
  13056. {
  13057. if (has_decoration(var->self, DecorationNonWritable))
  13058. {
  13059. unset_decoration(var->self, DecorationNonWritable);
  13060. force_recompile();
  13061. }
  13062. }
  13063. auto &type = expression_type(ops[0]);
  13064. auto &value_type = expression_type(ops[2]);
  13065. auto store_type = value_type;
  13066. store_type.vecsize = 4;
  13067. // imageStore only accepts int coords, not uint.
  13068. auto coord_expr = to_expression(ops[1]);
  13069. auto target_coord_type = expression_type(ops[1]);
  13070. target_coord_type.basetype = SPIRType::Int;
  13071. coord_expr = bitcast_expression(target_coord_type, expression_type(ops[1]).basetype, coord_expr);
  13072. // ES needs to emulate 1D images as 2D.
  13073. if (type.image.dim == Dim1D && options.es)
  13074. coord_expr = join("ivec2(", coord_expr, ", 0)");
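// For illustration (hypothetical names): a 1D store on ES is emitted with a padded coordinate, e.g.
//     imageStore(uImage, ivec2(x, 0), value);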
  13075. if (type.image.ms)
  13076. {
  13077. uint32_t operands = ops[3];
  13078. if (operands != ImageOperandsSampleMask || length != 5)
  13079. SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used.");
  13080. uint32_t samples = ops[4];
  13081. statement("imageStore(", to_non_uniform_aware_expression(ops[0]), ", ", coord_expr, ", ", to_expression(samples), ", ",
  13082. remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");");
  13083. }
  13084. else
  13085. statement("imageStore(", to_non_uniform_aware_expression(ops[0]), ", ", coord_expr, ", ",
  13086. remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");");
  13087. if (var && variable_storage_is_aliased(*var))
  13088. flush_all_aliased_variables();
  13089. break;
  13090. }
  13091. case OpImageQuerySize:
  13092. {
  13093. auto &type = expression_type(ops[2]);
  13094. uint32_t result_type = ops[0];
  13095. uint32_t id = ops[1];
  13096. if (type.basetype == SPIRType::Image)
  13097. {
  13098. string expr;
  13099. if (type.image.sampled == 2)
  13100. {
  13101. if (!options.es && options.version < 430)
  13102. require_extension_internal("GL_ARB_shader_image_size");
  13103. else if (options.es && options.version < 310)
  13104. SPIRV_CROSS_THROW("At least ESSL 3.10 required for imageSize.");
  13105. // The size of an image is always constant.
  13106. expr = join("imageSize(", to_non_uniform_aware_expression(ops[2]), ")");
  13107. }
  13108. else
  13109. {
  13110. // This path is hit for samplerBuffers and multisampled images which do not have LOD.
  13111. std::string fname = "textureSize";
  13112. if (is_legacy())
  13113. {
  13114. auto &imgtype = get<SPIRType>(type.self);
  13115. fname = legacy_tex_op(fname, imgtype, ops[2]);
  13116. }
  13117. expr = join(fname, "(", convert_separate_image_to_expression(ops[2]), ")");
  13118. }
  13119. auto &restype = get<SPIRType>(ops[0]);
  13120. expr = bitcast_expression(restype, SPIRType::Int, expr);
  13121. emit_op(result_type, id, expr, true);
  13122. }
  13123. else
  13124. SPIRV_CROSS_THROW("Invalid type for OpImageQuerySize.");
  13125. break;
  13126. }
  13127. case OpImageSampleWeightedQCOM:
  13128. case OpImageBoxFilterQCOM:
  13129. case OpImageBlockMatchSSDQCOM:
  13130. case OpImageBlockMatchSADQCOM:
  13131. {
  13132. require_extension_internal("GL_QCOM_image_processing");
  13133. uint32_t result_type_id = ops[0];
  13134. uint32_t id = ops[1];
  13135. string expr;
  13136. switch (opcode)
  13137. {
  13138. case OpImageSampleWeightedQCOM:
  13139. expr = "textureWeightedQCOM";
  13140. break;
  13141. case OpImageBoxFilterQCOM:
  13142. expr = "textureBoxFilterQCOM";
  13143. break;
  13144. case OpImageBlockMatchSSDQCOM:
  13145. expr = "textureBlockMatchSSDQCOM";
  13146. break;
  13147. case OpImageBlockMatchSADQCOM:
  13148. expr = "textureBlockMatchSADQCOM";
  13149. break;
  13150. default:
  13151. SPIRV_CROSS_THROW("Invalid opcode for QCOM_image_processing.");
  13152. }
  13153. expr += "(";
  13154. bool forward = false;
  13155. expr += to_expression(ops[2]);
  13156. expr += ", " + to_expression(ops[3]);
  13157. switch (opcode)
  13158. {
  13159. case OpImageSampleWeightedQCOM:
  13160. expr += ", " + to_non_uniform_aware_expression(ops[4]);
  13161. break;
  13162. case OpImageBoxFilterQCOM:
  13163. expr += ", " + to_expression(ops[4]);
  13164. break;
  13165. case OpImageBlockMatchSSDQCOM:
  13166. case OpImageBlockMatchSADQCOM:
  13167. expr += ", " + to_non_uniform_aware_expression(ops[4]);
  13168. expr += ", " + to_expression(ops[5]);
  13169. expr += ", " + to_expression(ops[6]);
  13170. break;
  13171. default:
  13172. SPIRV_CROSS_THROW("Invalid opcode for QCOM_image_processing.");
  13173. }
  13174. expr += ")";
  13175. emit_op(result_type_id, id, expr, forward);
  13176. inherit_expression_dependencies(id, ops[3]);
  13177. if (opcode == OpImageBlockMatchSSDQCOM || opcode == OpImageBlockMatchSADQCOM)
  13178. inherit_expression_dependencies(id, ops[5]);
  13179. break;
  13180. }
  13181. case OpImageBlockMatchWindowSSDQCOM:
  13182. case OpImageBlockMatchWindowSADQCOM:
  13183. case OpImageBlockMatchGatherSSDQCOM:
  13184. case OpImageBlockMatchGatherSADQCOM:
  13185. {
  13186. require_extension_internal("GL_QCOM_image_processing2");
  13187. uint32_t result_type_id = ops[0];
  13188. uint32_t id = ops[1];
  13189. string expr;
  13190. switch (opcode)
  13191. {
  13192. case OpImageBlockMatchWindowSSDQCOM:
  13193. expr = "textureBlockMatchWindowSSDQCOM";
  13194. break;
  13195. case OpImageBlockMatchWindowSADQCOM:
  13196. expr = "textureBlockMatchWindowSADQCOM";
  13197. break;
  13198. case OpImageBlockMatchGatherSSDQCOM:
  13199. expr = "textureBlockMatchGatherSSDQCOM";
  13200. break;
  13201. case OpImageBlockMatchGatherSADQCOM:
  13202. expr = "textureBlockMatchGatherSADQCOM";
  13203. break;
  13204. default:
  13205. SPIRV_CROSS_THROW("Invalid opcode for QCOM_image_processing2.");
  13206. }
  13207. expr += "(";
  13208. bool forward = false;
  13209. expr += to_expression(ops[2]);
  13210. expr += ", " + to_expression(ops[3]);
  13211. expr += ", " + to_non_uniform_aware_expression(ops[4]);
  13212. expr += ", " + to_expression(ops[5]);
  13213. expr += ", " + to_expression(ops[6]);
  13214. expr += ")";
  13215. emit_op(result_type_id, id, expr, forward);
  13216. inherit_expression_dependencies(id, ops[3]);
  13217. inherit_expression_dependencies(id, ops[5]);
  13218. break;
  13219. }
  13220. // Compute
  13221. case OpControlBarrier:
  13222. case OpMemoryBarrier:
  13223. {
  13224. uint32_t execution_scope = 0;
  13225. uint32_t memory;
  13226. uint32_t semantics;
  13227. if (opcode == OpMemoryBarrier)
  13228. {
  13229. memory = evaluate_constant_u32(ops[0]);
  13230. semantics = evaluate_constant_u32(ops[1]);
  13231. }
  13232. else
  13233. {
  13234. execution_scope = evaluate_constant_u32(ops[0]);
  13235. memory = evaluate_constant_u32(ops[1]);
  13236. semantics = evaluate_constant_u32(ops[2]);
  13237. }
  13238. if (execution_scope == ScopeSubgroup || memory == ScopeSubgroup)
  13239. {
  13240. // OpControlBarrier with ScopeSubgroup is subgroupBarrier()
  13241. if (opcode != OpControlBarrier)
  13242. {
  13243. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMemBarrier);
  13244. }
  13245. else
  13246. {
  13247. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBarrier);
  13248. }
  13249. }
  13250. if (execution_scope != ScopeSubgroup && get_entry_point().model == ExecutionModelTessellationControl)
  13251. {
13252. // Tessellation control shaders only get barrier(), and it implies the memory barriers as well.
  13253. if (opcode == OpControlBarrier)
  13254. statement("barrier();");
  13255. break;
  13256. }
  13257. // We only care about these flags, acquire/release and friends are not relevant to GLSL.
  13258. semantics = mask_relevant_memory_semantics(semantics);
  13259. if (opcode == OpMemoryBarrier)
  13260. {
13261. // If we are a memory barrier, and the next instruction is a control barrier, check whether that barrier
13262. // already covers the memory semantics we need, so we can avoid emitting a redundant barrier.
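// Example (hypothetical ordering): in a compute shader, an OpMemoryBarrier(Workgroup, WorkgroupMemory)
// immediately followed by OpControlBarrier(Workgroup, Workgroup, WorkgroupMemory) collapses into a single
//     barrier();
// since the workgroup-scope control barrier already orders shared memory.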
  13263. const Instruction *next = get_next_instruction_in_block(instruction);
  13264. if (next && next->op == OpControlBarrier)
  13265. {
  13266. auto *next_ops = stream(*next);
  13267. uint32_t next_memory = evaluate_constant_u32(next_ops[1]);
  13268. uint32_t next_semantics = evaluate_constant_u32(next_ops[2]);
  13269. next_semantics = mask_relevant_memory_semantics(next_semantics);
  13270. bool memory_scope_covered = false;
  13271. if (next_memory == memory)
  13272. memory_scope_covered = true;
  13273. else if (next_semantics == MemorySemanticsWorkgroupMemoryMask)
  13274. {
  13275. // If we only care about workgroup memory, either Device or Workgroup scope is fine,
  13276. // scope does not have to match.
  13277. if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) &&
  13278. (memory == ScopeDevice || memory == ScopeWorkgroup))
  13279. {
  13280. memory_scope_covered = true;
  13281. }
  13282. }
  13283. else if (memory == ScopeWorkgroup && next_memory == ScopeDevice)
  13284. {
  13285. // The control barrier has device scope, but the memory barrier just has workgroup scope.
  13286. memory_scope_covered = true;
  13287. }
  13288. // If we have the same memory scope, and all memory types are covered, we're good.
  13289. if (memory_scope_covered && (semantics & next_semantics) == semantics)
  13290. break;
  13291. }
  13292. }
  13293. // We are synchronizing some memory or syncing execution,
  13294. // so we cannot forward any loads beyond the memory barrier.
  13295. if (semantics || opcode == OpControlBarrier)
  13296. {
  13297. assert(current_emitting_block);
  13298. flush_control_dependent_expressions(current_emitting_block->self);
  13299. flush_all_active_variables();
  13300. }
  13301. if (memory == ScopeWorkgroup) // Only need to consider memory within a group
  13302. {
  13303. if (semantics == MemorySemanticsWorkgroupMemoryMask)
  13304. {
  13305. // OpControlBarrier implies a memory barrier for shared memory as well.
  13306. bool implies_shared_barrier = opcode == OpControlBarrier && execution_scope == ScopeWorkgroup;
  13307. if (!implies_shared_barrier)
  13308. statement("memoryBarrierShared();");
  13309. }
  13310. else if (semantics != 0)
  13311. statement("groupMemoryBarrier();");
  13312. }
  13313. else if (memory == ScopeSubgroup)
  13314. {
  13315. const uint32_t all_barriers =
  13316. MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask;
  13317. if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
  13318. {
  13319. // These are not relevant for GLSL, but assume it means memoryBarrier().
  13320. // memoryBarrier() does everything, so no need to test anything else.
  13321. statement("subgroupMemoryBarrier();");
  13322. }
  13323. else if ((semantics & all_barriers) == all_barriers)
  13324. {
  13325. // Short-hand instead of emitting 3 barriers.
  13326. statement("subgroupMemoryBarrier();");
  13327. }
  13328. else
  13329. {
  13330. // Pick out individual barriers.
  13331. if (semantics & MemorySemanticsWorkgroupMemoryMask)
  13332. statement("subgroupMemoryBarrierShared();");
  13333. if (semantics & MemorySemanticsUniformMemoryMask)
  13334. statement("subgroupMemoryBarrierBuffer();");
  13335. if (semantics & MemorySemanticsImageMemoryMask)
  13336. statement("subgroupMemoryBarrierImage();");
  13337. }
  13338. }
  13339. else
  13340. {
  13341. const uint32_t all_barriers =
  13342. MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask;
  13343. if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
  13344. {
  13345. // These are not relevant for GLSL, but assume it means memoryBarrier().
  13346. // memoryBarrier() does everything, so no need to test anything else.
  13347. statement("memoryBarrier();");
  13348. }
  13349. else if ((semantics & all_barriers) == all_barriers)
  13350. {
  13351. // Short-hand instead of emitting 4 barriers.
  13352. statement("memoryBarrier();");
  13353. }
  13354. else
  13355. {
  13356. // Pick out individual barriers.
  13357. if (semantics & MemorySemanticsWorkgroupMemoryMask)
  13358. statement("memoryBarrierShared();");
  13359. if (semantics & MemorySemanticsUniformMemoryMask)
  13360. statement("memoryBarrierBuffer();");
  13361. if (semantics & MemorySemanticsImageMemoryMask)
  13362. statement("memoryBarrierImage();");
  13363. }
  13364. }
  13365. if (opcode == OpControlBarrier)
  13366. {
  13367. if (execution_scope == ScopeSubgroup)
  13368. statement("subgroupBarrier();");
  13369. else
  13370. statement("barrier();");
  13371. }
  13372. break;
  13373. }
  13374. case OpExtInstWithForwardRefsKHR:
  13375. {
  13376. uint32_t extension_set = ops[2];
  13377. auto ext = get<SPIRExtension>(extension_set).ext;
  13378. if (ext != SPIRExtension::SPV_debug_info &&
  13379. ext != SPIRExtension::NonSemanticShaderDebugInfo &&
  13380. ext != SPIRExtension::NonSemanticGeneric)
  13381. {
  13382. SPIRV_CROSS_THROW("Unexpected use of ExtInstWithForwardRefsKHR.");
  13383. }
  13384. break;
  13385. }
  13386. case OpExtInst:
  13387. {
  13388. uint32_t extension_set = ops[2];
  13389. auto ext = get<SPIRExtension>(extension_set).ext;
  13390. if (ext == SPIRExtension::GLSL)
  13391. {
  13392. emit_glsl_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
  13393. }
  13394. else if (ext == SPIRExtension::SPV_AMD_shader_ballot)
  13395. {
  13396. emit_spv_amd_shader_ballot_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
  13397. }
  13398. else if (ext == SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter)
  13399. {
  13400. emit_spv_amd_shader_explicit_vertex_parameter_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
  13401. }
  13402. else if (ext == SPIRExtension::SPV_AMD_shader_trinary_minmax)
  13403. {
  13404. emit_spv_amd_shader_trinary_minmax_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
  13405. }
  13406. else if (ext == SPIRExtension::SPV_AMD_gcn_shader)
  13407. {
  13408. emit_spv_amd_gcn_shader_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
  13409. }
  13410. else if (ext == SPIRExtension::NonSemanticShaderDebugInfo)
  13411. {
  13412. emit_non_semantic_shader_debug_info(ops[0], ops[1], ops[3], &ops[4], length - 4);
  13413. }
  13414. else if (ext == SPIRExtension::SPV_debug_info ||
  13415. ext == SPIRExtension::NonSemanticGeneric)
  13416. {
  13417. break; // Ignore SPIR-V debug information extended instructions.
  13418. }
  13419. else if (ext == SPIRExtension::NonSemanticDebugPrintf)
  13420. {
  13421. // Operation 1 is printf.
  13422. if (ops[3] == 1)
  13423. {
  13424. if (!options.vulkan_semantics)
  13425. SPIRV_CROSS_THROW("Debug printf is only supported in Vulkan GLSL.\n");
  13426. require_extension_internal("GL_EXT_debug_printf");
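// For illustration (hypothetical names; assuming backend.printf_function resolves to "debugPrintfEXT"
// for Vulkan GLSL): the instruction lowers to something like
//     debugPrintfEXT("value = %u", _12);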
  13427. auto &format_string = get<SPIRString>(ops[4]).str;
  13428. string expr = join(backend.printf_function, "(\"", format_string, "\"");
  13429. for (uint32_t i = 5; i < length; i++)
  13430. {
  13431. expr += ", ";
  13432. expr += to_expression(ops[i]);
  13433. }
  13434. statement(expr, ");");
  13435. }
  13436. }
  13437. else
  13438. {
  13439. statement("// unimplemented ext op ", instruction.op);
  13440. break;
  13441. }
  13442. break;
  13443. }
  13444. // Legacy sub-group stuff ...
  13445. case OpSubgroupBallotKHR:
  13446. {
  13447. uint32_t result_type = ops[0];
  13448. uint32_t id = ops[1];
  13449. string expr;
  13450. expr = join("uvec4(unpackUint2x32(ballotARB(" + to_expression(ops[2]) + ")), 0u, 0u)");
  13451. emit_op(result_type, id, expr, should_forward(ops[2]));
  13452. require_extension_internal("GL_ARB_shader_ballot");
  13453. inherit_expression_dependencies(id, ops[2]);
  13454. register_control_dependent_expression(ops[1]);
  13455. break;
  13456. }
  13457. case OpSubgroupFirstInvocationKHR:
  13458. {
  13459. uint32_t result_type = ops[0];
  13460. uint32_t id = ops[1];
  13461. emit_unary_func_op(result_type, id, ops[2], "readFirstInvocationARB");
  13462. require_extension_internal("GL_ARB_shader_ballot");
  13463. register_control_dependent_expression(ops[1]);
  13464. break;
  13465. }
  13466. case OpSubgroupReadInvocationKHR:
  13467. {
  13468. uint32_t result_type = ops[0];
  13469. uint32_t id = ops[1];
  13470. emit_binary_func_op(result_type, id, ops[2], ops[3], "readInvocationARB");
  13471. require_extension_internal("GL_ARB_shader_ballot");
  13472. register_control_dependent_expression(ops[1]);
  13473. break;
  13474. }
  13475. case OpSubgroupAllKHR:
  13476. {
  13477. uint32_t result_type = ops[0];
  13478. uint32_t id = ops[1];
  13479. emit_unary_func_op(result_type, id, ops[2], "allInvocationsARB");
  13480. require_extension_internal("GL_ARB_shader_group_vote");
  13481. register_control_dependent_expression(ops[1]);
  13482. break;
  13483. }
  13484. case OpSubgroupAnyKHR:
  13485. {
  13486. uint32_t result_type = ops[0];
  13487. uint32_t id = ops[1];
  13488. emit_unary_func_op(result_type, id, ops[2], "anyInvocationARB");
  13489. require_extension_internal("GL_ARB_shader_group_vote");
  13490. register_control_dependent_expression(ops[1]);
  13491. break;
  13492. }
  13493. case OpSubgroupAllEqualKHR:
  13494. {
  13495. uint32_t result_type = ops[0];
  13496. uint32_t id = ops[1];
  13497. emit_unary_func_op(result_type, id, ops[2], "allInvocationsEqualARB");
  13498. require_extension_internal("GL_ARB_shader_group_vote");
  13499. register_control_dependent_expression(ops[1]);
  13500. break;
  13501. }
  13502. case OpGroupIAddNonUniformAMD:
  13503. case OpGroupFAddNonUniformAMD:
  13504. {
  13505. uint32_t result_type = ops[0];
  13506. uint32_t id = ops[1];
  13507. emit_unary_func_op(result_type, id, ops[4], "addInvocationsNonUniformAMD");
  13508. require_extension_internal("GL_AMD_shader_ballot");
  13509. register_control_dependent_expression(ops[1]);
  13510. break;
  13511. }
  13512. case OpGroupFMinNonUniformAMD:
  13513. case OpGroupUMinNonUniformAMD:
  13514. case OpGroupSMinNonUniformAMD:
  13515. {
  13516. uint32_t result_type = ops[0];
  13517. uint32_t id = ops[1];
  13518. emit_unary_func_op(result_type, id, ops[4], "minInvocationsNonUniformAMD");
  13519. require_extension_internal("GL_AMD_shader_ballot");
  13520. register_control_dependent_expression(ops[1]);
  13521. break;
  13522. }
  13523. case OpGroupFMaxNonUniformAMD:
  13524. case OpGroupUMaxNonUniformAMD:
  13525. case OpGroupSMaxNonUniformAMD:
  13526. {
  13527. uint32_t result_type = ops[0];
  13528. uint32_t id = ops[1];
  13529. emit_unary_func_op(result_type, id, ops[4], "maxInvocationsNonUniformAMD");
  13530. require_extension_internal("GL_AMD_shader_ballot");
  13531. register_control_dependent_expression(ops[1]);
  13532. break;
  13533. }
  13534. case OpFragmentMaskFetchAMD:
  13535. {
  13536. auto &type = expression_type(ops[2]);
  13537. uint32_t result_type = ops[0];
  13538. uint32_t id = ops[1];
  13539. if (type.image.dim == DimSubpassData)
  13540. {
  13541. emit_unary_func_op(result_type, id, ops[2], "fragmentMaskFetchAMD");
  13542. }
  13543. else
  13544. {
  13545. emit_binary_func_op(result_type, id, ops[2], ops[3], "fragmentMaskFetchAMD");
  13546. }
  13547. require_extension_internal("GL_AMD_shader_fragment_mask");
  13548. break;
  13549. }
  13550. case OpFragmentFetchAMD:
  13551. {
  13552. auto &type = expression_type(ops[2]);
  13553. uint32_t result_type = ops[0];
  13554. uint32_t id = ops[1];
  13555. if (type.image.dim == DimSubpassData)
  13556. {
  13557. emit_binary_func_op(result_type, id, ops[2], ops[4], "fragmentFetchAMD");
  13558. }
  13559. else
  13560. {
  13561. emit_trinary_func_op(result_type, id, ops[2], ops[3], ops[4], "fragmentFetchAMD");
  13562. }
  13563. require_extension_internal("GL_AMD_shader_fragment_mask");
  13564. break;
  13565. }
  13566. // Vulkan 1.1 sub-group stuff ...
  13567. case OpGroupNonUniformElect:
  13568. case OpGroupNonUniformBroadcast:
  13569. case OpGroupNonUniformBroadcastFirst:
  13570. case OpGroupNonUniformBallot:
  13571. case OpGroupNonUniformInverseBallot:
  13572. case OpGroupNonUniformBallotBitExtract:
  13573. case OpGroupNonUniformBallotBitCount:
  13574. case OpGroupNonUniformBallotFindLSB:
  13575. case OpGroupNonUniformBallotFindMSB:
  13576. case OpGroupNonUniformShuffle:
  13577. case OpGroupNonUniformShuffleXor:
  13578. case OpGroupNonUniformShuffleUp:
  13579. case OpGroupNonUniformShuffleDown:
  13580. case OpGroupNonUniformAll:
  13581. case OpGroupNonUniformAny:
  13582. case OpGroupNonUniformAllEqual:
  13583. case OpGroupNonUniformFAdd:
  13584. case OpGroupNonUniformIAdd:
  13585. case OpGroupNonUniformFMul:
  13586. case OpGroupNonUniformIMul:
  13587. case OpGroupNonUniformFMin:
  13588. case OpGroupNonUniformFMax:
  13589. case OpGroupNonUniformSMin:
  13590. case OpGroupNonUniformSMax:
  13591. case OpGroupNonUniformUMin:
  13592. case OpGroupNonUniformUMax:
  13593. case OpGroupNonUniformBitwiseAnd:
  13594. case OpGroupNonUniformBitwiseOr:
  13595. case OpGroupNonUniformBitwiseXor:
  13596. case OpGroupNonUniformLogicalAnd:
  13597. case OpGroupNonUniformLogicalOr:
  13598. case OpGroupNonUniformLogicalXor:
  13599. case OpGroupNonUniformQuadSwap:
  13600. case OpGroupNonUniformQuadBroadcast:
  13601. case OpGroupNonUniformQuadAllKHR:
  13602. case OpGroupNonUniformQuadAnyKHR:
  13603. case OpGroupNonUniformRotateKHR:
  13604. emit_subgroup_op(instruction);
  13605. break;
  13606. case OpFUnordEqual:
  13607. case OpFUnordLessThan:
  13608. case OpFUnordGreaterThan:
  13609. case OpFUnordLessThanEqual:
  13610. case OpFUnordGreaterThanEqual:
  13611. {
  13612. // GLSL doesn't specify if floating point comparisons are ordered or unordered,
  13613. // but glslang always emits ordered floating point compares for GLSL.
  13614. // To get unordered compares, we can test the opposite thing and invert the result.
  13615. // This way, we force true when there is any NaN present.
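// For illustration (hypothetical names): a scalar OpFUnordLessThan(a, b) is emitted as
//     bool _61 = !(a >= b);
// which is true when a < b or when either operand is NaN, matching unordered semantics.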
  13616. uint32_t op0 = ops[2];
  13617. uint32_t op1 = ops[3];
  13618. string expr;
  13619. if (expression_type(op0).vecsize > 1)
  13620. {
  13621. const char *comp_op = nullptr;
  13622. switch (opcode)
  13623. {
  13624. case OpFUnordEqual:
  13625. comp_op = "notEqual";
  13626. break;
  13627. case OpFUnordLessThan:
  13628. comp_op = "greaterThanEqual";
  13629. break;
  13630. case OpFUnordLessThanEqual:
  13631. comp_op = "greaterThan";
  13632. break;
  13633. case OpFUnordGreaterThan:
  13634. comp_op = "lessThanEqual";
  13635. break;
  13636. case OpFUnordGreaterThanEqual:
  13637. comp_op = "lessThan";
  13638. break;
  13639. default:
  13640. assert(0);
  13641. break;
  13642. }
  13643. expr = join("not(", comp_op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), "))");
  13644. }
  13645. else
  13646. {
  13647. const char *comp_op = nullptr;
  13648. switch (opcode)
  13649. {
  13650. case OpFUnordEqual:
  13651. comp_op = " != ";
  13652. break;
  13653. case OpFUnordLessThan:
  13654. comp_op = " >= ";
  13655. break;
  13656. case OpFUnordLessThanEqual:
  13657. comp_op = " > ";
  13658. break;
  13659. case OpFUnordGreaterThan:
  13660. comp_op = " <= ";
  13661. break;
  13662. case OpFUnordGreaterThanEqual:
  13663. comp_op = " < ";
  13664. break;
  13665. default:
  13666. assert(0);
  13667. break;
  13668. }
  13669. expr = join("!(", to_enclosed_unpacked_expression(op0), comp_op, to_enclosed_unpacked_expression(op1), ")");
  13670. }
  13671. emit_op(ops[0], ops[1], expr, should_forward(op0) && should_forward(op1));
  13672. inherit_expression_dependencies(ops[1], op0);
  13673. inherit_expression_dependencies(ops[1], op1);
  13674. break;
  13675. }
  13676. case OpReportIntersectionKHR:
  13677. // NV is same opcode.
  13678. forced_temporaries.insert(ops[1]);
  13679. if (ray_tracing_is_khr)
  13680. GLSL_BFOP(reportIntersectionEXT);
  13681. else
  13682. GLSL_BFOP(reportIntersectionNV);
  13683. flush_control_dependent_expressions(current_emitting_block->self);
  13684. break;
  13685. case OpIgnoreIntersectionNV:
  13686. // KHR variant is a terminator.
  13687. statement("ignoreIntersectionNV();");
  13688. flush_control_dependent_expressions(current_emitting_block->self);
  13689. break;
  13690. case OpTerminateRayNV:
  13691. // KHR variant is a terminator.
  13692. statement("terminateRayNV();");
  13693. flush_control_dependent_expressions(current_emitting_block->self);
  13694. break;
  13695. case OpTraceNV:
  13696. statement("traceNV(", to_non_uniform_aware_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ",
  13697. to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
  13698. to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ",
  13699. to_expression(ops[9]), ", ", to_expression(ops[10]), ");");
  13700. flush_control_dependent_expressions(current_emitting_block->self);
  13701. break;
  13702. case OpTraceRayKHR:
  13703. if (!has_decoration(ops[10], DecorationLocation))
  13704. SPIRV_CROSS_THROW("A memory declaration object must be used in TraceRayKHR.");
  13705. statement("traceRayEXT(", to_non_uniform_aware_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ",
  13706. to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
  13707. to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ",
  13708. to_expression(ops[9]), ", ", get_decoration(ops[10], DecorationLocation), ");");
  13709. flush_control_dependent_expressions(current_emitting_block->self);
  13710. break;
  13711. case OpExecuteCallableNV:
  13712. statement("executeCallableNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");");
  13713. flush_control_dependent_expressions(current_emitting_block->self);
  13714. break;
  13715. case OpExecuteCallableKHR:
  13716. if (!has_decoration(ops[1], DecorationLocation))
  13717. SPIRV_CROSS_THROW("A memory declaration object must be used in ExecuteCallableKHR.");
  13718. statement("executeCallableEXT(", to_expression(ops[0]), ", ", get_decoration(ops[1], DecorationLocation), ");");
  13719. flush_control_dependent_expressions(current_emitting_block->self);
  13720. break;
  13721. // Don't bother forwarding temporaries. Avoids having to test expression invalidation with ray query objects.
  13722. case OpRayQueryInitializeKHR:
  13723. flush_variable_declaration(ops[0]);
  13724. statement("rayQueryInitializeEXT(",
  13725. to_expression(ops[0]), ", ", to_expression(ops[1]), ", ",
  13726. to_expression(ops[2]), ", ", to_expression(ops[3]), ", ",
  13727. to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
  13728. to_expression(ops[6]), ", ", to_expression(ops[7]), ");");
  13729. break;
  13730. case OpRayQueryProceedKHR:
  13731. flush_variable_declaration(ops[0]);
  13732. emit_op(ops[0], ops[1], join("rayQueryProceedEXT(", to_expression(ops[2]), ")"), false);
  13733. break;
  13734. case OpRayQueryTerminateKHR:
  13735. flush_variable_declaration(ops[0]);
  13736. statement("rayQueryTerminateEXT(", to_expression(ops[0]), ");");
  13737. break;
  13738. case OpRayQueryGenerateIntersectionKHR:
  13739. flush_variable_declaration(ops[0]);
  13740. statement("rayQueryGenerateIntersectionEXT(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");");
  13741. break;
  13742. case OpRayQueryConfirmIntersectionKHR:
  13743. flush_variable_declaration(ops[0]);
  13744. statement("rayQueryConfirmIntersectionEXT(", to_expression(ops[0]), ");");
  13745. break;
  13746. case OpRayQueryGetIntersectionTriangleVertexPositionsKHR:
  13747. flush_variable_declaration(ops[1]);
  13748. emit_uninitialized_temporary_expression(ops[0], ops[1]);
  13749. statement("rayQueryGetIntersectionTriangleVertexPositionsEXT(", to_expression(ops[2]), ", bool(", to_expression(ops[3]), "), ", to_expression(ops[1]), ");");
  13750. break;
  13751. #define GLSL_RAY_QUERY_GET_OP(op) \
  13752. case OpRayQueryGet##op##KHR: \
  13753. flush_variable_declaration(ops[2]); \
  13754. emit_op(ops[0], ops[1], join("rayQueryGet" #op "EXT(", to_expression(ops[2]), ")"), false); \
  13755. break
  13756. #define GLSL_RAY_QUERY_GET_OP2(op) \
  13757. case OpRayQueryGet##op##KHR: \
  13758. flush_variable_declaration(ops[2]); \
  13759. emit_op(ops[0], ops[1], join("rayQueryGet" #op "EXT(", to_expression(ops[2]), ", ", "bool(", to_expression(ops[3]), "))"), false); \
  13760. break
  13761. GLSL_RAY_QUERY_GET_OP(RayTMin);
  13762. GLSL_RAY_QUERY_GET_OP(RayFlags);
  13763. GLSL_RAY_QUERY_GET_OP(WorldRayOrigin);
  13764. GLSL_RAY_QUERY_GET_OP(WorldRayDirection);
  13765. GLSL_RAY_QUERY_GET_OP(IntersectionCandidateAABBOpaque);
  13766. GLSL_RAY_QUERY_GET_OP2(IntersectionType);
  13767. GLSL_RAY_QUERY_GET_OP2(IntersectionT);
  13768. GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceCustomIndex);
  13769. GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceId);
  13770. GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceShaderBindingTableRecordOffset);
  13771. GLSL_RAY_QUERY_GET_OP2(IntersectionGeometryIndex);
  13772. GLSL_RAY_QUERY_GET_OP2(IntersectionPrimitiveIndex);
  13773. GLSL_RAY_QUERY_GET_OP2(IntersectionBarycentrics);
  13774. GLSL_RAY_QUERY_GET_OP2(IntersectionFrontFace);
  13775. GLSL_RAY_QUERY_GET_OP2(IntersectionObjectRayDirection);
  13776. GLSL_RAY_QUERY_GET_OP2(IntersectionObjectRayOrigin);
  13777. GLSL_RAY_QUERY_GET_OP2(IntersectionObjectToWorld);
  13778. GLSL_RAY_QUERY_GET_OP2(IntersectionWorldToObject);
  13779. #undef GLSL_RAY_QUERY_GET_OP
  13780. #undef GLSL_RAY_QUERY_GET_OP2
  13781. case OpRayQueryGetClusterIdNV:
  13782. flush_variable_declaration(ops[2]);
  13783. emit_op(ops[0], ops[1], join("rayQueryGetIntersectionClusterIdNV(", to_expression(ops[2]), ", ", "bool(", to_expression(ops[3]), "))"), false);
  13784. break;
  13785. case OpTensorQuerySizeARM:
  13786. flush_variable_declaration(ops[1]);
  13787. // tensorSizeARM(tensor, dimension)
  13788. emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], "tensorSizeARM");
  13789. break;
  13790. case OpTensorReadARM:
  13791. {
  13792. flush_variable_declaration(ops[1]);
  13793. emit_uninitialized_temporary_expression(ops[0], ops[1]);
  13794. SmallVector<std::string> args {
  13795. to_expression(ops[2]), // tensor
  13796. to_expression(ops[3]), // coordinates
  13797. to_expression(ops[1]), // out value
  13798. };
  13799. if (length > 4)
  13800. {
  13801. std::string tensor_operands;
  13802. if (ops[4] == 0)
  13803. tensor_operands = "0x0u";
  13804. else if (ops[4] == TensorOperandsNontemporalARMMask)
  13805. tensor_operands = "gl_TensorOperandsNonTemporalARM";
  13806. else if (ops[4] == TensorOperandsOutOfBoundsValueARMMask)
  13807. tensor_operands = "gl_TensorOperandsOutOfBoundsValueARM";
  13808. else if (ops[4] == (TensorOperandsNontemporalARMMask | TensorOperandsOutOfBoundsValueARMMask))
  13809. tensor_operands = "gl_TensorOperandsNonTemporalARM | gl_TensorOperandsOutOfBoundsValueARM";
  13810. else
  13811. SPIRV_CROSS_THROW("Invalid tensorOperands for tensorReadARM.");
  13812. if ((ops[4] & TensorOperandsOutOfBoundsValueARMMask) && length != 6)
  13813. SPIRV_CROSS_THROW("gl_TensorOperandsOutOfBoundsValueARM requires an outOfBoundsValue argument.");
  13814. args.push_back(tensor_operands); // tensorOperands
  13815. }
  13816. if (length >= 6)
  13817. {
  13818. if ((length > 6) || (ops[4] & TensorOperandsOutOfBoundsValueARMMask) == 0)
  13819. SPIRV_CROSS_THROW("Too many arguments to tensorReadARM.");
  13820. args.push_back(to_expression(ops[5])); // outOfBoundsValue
  13821. }
13822. // tensorReadARM(tensor, coordinates, value [, tensorOperands [, outOfBoundsValue]])
  13823. statement("tensorReadARM(", merge(args), ");");
  13824. break;
  13825. }
  13826. case OpTensorWriteARM:
  13827. {
  13828. flush_variable_declaration(ops[0]);
  13829. SmallVector<std::string> args {
  13830. to_expression(ops[0]), // tensor
  13831. to_expression(ops[1]), // coordinates
13832. to_expression(ops[2]), // value to write
  13833. };
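// Optional TensorOperands mask in ops[3]; only the NonTemporal bit is accepted for writes here.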
  13834. if (length > 3)
  13835. {
  13836. std::string tensor_operands;
  13837. if (ops[3] == 0)
  13838. tensor_operands = "0x0u";
  13839. else if (ops[3] == TensorOperandsNontemporalARMMask)
  13840. tensor_operands = "gl_TensorOperandsNonTemporalARM";
  13841. else
  13842. SPIRV_CROSS_THROW("Invalid tensorOperands for tensorWriteARM.");
  13843. args.push_back(tensor_operands); // tensorOperands
  13844. }
  13845. if (length > 4)
  13846. SPIRV_CROSS_THROW("Too many arguments to tensorWriteARM.");
13847. // tensorWriteARM(tensor, coordinates, value[, tensorOperands])
  13848. statement("tensorWriteARM(", merge(args), ");");
  13849. break;
  13850. }
  13851. case OpConvertUToAccelerationStructureKHR:
  13852. {
  13853. require_extension_internal("GL_EXT_ray_tracing");
  13854. bool elide_temporary = should_forward(ops[2]) && forced_temporaries.count(ops[1]) == 0 &&
  13855. !hoisted_temporaries.count(ops[1]);
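// If the input can still be forwarded and no temporary has been forced or hoisted for this ID,
// the accelerationStructureEXT() cast can be emitted inline without a temporary.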
  13856. if (elide_temporary)
  13857. {
  13858. GLSL_UFOP(accelerationStructureEXT);
  13859. }
  13860. else
  13861. {
  13862. // Force this path in subsequent iterations.
  13863. forced_temporaries.insert(ops[1]);
  13864. // We cannot declare a temporary acceleration structure in GLSL.
  13865. // If we get to this point, we'll have to emit a temporary uvec2,
  13866. // and cast to RTAS on demand.
  13867. statement(declare_temporary(expression_type_id(ops[2]), ops[1]), to_unpacked_expression(ops[2]), ";");
  13868. // Use raw SPIRExpression interface to block all usage tracking.
  13869. set<SPIRExpression>(ops[1], join("accelerationStructureEXT(", to_name(ops[1]), ")"), ops[0], true);
  13870. }
  13871. break;
  13872. }
  13873. case OpConvertUToPtr:
  13874. {
  13875. auto &type = get<SPIRType>(ops[0]);
  13876. if (type.storage != StorageClassPhysicalStorageBuffer)
  13877. SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBuffer is supported by OpConvertUToPtr.");
  13878. auto &in_type = expression_type(ops[2]);
  13879. if (in_type.vecsize == 2)
  13880. require_extension_internal("GL_EXT_buffer_reference_uvec2");
  13881. auto op = type_to_glsl(type);
  13882. emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str());
  13883. break;
  13884. }
  13885. case OpConvertPtrToU:
  13886. {
  13887. auto &type = get<SPIRType>(ops[0]);
  13888. auto &ptr_type = expression_type(ops[2]);
  13889. if (ptr_type.storage != StorageClassPhysicalStorageBuffer)
  13890. SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBuffer is supported by OpConvertPtrToU.");
  13891. if (type.vecsize == 2)
  13892. require_extension_internal("GL_EXT_buffer_reference_uvec2");
  13893. auto op = type_to_glsl(type);
  13894. emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str());
  13895. break;
  13896. }
  13897. case OpUndef:
  13898. // Undefined value has been declared.
  13899. break;
  13900. case OpLine:
  13901. {
  13902. emit_line_directive(ops[0], ops[1]);
  13903. break;
  13904. }
  13905. case OpNoLine:
  13906. break;
  13907. case OpDemoteToHelperInvocationEXT:
  13908. if (!options.vulkan_semantics)
  13909. SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
  13910. require_extension_internal("GL_EXT_demote_to_helper_invocation");
  13911. statement(backend.demote_literal, ";");
  13912. break;
  13913. case OpIsHelperInvocationEXT:
  13914. if (!options.vulkan_semantics)
  13915. SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
  13916. require_extension_internal("GL_EXT_demote_to_helper_invocation");
  13917. // Helper lane state with demote is volatile by nature.
  13918. // Do not forward this.
  13919. emit_op(ops[0], ops[1], "helperInvocationEXT()", false);
  13920. break;
  13921. case OpBeginInvocationInterlockEXT:
  13922. // If the interlock is complex, we emit this elsewhere.
  13923. if (!interlocked_is_complex)
  13924. {
  13925. statement("SPIRV_Cross_beginInvocationInterlock();");
  13926. flush_all_active_variables();
  13927. // Make sure forwarding doesn't propagate outside interlock region.
  13928. }
  13929. break;
  13930. case OpEndInvocationInterlockEXT:
  13931. // If the interlock is complex, we emit this elsewhere.
  13932. if (!interlocked_is_complex)
  13933. {
  13934. statement("SPIRV_Cross_endInvocationInterlock();");
  13935. flush_all_active_variables();
  13936. // Make sure forwarding doesn't propagate outside interlock region.
  13937. }
  13938. break;
  13939. case OpSetMeshOutputsEXT:
  13940. statement("SetMeshOutputsEXT(", to_unpacked_expression(ops[0]), ", ", to_unpacked_expression(ops[1]), ");");
  13941. break;
  13942. case OpReadClockKHR:
  13943. {
  13944. auto &type = get<SPIRType>(ops[0]);
  13945. auto scope = static_cast<Scope>(evaluate_constant_u32(ops[2]));
  13946. const char *op = nullptr;
  13947. // Forwarding clock statements leads to a scenario where an SSA value can take on different
  13948. // values every time it's evaluated. Block any forwarding attempt.
  13949. // We also might want to invalidate all expressions to function as a sort of optimization
13950. // barrier, but that might be overkill for now.
  13951. if (scope == ScopeDevice)
  13952. {
  13953. require_extension_internal("GL_EXT_shader_realtime_clock");
  13954. if (type.basetype == SPIRType::BaseType::UInt64)
  13955. op = "clockRealtimeEXT()";
  13956. else if (type.basetype == SPIRType::BaseType::UInt && type.vecsize == 2)
  13957. op = "clockRealtime2x32EXT()";
  13958. else
  13959. SPIRV_CROSS_THROW("Unsupported result type for OpReadClockKHR opcode.");
  13960. }
  13961. else if (scope == ScopeSubgroup)
  13962. {
  13963. require_extension_internal("GL_ARB_shader_clock");
  13964. if (type.basetype == SPIRType::BaseType::UInt64)
  13965. op = "clockARB()";
  13966. else if (type.basetype == SPIRType::BaseType::UInt && type.vecsize == 2)
  13967. op = "clock2x32ARB()";
  13968. else
  13969. SPIRV_CROSS_THROW("Unsupported result type for OpReadClockKHR opcode.");
  13970. }
  13971. else
  13972. SPIRV_CROSS_THROW("Unsupported scope for OpReadClockKHR opcode.");
  13973. emit_op(ops[0], ops[1], op, false);
  13974. break;
  13975. }
  13976. case OpCooperativeVectorLoadNV:
  13977. {
  13978. uint32_t result_type = ops[0];
  13979. uint32_t id = ops[1];
  13980. emit_uninitialized_temporary_expression(result_type, id);
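// coopVecLoadNV(value, pointer, offset) fills the freshly declared temporary in place.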
  13981. statement("coopVecLoadNV(", to_expression(id), ", ", to_expression(ops[2]), ", ", to_expression(ops[3]), ");");
  13982. register_read(id, ops[2], false);
  13983. break;
  13984. }
  13985. case OpCooperativeVectorStoreNV:
  13986. {
  13987. uint32_t id = ops[0];
  13988. statement("coopVecStoreNV(", to_expression(ops[2]), ", ", to_expression(id), ", ", to_expression(ops[1]), ");");
  13989. register_write(ops[2]);
  13990. break;
  13991. }
  13992. case OpCooperativeVectorOuterProductAccumulateNV:
  13993. {
  13994. auto buf = ops[0];
  13995. auto offset = ops[1];
  13996. auto v1 = ops[2];
  13997. auto v2 = ops[3];
  13998. auto matrix_layout_id = ops[4];
13999. auto matrix_interpretation_id = ops[5];
14000. auto matrix_stride_id = length > 6 ? ops[6] : 0;
  14001. statement(join("coopVecOuterProductAccumulateNV(", to_expression(v1), ", ", to_expression(v2), ", ",
  14002. to_expression(buf), ", ", to_expression(offset), ", ",
  14003. matrix_stride_id ? to_expression(matrix_stride_id) : "0",
  14004. ", ", to_pretty_expression_if_int_constant(
  14005. matrix_layout_id, std::begin(CoopVecMatrixLayoutNames), std::end(CoopVecMatrixLayoutNames)),
  14006. ", ", to_pretty_expression_if_int_constant(
14007. matrix_interpretation_id, std::begin(CoopVecComponentTypeNames), std::end(CoopVecComponentTypeNames)),
  14008. ");"));
  14009. register_write(ops[0]);
  14010. break;
  14011. }
  14012. case OpCooperativeVectorReduceSumAccumulateNV:
  14013. {
  14014. auto buf = ops[0];
  14015. auto offset = ops[1];
  14016. auto v1 = ops[2];
  14017. statement(join("coopVecReduceSumAccumulateNV(", to_expression(v1), ", ", to_expression(buf), ", ",
  14018. to_expression(offset), ");"));
  14019. register_write(ops[0]);
  14020. break;
  14021. }
  14022. case OpCooperativeVectorMatrixMulNV:
  14023. case OpCooperativeVectorMatrixMulAddNV:
  14024. {
  14025. uint32_t result_type = ops[0];
  14026. uint32_t id = ops[1];
  14027. emit_uninitialized_temporary_expression(result_type, id);
  14028. std::string stmt;
  14029. switch (opcode)
  14030. {
  14031. case OpCooperativeVectorMatrixMulAddNV:
  14032. stmt += "coopVecMatMulAddNV(";
  14033. break;
  14034. case OpCooperativeVectorMatrixMulNV:
  14035. stmt += "coopVecMatMulNV(";
  14036. break;
  14037. default:
  14038. SPIRV_CROSS_THROW("Invalid op code for coopvec instruction.");
  14039. }
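// Forward all operands verbatim, except the component-type and matrix-layout operands,
// which are replaced by their symbolic GLSL names when they are integer constants.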
  14040. for (uint32_t i = 1; i < length; i++)
  14041. {
14042. // Arguments 3, 6 (and 9 for MulAddNV) are component-type integer constants.
  14043. if (i == 3 || i == 6 || (i == 9 && opcode == OpCooperativeVectorMatrixMulAddNV))
  14044. {
  14045. stmt += to_pretty_expression_if_int_constant(
  14046. ops[i], std::begin(CoopVecComponentTypeNames), std::end(CoopVecComponentTypeNames));
  14047. }
  14048. else if ((i == 12 && opcode == OpCooperativeVectorMatrixMulAddNV) ||
  14049. (i == 9 && opcode == OpCooperativeVectorMatrixMulNV))
  14050. {
  14051. stmt += to_pretty_expression_if_int_constant(
  14052. ops[i], std::begin(CoopVecMatrixLayoutNames), std::end(CoopVecMatrixLayoutNames));
  14053. }
  14054. else
  14055. stmt += to_expression(ops[i]);
  14056. if (i < length - 1)
  14057. stmt += ", ";
  14058. }
  14059. stmt += ");";
  14060. statement(stmt);
  14061. break;
  14062. }
  14063. case OpCooperativeMatrixLengthKHR:
  14064. {
  14065. // Need to synthesize a dummy temporary, since the SPIR-V opcode is based on the type.
  14066. uint32_t result_type = ops[0];
  14067. uint32_t id = ops[1];
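// Emits <result type>(<coopmat type>(0).length()), i.e. calls .length() on a
// default-constructed dummy matrix of the given type.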
  14068. set<SPIRExpression>(
  14069. id, join(type_to_glsl(get<SPIRType>(result_type)),
  14070. "(", type_to_glsl(get<SPIRType>(ops[2])), "(0).length())"),
  14071. result_type, true);
  14072. break;
  14073. }
  14074. case OpCooperativeMatrixLoadKHR:
  14075. {
14076. // The spec contradicts itself about whether the stride is optional.
  14077. if (length < 5)
  14078. SPIRV_CROSS_THROW("Stride is not provided.");
  14079. uint32_t result_type = ops[0];
  14080. uint32_t id = ops[1];
  14081. emit_uninitialized_temporary_expression(result_type, id);
  14082. auto expr = to_expression(ops[2]);
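// split_coopmat_pointer() splits the SPIR-V pointer expression into the array reference and
// element index that coopMatLoad() expects as separate arguments (see the Store case below).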
  14083. pair<string, string> split_expr;
  14084. if (!is_forcing_recompilation())
  14085. split_expr = split_coopmat_pointer(expr);
  14086. string layout_expr = to_pretty_expression_if_int_constant(
  14087. ops[3], std::begin(CoopMatMatrixLayoutNames), std::end(CoopMatMatrixLayoutNames));
  14088. statement("coopMatLoad(", to_expression(id), ", ", split_expr.first, ", ", split_expr.second, ", ",
  14089. to_expression(ops[4]), ", ", layout_expr, ");");
  14090. register_read(id, ops[2], false);
  14091. break;
  14092. }
  14093. case OpCooperativeMatrixStoreKHR:
  14094. {
14095. // The spec contradicts itself about whether the stride is optional.
  14096. if (length < 4)
  14097. SPIRV_CROSS_THROW("Stride is not provided.");
14098. // SPIR-V and GLSL don't agree on how to pass the expression.
14099. // In SPIR-V it's a pointer, but in GLSL it's a reference to an array + index.
  14100. auto expr = to_expression(ops[0]);
  14101. pair<string, string> split_expr;
  14102. if (!is_forcing_recompilation())
  14103. split_expr = split_coopmat_pointer(expr);
  14104. string layout_expr = to_pretty_expression_if_int_constant(
  14105. ops[2], std::begin(CoopMatMatrixLayoutNames), std::end(CoopMatMatrixLayoutNames));
  14106. statement("coopMatStore(", to_expression(ops[1]), ", ", split_expr.first, ", ", split_expr.second, ", ",
  14107. to_expression(ops[3]), ", ", layout_expr, ");");
  14108. // TODO: Do we care about memory operands?
  14109. register_write(ops[0]);
  14110. break;
  14111. }
  14112. case OpCooperativeMatrixMulAddKHR:
  14113. {
  14114. uint32_t result_type = ops[0];
  14115. uint32_t id = ops[1];
  14116. uint32_t A = ops[2];
  14117. uint32_t B = ops[3];
  14118. uint32_t C = ops[4];
  14119. bool forward = should_forward(A) && should_forward(B) && should_forward(C);
  14120. emit_op(result_type, id,
  14121. join("coopMatMulAdd(",
  14122. to_unpacked_expression(A), ", ",
  14123. to_unpacked_expression(B), ", ",
  14124. to_unpacked_expression(C), ", ",
  14125. (length >= 6 ? ops[5] : 0),
  14126. ")"),
  14127. forward);
  14128. inherit_expression_dependencies(id, A);
  14129. inherit_expression_dependencies(id, B);
  14130. inherit_expression_dependencies(id, C);
  14131. break;
  14132. }
  14133. case OpCompositeConstructReplicateEXT:
  14134. {
  14135. uint32_t result_type = ops[0];
  14136. uint32_t id = ops[1];
  14137. auto &type = get<SPIRType>(result_type);
  14138. auto value_to_replicate = to_expression(ops[2]);
  14139. std::string rhs;
14140. // Matrices don't have a replicating constructor for vectors, so replicate manually.
  14141. if (type.op == OpTypeMatrix || type.op == OpTypeArray)
  14142. {
  14143. if (type.op == OpTypeArray && type.array.size() != 1)
  14144. {
  14145. SPIRV_CROSS_THROW(
  14146. "Multi-dimensional arrays currently not supported for OpCompositeConstructReplicateEXT");
  14147. }
  14148. uint32_t num_elements = type.op == OpTypeMatrix ? type.columns : type.array[0];
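// Replicate the value once per matrix column or once per array element.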
  14149. if (backend.use_initializer_list && type.op == OpTypeArray)
  14150. {
  14151. rhs += "{";
  14152. }
  14153. else
  14154. {
  14155. rhs += type_to_glsl_constructor(type);
  14156. rhs += "(";
  14157. }
  14158. for (uint32_t i = 0; i < num_elements; i++)
  14159. {
  14160. rhs += value_to_replicate;
  14161. if (i < num_elements - 1)
  14162. rhs += ", ";
  14163. }
  14164. if (backend.use_initializer_list && type.op == OpTypeArray)
  14165. rhs += "}";
  14166. else
  14167. rhs += ")";
  14168. }
  14169. else
  14170. {
  14171. rhs = join(type_to_glsl(type), "(", to_expression(ops[2]), ")");
  14172. }
  14173. emit_op(result_type, id, rhs, true);
  14174. break;
  14175. }
  14176. default:
  14177. statement("// unimplemented op ", instruction.op);
  14178. break;
  14179. }
  14180. }
  14181. // Appends function arguments, mapped from global variables, beyond the specified arg index.
  14182. // This is used when a function call uses fewer arguments than the function defines.
  14183. // This situation may occur if the function signature has been dynamically modified to
  14184. // extract global variables referenced from within the function, and convert them to
  14185. // function arguments. This is necessary for shader languages that do not support global
14186. // access to shader input content from within a function (e.g. Metal). Each additional
14187. // function argument uses the name of the global variable. Function nesting will modify the
  14188. // functions and function calls all the way up the nesting chain.
  14189. void CompilerGLSL::append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector<string> &arglist)
  14190. {
  14191. auto &args = func.arguments;
  14192. uint32_t arg_cnt = uint32_t(args.size());
  14193. for (uint32_t arg_idx = index; arg_idx < arg_cnt; arg_idx++)
  14194. {
  14195. auto &arg = args[arg_idx];
  14196. assert(arg.alias_global_variable);
  14197. // If the underlying variable needs to be declared
  14198. // (ie. a local variable with deferred declaration), do so now.
  14199. uint32_t var_id = get<SPIRVariable>(arg.id).basevariable;
  14200. if (var_id)
  14201. flush_variable_declaration(var_id);
  14202. arglist.push_back(to_func_call_arg(arg, arg.id));
  14203. }
  14204. }
  14205. string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index)
  14206. {
  14207. if (type.type_alias != TypeID(0) &&
  14208. !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
  14209. {
  14210. return to_member_name(get<SPIRType>(type.type_alias), index);
  14211. }
  14212. auto &memb = ir.meta[type.self].members;
  14213. if (index < memb.size() && !memb[index].alias.empty())
  14214. return memb[index].alias;
  14215. else
  14216. return join("_m", index);
  14217. }
  14218. string CompilerGLSL::to_member_reference(uint32_t, const SPIRType &type, uint32_t index, bool)
  14219. {
  14220. return join(".", to_member_name(type, index));
  14221. }
  14222. string CompilerGLSL::to_multi_member_reference(const SPIRType &type, const SmallVector<uint32_t> &indices)
  14223. {
  14224. string ret;
  14225. auto *member_type = &type;
  14226. for (auto &index : indices)
  14227. {
  14228. ret += join(".", to_member_name(*member_type, index));
  14229. member_type = &get<SPIRType>(member_type->member_types[index]);
  14230. }
  14231. return ret;
  14232. }
  14233. void CompilerGLSL::add_member_name(SPIRType &type, uint32_t index)
  14234. {
  14235. auto &memb = ir.meta[type.self].members;
  14236. if (index < memb.size() && !memb[index].alias.empty())
  14237. {
  14238. auto &name = memb[index].alias;
  14239. if (name.empty())
  14240. return;
  14241. ParsedIR::sanitize_identifier(name, true, true);
  14242. update_name_cache(type.member_name_cache, name);
  14243. }
  14244. }
  14245. // Checks whether the ID is a row_major matrix that requires conversion before use
  14246. bool CompilerGLSL::is_non_native_row_major_matrix(uint32_t id)
  14247. {
  14248. // Natively supported row-major matrices do not need to be converted.
  14249. // Legacy targets do not support row major.
  14250. if (backend.native_row_major_matrix && !is_legacy())
  14251. return false;
  14252. auto *e = maybe_get<SPIRExpression>(id);
  14253. if (e)
  14254. return e->need_transpose;
  14255. else
  14256. return has_decoration(id, DecorationRowMajor);
  14257. }
  14258. // Checks whether the member is a row_major matrix that requires conversion before use
  14259. bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index)
  14260. {
  14261. // Natively supported row-major matrices do not need to be converted.
  14262. if (backend.native_row_major_matrix && !is_legacy())
  14263. return false;
  14264. // Non-matrix or column-major matrix types do not need to be converted.
  14265. if (!has_member_decoration(type.self, index, DecorationRowMajor))
  14266. return false;
  14267. // Only square row-major matrices can be converted at this time.
14268. // Converting non-square matrices will require defining a custom GLSL function that
  14269. // swaps matrix elements while retaining the original dimensional form of the matrix.
  14270. const auto mbr_type = get<SPIRType>(type.member_types[index]);
  14271. if (mbr_type.columns != mbr_type.vecsize)
  14272. SPIRV_CROSS_THROW("Row-major matrices must be square on this platform.");
  14273. return true;
  14274. }
  14275. // Checks if we need to remap physical type IDs when declaring the type in a buffer.
  14276. bool CompilerGLSL::member_is_remapped_physical_type(const SPIRType &type, uint32_t index) const
  14277. {
  14278. return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID);
  14279. }
14280. // Checks whether the member is a packed data type that might need to be unpacked.
  14281. bool CompilerGLSL::member_is_packed_physical_type(const SPIRType &type, uint32_t index) const
  14282. {
  14283. return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypePacked);
  14284. }
  14285. // Wraps the expression string in a function call that converts the
  14286. // row_major matrix result of the expression to a column_major matrix.
  14287. // Base implementation uses the standard library transpose() function.
  14288. // Subclasses may override to use a different function.
  14289. string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, uint32_t /* physical_type_id */,
  14290. bool /*is_packed*/, bool relaxed)
  14291. {
  14292. strip_enclosed_expression(exp_str);
  14293. if (!is_matrix(exp_type))
  14294. {
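// A non-matrix expression here is a single column extracted from a row-major matrix:
// rewrite m[col] into vecN(m[0][col], m[1][col], ...).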
  14295. auto column_index = exp_str.find_last_of('[');
  14296. if (column_index == string::npos)
  14297. return exp_str;
  14298. auto column_expr = exp_str.substr(column_index);
  14299. exp_str.resize(column_index);
  14300. auto end_deferred_index = column_expr.find_last_of(']');
  14301. if (end_deferred_index != string::npos && end_deferred_index + 1 != column_expr.size())
  14302. {
  14303. // If we have any data member fixups, it must be transposed so that it refers to this index.
  14304. // E.g. [0].data followed by [1] would be shuffled to [1][0].data which is wrong,
  14305. // and needs to be [1].data[0] instead.
  14306. end_deferred_index++;
  14307. column_expr = column_expr.substr(end_deferred_index) +
  14308. column_expr.substr(0, end_deferred_index);
  14309. }
  14310. auto transposed_expr = type_to_glsl_constructor(exp_type) + "(";
  14311. // Loading a column from a row-major matrix. Unroll the load.
  14312. for (uint32_t c = 0; c < exp_type.vecsize; c++)
  14313. {
  14314. transposed_expr += join(exp_str, '[', c, ']', column_expr);
  14315. if (c + 1 < exp_type.vecsize)
  14316. transposed_expr += ", ";
  14317. }
  14318. transposed_expr += ")";
  14319. return transposed_expr;
  14320. }
  14321. else if (options.version < 120)
  14322. {
  14323. // GLSL 110, ES 100 do not have transpose(), so emulate it. Note that
  14324. // these GLSL versions do not support non-square matrices.
  14325. if (exp_type.vecsize == 2 && exp_type.columns == 2)
  14326. require_polyfill(PolyfillTranspose2x2, relaxed);
  14327. else if (exp_type.vecsize == 3 && exp_type.columns == 3)
  14328. require_polyfill(PolyfillTranspose3x3, relaxed);
  14329. else if (exp_type.vecsize == 4 && exp_type.columns == 4)
  14330. require_polyfill(PolyfillTranspose4x4, relaxed);
  14331. else
  14332. SPIRV_CROSS_THROW("Non-square matrices are not supported in legacy GLSL, cannot transpose.");
  14333. return join("spvTranspose", (options.es && relaxed) ? "MP" : "", "(", exp_str, ")");
  14334. }
  14335. else
  14336. return join("transpose(", exp_str, ")");
  14337. }
  14338. string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uint32_t id)
  14339. {
  14340. string type_name = type_to_glsl(type, id);
  14341. remap_variable_type_name(type, name, type_name);
  14342. return join(type_name, " ", name, type_to_array_glsl(type, id));
  14343. }
  14344. bool CompilerGLSL::variable_decl_is_remapped_storage(const SPIRVariable &var, StorageClass storage) const
  14345. {
  14346. return var.storage == storage;
  14347. }
  14348. // Emit a structure member. Subclasses may override to modify output,
  14349. // or to dynamically add a padding member if needed.
  14350. void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
  14351. const string &qualifier, uint32_t)
  14352. {
  14353. auto &membertype = get<SPIRType>(member_type_id);
  14354. Bitset memberflags;
  14355. auto &memb = ir.meta[type.self].members;
  14356. if (index < memb.size())
  14357. memberflags = memb[index].decoration_flags;
  14358. string qualifiers;
  14359. bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
  14360. ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
  14361. if (is_block)
  14362. qualifiers = to_interpolation_qualifiers(memberflags);
  14363. statement(layout_for_member(type, index), qualifiers, qualifier, flags_to_qualifiers_glsl(membertype, 0, memberflags),
  14364. variable_decl(membertype, to_member_name(type, index)), ";");
  14365. }
  14366. void CompilerGLSL::emit_struct_padding_target(const SPIRType &)
  14367. {
  14368. }
  14369. string CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, uint32_t id, const Bitset &flags)
  14370. {
  14371. // GL_EXT_buffer_reference variables can be marked as restrict.
  14372. if (flags.get(DecorationRestrictPointerEXT))
  14373. return "restrict ";
  14374. string qual;
  14375. if (type_is_floating_point(type) &&
  14376. (flags.get(DecorationNoContraction) || (type.self && has_legacy_nocontract(type.self, id))) &&
  14377. backend.support_precise_qualifier)
  14378. {
  14379. qual = "precise ";
  14380. }
14381. // Structs do not have precision qualifiers, nor do doubles (desktop-only anyway, so no mediump/highp).
  14382. bool type_supports_precision =
  14383. type.basetype == SPIRType::Float || type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt ||
  14384. type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage ||
  14385. type.basetype == SPIRType::Sampler;
  14386. if (!type_supports_precision)
  14387. return qual;
  14388. if (options.es)
  14389. {
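// On ES, emit an explicit precision qualifier unless it would just restate the stage's
// default precision (the implied_* checks below).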
  14390. auto &execution = get_entry_point();
  14391. if (type.basetype == SPIRType::UInt && is_legacy_es())
  14392. {
  14393. // HACK: This is a bool. See comment in type_to_glsl().
  14394. qual += "lowp ";
  14395. }
  14396. else if (flags.get(DecorationRelaxedPrecision))
  14397. {
  14398. bool implied_fmediump = type.basetype == SPIRType::Float &&
  14399. options.fragment.default_float_precision == Options::Mediump &&
  14400. execution.model == ExecutionModelFragment;
  14401. bool implied_imediump = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
  14402. options.fragment.default_int_precision == Options::Mediump &&
  14403. execution.model == ExecutionModelFragment;
  14404. qual += (implied_fmediump || implied_imediump) ? "" : "mediump ";
  14405. }
  14406. else
  14407. {
  14408. bool implied_fhighp =
  14409. type.basetype == SPIRType::Float && ((options.fragment.default_float_precision == Options::Highp &&
  14410. execution.model == ExecutionModelFragment) ||
  14411. (execution.model != ExecutionModelFragment));
  14412. bool implied_ihighp = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
  14413. ((options.fragment.default_int_precision == Options::Highp &&
  14414. execution.model == ExecutionModelFragment) ||
  14415. (execution.model != ExecutionModelFragment));
  14416. qual += (implied_fhighp || implied_ihighp) ? "" : "highp ";
  14417. }
  14418. }
  14419. else if (backend.allow_precision_qualifiers)
  14420. {
  14421. // Vulkan GLSL supports precision qualifiers, even in desktop profiles, which is convenient.
  14422. // The default is highp however, so only emit mediump in the rare case that a shader has these.
  14423. if (flags.get(DecorationRelaxedPrecision))
  14424. qual += "mediump ";
  14425. }
  14426. return qual;
  14427. }
  14428. string CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id)
  14429. {
  14430. auto &type = expression_type(id);
  14431. bool use_precision_qualifiers = backend.allow_precision_qualifiers;
  14432. if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage))
  14433. {
  14434. // Force mediump for the sampler type. We cannot declare 16-bit or smaller image types.
  14435. auto &result_type = get<SPIRType>(type.image.type);
  14436. if (result_type.width < 32)
  14437. return "mediump ";
  14438. }
  14439. return flags_to_qualifiers_glsl(type, id, ir.meta[id].decoration.decoration_flags);
  14440. }
  14441. void CompilerGLSL::fixup_io_block_patch_primitive_qualifiers(const SPIRVariable &var)
  14442. {
  14443. // Works around weird behavior in glslangValidator where
  14444. // a patch out block is translated to just block members getting the decoration.
  14445. // To make glslang not complain when we compile again, we have to transform this back to a case where
14446. // the variable itself has the Patch decoration, not its members.
  14447. // Same for perprimitiveEXT.
  14448. auto &type = get<SPIRType>(var.basetype);
  14449. if (has_decoration(type.self, DecorationBlock))
  14450. {
  14451. uint32_t member_count = uint32_t(type.member_types.size());
  14452. Decoration promoted_decoration = {};
  14453. bool do_promote_decoration = false;
  14454. for (uint32_t i = 0; i < member_count; i++)
  14455. {
  14456. if (has_member_decoration(type.self, i, DecorationPatch))
  14457. {
  14458. promoted_decoration = DecorationPatch;
  14459. do_promote_decoration = true;
  14460. break;
  14461. }
  14462. else if (has_member_decoration(type.self, i, DecorationPerPrimitiveEXT))
  14463. {
  14464. promoted_decoration = DecorationPerPrimitiveEXT;
  14465. do_promote_decoration = true;
  14466. break;
  14467. }
  14468. }
  14469. if (do_promote_decoration)
  14470. {
  14471. set_decoration(var.self, promoted_decoration);
  14472. for (uint32_t i = 0; i < member_count; i++)
  14473. unset_member_decoration(type.self, i, promoted_decoration);
  14474. }
  14475. }
  14476. }
  14477. string CompilerGLSL::to_qualifiers_glsl(uint32_t id)
  14478. {
  14479. auto &flags = get_decoration_bitset(id);
  14480. string res;
  14481. auto *var = maybe_get<SPIRVariable>(id);
  14482. if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied)
  14483. res += "shared ";
  14484. else if (var && var->storage == StorageClassTaskPayloadWorkgroupEXT && !backend.shared_is_implied)
  14485. res += "taskPayloadSharedEXT ";
  14486. res += to_interpolation_qualifiers(flags);
  14487. if (var)
  14488. res += to_storage_qualifiers_glsl(*var);
  14489. auto &type = expression_type(id);
  14490. if (type.image.dim != DimSubpassData && type.image.sampled == 2)
  14491. {
  14492. if (flags.get(DecorationCoherent))
  14493. res += "coherent ";
  14494. if (flags.get(DecorationRestrict))
  14495. res += "restrict ";
  14496. if (flags.get(DecorationNonWritable))
  14497. res += "readonly ";
  14498. bool formatted_load = type.image.format == ImageFormatUnknown;
  14499. if (flags.get(DecorationNonReadable))
  14500. {
  14501. res += "writeonly ";
  14502. formatted_load = false;
  14503. }
  14504. if (formatted_load)
  14505. {
  14506. if (!options.es)
  14507. require_extension_internal("GL_EXT_shader_image_load_formatted");
  14508. else
  14509. SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_image_load_formatted in ESSL.");
  14510. }
  14511. }
  14512. else if (type.basetype == SPIRType::Tensor)
  14513. {
  14514. if (flags.get(DecorationNonWritable))
  14515. res += "readonly ";
  14516. if (flags.get(DecorationNonReadable))
  14517. res += "writeonly ";
  14518. }
  14519. res += to_precision_qualifiers_glsl(id);
  14520. return res;
  14521. }
  14522. string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg)
  14523. {
14524. // glslangValidator seems to make all arguments pointers no matter what, which is rather bizarre ...
  14525. auto &type = expression_type(arg.id);
  14526. const char *direction = "";
  14527. if (is_pointer(type) &&
  14528. (type.storage == StorageClassFunction ||
  14529. type.storage == StorageClassPrivate ||
  14530. type.storage == StorageClassOutput))
  14531. {
14532. // If we're passing around block types to a function, we really mean reference in a pointer sense,
14533. // but DXC does not like inout for mesh blocks, so work around that. out is technically not correct,
  14534. // but it works in practice due to legalization. It's ... not great, but you gotta do what you gotta do.
  14535. // GLSL will never hit this case since it's not valid.
  14536. if (type.storage == StorageClassOutput && get_execution_model() == ExecutionModelMeshEXT &&
  14537. has_decoration(type.self, DecorationBlock) && is_builtin_type(type) && arg.write_count)
  14538. {
  14539. direction = "out ";
  14540. }
  14541. else if (arg.write_count && arg.read_count)
  14542. direction = "inout ";
  14543. else if (arg.write_count)
  14544. direction = "out ";
  14545. }
  14546. return join(direction, to_qualifiers_glsl(arg.id), variable_decl(type, to_name(arg.id), arg.id));
  14547. }
  14548. string CompilerGLSL::to_initializer_expression(const SPIRVariable &var)
  14549. {
  14550. return to_unpacked_expression(var.initializer);
  14551. }
  14552. string CompilerGLSL::to_zero_initialized_expression(uint32_t type_id)
  14553. {
  14554. #ifndef NDEBUG
  14555. auto &type = get<SPIRType>(type_id);
  14556. assert(type.storage == StorageClassPrivate || type.storage == StorageClassFunction ||
  14557. type.storage == StorageClassGeneric);
  14558. #endif
  14559. uint32_t id = ir.increase_bound_by(1);
  14560. ir.make_constant_null(id, type_id, false);
  14561. return constant_expression(get<SPIRConstant>(id));
  14562. }
  14563. bool CompilerGLSL::type_can_zero_initialize(const SPIRType &type) const
  14564. {
  14565. if (type.pointer)
  14566. return false;
  14567. if (!type.array.empty() && options.flatten_multidimensional_arrays)
  14568. return false;
  14569. for (auto &literal : type.array_size_literal)
  14570. if (!literal)
  14571. return false;
  14572. for (auto &memb : type.member_types)
  14573. if (!type_can_zero_initialize(get<SPIRType>(memb)))
  14574. return false;
  14575. return true;
  14576. }
  14577. string CompilerGLSL::variable_decl(const SPIRVariable &variable)
  14578. {
  14579. // Ignore the pointer type since GLSL doesn't have pointers.
  14580. auto &type = get_variable_data_type(variable);
  14581. if (type.pointer_depth > 1 && !backend.support_pointer_to_pointer)
  14582. SPIRV_CROSS_THROW("Cannot declare pointer-to-pointer types.");
  14583. auto res = join(to_qualifiers_glsl(variable.self), variable_decl(type, to_name(variable.self), variable.self));
  14584. if (variable.loop_variable && variable.static_expression)
  14585. {
  14586. uint32_t expr = variable.static_expression;
  14587. if (ir.ids[expr].get_type() != TypeUndef)
  14588. res += join(" = ", to_unpacked_expression(variable.static_expression));
  14589. else if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
  14590. res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable)));
  14591. }
  14592. else if (variable.initializer)
  14593. {
  14594. if (!variable_decl_is_remapped_storage(variable, StorageClassWorkgroup))
  14595. {
  14596. uint32_t expr = variable.initializer;
  14597. if (ir.ids[expr].get_type() != TypeUndef)
  14598. res += join(" = ", to_initializer_expression(variable));
  14599. else if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
  14600. res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable)));
  14601. }
  14602. else
  14603. {
14604. // Workgroup memory requires special handling. First, it can only be null-initialized.
14605. // GLSL handles this with a null initializer, while other backends require more work after the declaration.
  14606. require_extension_internal("GL_EXT_null_initializer");
  14607. if (!backend.constant_null_initializer.empty())
  14608. res += join(" = ", backend.constant_null_initializer);
  14609. }
  14610. }
  14611. return res;
  14612. }
  14613. const char *CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable)
  14614. {
  14615. auto &flags = get_decoration_bitset(variable.self);
  14616. if (flags.get(DecorationRelaxedPrecision))
  14617. return "mediump ";
  14618. else
  14619. return "highp ";
  14620. }
  14621. string CompilerGLSL::pls_decl(const PlsRemap &var)
  14622. {
  14623. auto &variable = get<SPIRVariable>(var.id);
  14624. auto op_and_basetype = pls_format_to_basetype(var.format);
  14625. SPIRType type { op_and_basetype.first };
  14626. type.basetype = op_and_basetype.second;
  14627. auto vecsize = pls_format_to_components(var.format);
  14628. if (vecsize > 1)
  14629. {
  14630. type.op = OpTypeVector;
  14631. type.vecsize = vecsize;
  14632. }
  14633. return join(to_pls_layout(var.format), to_pls_qualifiers_glsl(variable), type_to_glsl(type), " ",
  14634. to_name(variable.self));
  14635. }
  14636. uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type) const
  14637. {
  14638. return to_array_size_literal(type, uint32_t(type.array.size() - 1));
  14639. }
  14640. uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type, uint32_t index) const
  14641. {
  14642. assert(type.array.size() == type.array_size_literal.size());
  14643. if (type.array_size_literal[index])
  14644. {
  14645. return type.array[index];
  14646. }
  14647. else
  14648. {
  14649. // Use the default spec constant value.
  14650. // This is the best we can do.
  14651. return evaluate_constant_u32(type.array[index]);
  14652. }
  14653. }
  14654. string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index)
  14655. {
  14656. assert(type.array.size() == type.array_size_literal.size());
  14657. auto &size = type.array[index];
  14658. if (!type.array_size_literal[index])
  14659. return to_expression(size);
  14660. else if (size)
  14661. return convert_to_string(size);
  14662. else if (!backend.unsized_array_supported)
  14663. {
  14664. // For runtime-sized arrays, we can work around
14665. // the lack of standard support for this by simply having
14666. // a single-element array.
  14667. //
  14668. // Runtime length arrays must always be the last element
  14669. // in an interface block.
  14670. return "1";
  14671. }
  14672. else
  14673. return "";
  14674. }
  14675. string CompilerGLSL::type_to_array_glsl(const SPIRType &type, uint32_t)
  14676. {
  14677. if (type.pointer && type.storage == StorageClassPhysicalStorageBuffer && type.basetype != SPIRType::Struct)
  14678. {
  14679. // We are using a wrapped pointer type, and we should not emit any array declarations here.
  14680. return "";
  14681. }
  14682. if (type.array.empty())
  14683. return "";
  14684. if (options.flatten_multidimensional_arrays)
  14685. {
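// Flattening collapses all dimensions into a single subscript that multiplies them together.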
  14686. string res;
  14687. res += "[";
  14688. for (auto i = uint32_t(type.array.size()); i; i--)
  14689. {
  14690. res += enclose_expression(to_array_size(type, i - 1));
  14691. if (i > 1)
  14692. res += " * ";
  14693. }
  14694. res += "]";
  14695. return res;
  14696. }
  14697. else
  14698. {
  14699. if (type.array.size() > 1)
  14700. {
  14701. if (!options.es && options.version < 430)
  14702. require_extension_internal("GL_ARB_arrays_of_arrays");
  14703. else if (options.es && options.version < 310)
  14704. SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310. "
  14705. "Try using --flatten-multidimensional-arrays or set "
  14706. "options.flatten_multidimensional_arrays to true.");
  14707. }
  14708. string res;
  14709. for (auto i = uint32_t(type.array.size()); i; i--)
  14710. {
  14711. res += "[";
  14712. res += to_array_size(type, i - 1);
  14713. res += "]";
  14714. }
  14715. return res;
  14716. }
  14717. }
  14718. string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id, bool /*member*/)
  14719. {
  14720. auto &imagetype = get<SPIRType>(type.image.type);
  14721. string res;
  14722. switch (imagetype.basetype)
  14723. {
  14724. case SPIRType::Int64:
  14725. res = "i64";
  14726. require_extension_internal("GL_EXT_shader_image_int64");
  14727. break;
  14728. case SPIRType::UInt64:
  14729. res = "u64";
  14730. require_extension_internal("GL_EXT_shader_image_int64");
  14731. break;
  14732. case SPIRType::Int:
  14733. case SPIRType::Short:
  14734. case SPIRType::SByte:
  14735. res = "i";
  14736. break;
  14737. case SPIRType::UInt:
  14738. case SPIRType::UShort:
  14739. case SPIRType::UByte:
  14740. res = "u";
  14741. break;
  14742. default:
  14743. break;
  14744. }
  14745. // For half image types, we will force mediump for the sampler, and cast to f16 after any sampling operation.
  14746. // We cannot express a true half texture type in GLSL. Neither for short integer formats for that matter.
  14747. if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics)
  14748. return res + "subpassInput" + (type.image.ms ? "MS" : "");
  14749. else if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData &&
  14750. subpass_input_is_framebuffer_fetch(id))
  14751. {
  14752. SPIRType sampled_type = get<SPIRType>(type.image.type);
  14753. sampled_type.vecsize = 4;
  14754. return type_to_glsl(sampled_type);
  14755. }
  14756. // If we're emulating subpassInput with samplers, force sampler2D
  14757. // so we don't have to specify format.
  14758. if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData)
  14759. {
  14760. // Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V.
  14761. if (type.image.dim == DimBuffer && type.image.sampled == 1)
  14762. res += "sampler";
  14763. else
  14764. res += type.image.sampled == 2 ? "image" : "texture";
  14765. }
  14766. else
  14767. res += "sampler";
  14768. switch (type.image.dim)
  14769. {
  14770. case Dim1D:
  14771. // ES doesn't support 1D. Fake it with 2D.
  14772. res += options.es ? "2D" : "1D";
  14773. break;
  14774. case Dim2D:
  14775. res += "2D";
  14776. break;
  14777. case Dim3D:
  14778. res += "3D";
  14779. break;
  14780. case DimCube:
  14781. res += "Cube";
  14782. break;
  14783. case DimRect:
  14784. if (options.es)
  14785. SPIRV_CROSS_THROW("Rectangle textures are not supported on OpenGL ES.");
  14786. if (is_legacy_desktop())
  14787. require_extension_internal("GL_ARB_texture_rectangle");
  14788. res += "2DRect";
  14789. break;
  14790. case DimBuffer:
  14791. if (options.es && options.version < 320)
  14792. require_extension_internal("GL_EXT_texture_buffer");
  14793. else if (!options.es && options.version < 140)
  14794. require_extension_internal("GL_EXT_texture_buffer_object");
  14795. res += "Buffer";
  14796. break;
  14797. case DimSubpassData:
  14798. res += "2D";
  14799. break;
  14800. default:
  14801. SPIRV_CROSS_THROW("Only 1D, 2D, 2DRect, 3D, Buffer, InputTarget and Cube textures supported.");
  14802. }
  14803. if (type.image.ms)
  14804. res += "MS";
  14805. if (type.image.arrayed)
  14806. {
  14807. if (is_legacy_desktop())
  14808. require_extension_internal("GL_EXT_texture_array");
  14809. res += "Array";
  14810. }
  14811. // "Shadow" state in GLSL only exists for samplers and combined image samplers.
  14812. if (((type.basetype == SPIRType::SampledImage) || (type.basetype == SPIRType::Sampler)) &&
  14813. is_depth_image(type, id))
  14814. {
  14815. res += "Shadow";
  14816. if (type.image.dim == DimCube && is_legacy())
  14817. {
  14818. if (!options.es)
  14819. require_extension_internal("GL_EXT_gpu_shader4");
  14820. else
  14821. {
  14822. require_extension_internal("GL_NV_shadow_samplers_cube");
  14823. res += "NV";
  14824. }
  14825. }
  14826. }
  14827. return res;
  14828. }
  14829. string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type)
  14830. {
  14831. if (backend.use_array_constructor && type.array.size() > 1)
  14832. {
  14833. if (options.flatten_multidimensional_arrays)
  14834. SPIRV_CROSS_THROW("Cannot flatten constructors of multidimensional array constructors, "
  14835. "e.g. float[][]().");
  14836. else if (!options.es && options.version < 430)
  14837. require_extension_internal("GL_ARB_arrays_of_arrays");
  14838. else if (options.es && options.version < 310)
  14839. SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310.");
  14840. }
  14841. auto e = type_to_glsl(type);
  14842. if (backend.use_array_constructor)
  14843. {
  14844. for (uint32_t i = 0; i < type.array.size(); i++)
  14845. e += "[]";
  14846. }
  14847. return e;
  14848. }
  14849. // The optional id parameter indicates the object whose type we are trying
14850. // to find the description for. Most type descriptions do not
  14851. // depend on a specific object's use of that type.
  14852. string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id)
  14853. {
  14854. if (is_physical_pointer(type) && !is_physical_pointer_to_buffer_block(type))
  14855. {
14856. // Need to create a magic type name which encodes the entire type information.
  14857. auto *parent = &get_pointee_type(type);
  14858. string name = type_to_glsl(*parent);
  14859. uint32_t array_stride = get_decoration(type.parent_type, DecorationArrayStride);
  14860. // Resolve all array dimensions in one go since once we lose the pointer type,
14861. // array information is left to type_to_array_glsl. The base type loses array information.
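// The generated name encodes each dimension (or its ID for specialization constants)
// together with its ArrayStride, so differently laid out pointee types get distinct names.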
  14862. while (is_array(*parent))
  14863. {
  14864. if (parent->array_size_literal.back())
  14865. name += join(type.array.back(), "_");
  14866. else
  14867. name += join("id", type.array.back(), "_");
  14868. name += "stride_" + std::to_string(array_stride);
  14869. array_stride = get_decoration(parent->parent_type, DecorationArrayStride);
  14870. parent = &get<SPIRType>(parent->parent_type);
  14871. }
  14872. name += "Pointer";
  14873. return name;
  14874. }
  14875. switch (type.basetype)
  14876. {
  14877. case SPIRType::Struct:
  14878. // Need OpName lookup here to get a "sensible" name for a struct.
  14879. if (backend.explicit_struct_type)
  14880. return join("struct ", to_name(type.self));
  14881. else
  14882. return to_name(type.self);
  14883. case SPIRType::Image:
  14884. case SPIRType::SampledImage:
  14885. return image_type_glsl(type, id);
  14886. case SPIRType::Sampler:
  14887. // The depth field is set by calling code based on the variable ID of the sampler, effectively reintroducing
  14888. // this distinction into the type system.
  14889. return comparison_ids.count(id) ? "samplerShadow" : "sampler";
  14890. case SPIRType::AccelerationStructure:
  14891. return ray_tracing_is_khr ? "accelerationStructureEXT" : "accelerationStructureNV";
  14892. case SPIRType::RayQuery:
  14893. return "rayQueryEXT";
  14894. case SPIRType::Tensor:
  14895. if (type.ext.tensor.rank == 0)
  14896. SPIRV_CROSS_THROW("GLSL tensors must have a Rank.");
  14897. if (type.ext.tensor.shape != 0)
  14898. SPIRV_CROSS_THROW("GLSL tensors cannot have a Shape.");
  14899. return join("tensorARM<", type_to_glsl(get<SPIRType>(type.ext.tensor.type)), ", ",
  14900. to_expression(type.ext.tensor.rank), ">");
  14901. case SPIRType::Void:
  14902. return "void";
  14903. default:
  14904. break;
  14905. }
  14906. if (type.basetype == SPIRType::UInt && is_legacy())
  14907. {
  14908. if (options.es)
  14909. // HACK: spirv-cross changes bools into uints and generates code which compares them to
  14910. // zero. Input code will have already been validated as not to have contained any uints,
  14911. // so any remaining uints must in fact be bools. However, simply returning "bool" here
  14912. // will result in invalid code. Instead, return an int.
  14913. return backend.basic_int_type;
  14914. else
  14915. require_extension_internal("GL_EXT_gpu_shader4");
  14916. }
  14917. if (type.basetype == SPIRType::AtomicCounter)
  14918. {
  14919. if (options.es && options.version < 310)
  14920. SPIRV_CROSS_THROW("At least ESSL 3.10 required for atomic counters.");
  14921. else if (!options.es && options.version < 420)
  14922. require_extension_internal("GL_ARB_shader_atomic_counters");
  14923. }
  14924. if (type.op == OpTypeCooperativeVectorNV)
  14925. {
  14926. require_extension_internal("GL_NV_cooperative_vector");
  14927. if (!options.vulkan_semantics)
  14928. SPIRV_CROSS_THROW("Cooperative vector NV only available in Vulkan.");
  14929. std::string component_type_str = type_to_glsl(get<SPIRType>(type.ext.coopVecNV.component_type_id));
  14930. return join("coopvecNV<", component_type_str, ", ", to_expression(type.ext.coopVecNV.component_count_id), ">");
  14931. }
  14932. const SPIRType *coop_type = &type;
  14933. while (is_pointer(*coop_type) || is_array(*coop_type))
  14934. coop_type = &get<SPIRType>(coop_type->parent_type);
  14935. if (coop_type->op == OpTypeCooperativeMatrixKHR)
  14936. {
  14937. require_extension_internal("GL_KHR_cooperative_matrix");
  14938. if (!options.vulkan_semantics)
  14939. SPIRV_CROSS_THROW("Cooperative matrix only available in Vulkan.");
14940. // GLSL doesn't support this as a spec constant, which makes sense ...
  14941. uint32_t use_type = get<SPIRConstant>(coop_type->ext.cooperative.use_id).scalar();
  14942. const char *use = nullptr;
  14943. switch (use_type)
  14944. {
  14945. case CooperativeMatrixUseMatrixAKHR:
  14946. use = "gl_MatrixUseA";
  14947. break;
  14948. case CooperativeMatrixUseMatrixBKHR:
  14949. use = "gl_MatrixUseB";
  14950. break;
  14951. case CooperativeMatrixUseMatrixAccumulatorKHR:
  14952. use = "gl_MatrixUseAccumulator";
  14953. break;
  14954. default:
  14955. SPIRV_CROSS_THROW("Invalid matrix use.");
  14956. }
  14957. string scope_expr;
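// Prefer the symbolic gl_Scope* names when the scope is a non-specialization constant;
// otherwise fall back to emitting the scope expression as-is.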
  14958. if (const auto *scope = maybe_get<SPIRConstant>(coop_type->ext.cooperative.scope_id))
  14959. {
  14960. if (!scope->specialization)
  14961. {
  14962. require_extension_internal("GL_KHR_memory_scope_semantics");
  14963. if (scope->scalar() == ScopeSubgroup)
  14964. scope_expr = "gl_ScopeSubgroup";
  14965. else if (scope->scalar() == ScopeWorkgroup)
  14966. scope_expr = "gl_ScopeWorkgroup";
  14967. else
  14968. SPIRV_CROSS_THROW("Invalid scope for cooperative matrix.");
  14969. }
  14970. }
  14971. if (scope_expr.empty())
  14972. scope_expr = to_expression(coop_type->ext.cooperative.scope_id);
  14973. return join("coopmat<", type_to_glsl(get<SPIRType>(coop_type->parent_type)), ", ",
  14974. scope_expr, ", ",
  14975. to_expression(coop_type->ext.cooperative.rows_id), ", ",
  14976. to_expression(coop_type->ext.cooperative.columns_id), ", ", use, ">");
  14977. }
  14978. if (type.vecsize == 1 && type.columns == 1) // Scalar builtin
  14979. {
  14980. switch (type.basetype)
  14981. {
  14982. case SPIRType::Boolean:
  14983. return "bool";
  14984. case SPIRType::SByte:
  14985. return backend.basic_int8_type;
  14986. case SPIRType::UByte:
  14987. return backend.basic_uint8_type;
  14988. case SPIRType::Short:
  14989. return backend.basic_int16_type;
  14990. case SPIRType::UShort:
  14991. return backend.basic_uint16_type;
  14992. case SPIRType::Int:
  14993. return backend.basic_int_type;
  14994. case SPIRType::UInt:
  14995. return backend.basic_uint_type;
  14996. case SPIRType::AtomicCounter:
  14997. return "atomic_uint";
  14998. case SPIRType::Half:
  14999. return "float16_t";
  15000. case SPIRType::BFloat16:
  15001. if (!options.vulkan_semantics)
  15002. SPIRV_CROSS_THROW("bfloat16 requires Vulkan semantics.");
  15003. require_extension_internal("GL_EXT_bfloat16");
  15004. return "bfloat16_t";
  15005. case SPIRType::FloatE4M3:
  15006. if (!options.vulkan_semantics)
  15007. SPIRV_CROSS_THROW("floate4m3_t requires Vulkan semantics.");
  15008. require_extension_internal("GL_EXT_float_e4m3");
  15009. return "floate4m3_t";
  15010. case SPIRType::FloatE5M2:
  15011. if (!options.vulkan_semantics)
  15012. SPIRV_CROSS_THROW("floate5m2_t requires Vulkan semantics.");
  15013. require_extension_internal("GL_EXT_float_e5m2");
  15014. return "floate5m2_t";
  15015. case SPIRType::Float:
  15016. return "float";
  15017. case SPIRType::Double:
  15018. return "double";
  15019. case SPIRType::Int64:
  15020. return "int64_t";
  15021. case SPIRType::UInt64:
  15022. return "uint64_t";
  15023. default:
  15024. return "???";
  15025. }
  15026. }
  15027. else if (type.vecsize > 1 && type.columns == 1) // Vector builtin
  15028. {
  15029. switch (type.basetype)
  15030. {
  15031. case SPIRType::Boolean:
  15032. return join("bvec", type.vecsize);
  15033. case SPIRType::SByte:
  15034. return join("i8vec", type.vecsize);
  15035. case SPIRType::UByte:
  15036. return join("u8vec", type.vecsize);
  15037. case SPIRType::Short:
  15038. return join("i16vec", type.vecsize);
  15039. case SPIRType::UShort:
  15040. return join("u16vec", type.vecsize);
  15041. case SPIRType::Int:
  15042. return join("ivec", type.vecsize);
  15043. case SPIRType::UInt:
  15044. return join("uvec", type.vecsize);
  15045. case SPIRType::Half:
  15046. return join("f16vec", type.vecsize);
  15047. case SPIRType::BFloat16:
  15048. if (!options.vulkan_semantics)
  15049. SPIRV_CROSS_THROW("bfloat16 requires Vulkan semantics.");
  15050. require_extension_internal("GL_EXT_bfloat16");
  15051. return join("bf16vec", type.vecsize);
  15052. case SPIRType::FloatE4M3:
  15053. if (!options.vulkan_semantics)
  15054. SPIRV_CROSS_THROW("floate4m3_t requires Vulkan semantics.");
  15055. require_extension_internal("GL_EXT_float_e4m3");
  15056. return join("fe4m3vec", type.vecsize);
  15057. case SPIRType::FloatE5M2:
  15058. if (!options.vulkan_semantics)
  15059. SPIRV_CROSS_THROW("floate5m2_t requires Vulkan semantics.");
  15060. require_extension_internal("GL_EXT_float_e5m2");
  15061. return join("fe5m2vec", type.vecsize);
  15062. case SPIRType::Float:
  15063. return join("vec", type.vecsize);
  15064. case SPIRType::Double:
  15065. return join("dvec", type.vecsize);
  15066. case SPIRType::Int64:
  15067. return join("i64vec", type.vecsize);
  15068. case SPIRType::UInt64:
  15069. return join("u64vec", type.vecsize);
  15070. default:
  15071. return "???";
  15072. }
  15073. }
  15074. else if (type.vecsize == type.columns) // Simple Matrix builtin
  15075. {
  15076. switch (type.basetype)
  15077. {
  15078. case SPIRType::Boolean:
  15079. return join("bmat", type.vecsize);
  15080. case SPIRType::Int:
  15081. return join("imat", type.vecsize);
  15082. case SPIRType::UInt:
  15083. return join("umat", type.vecsize);
  15084. case SPIRType::Half:
  15085. return join("f16mat", type.vecsize);
  15086. case SPIRType::Float:
  15087. return join("mat", type.vecsize);
  15088. case SPIRType::Double:
  15089. return join("dmat", type.vecsize);
  15090. // Matrix types not supported for int64/uint64.
  15091. default:
  15092. return "???";
  15093. }
  15094. }
  15095. else
  15096. {
  15097. switch (type.basetype)
  15098. {
  15099. case SPIRType::Boolean:
  15100. return join("bmat", type.columns, "x", type.vecsize);
  15101. case SPIRType::Int:
  15102. return join("imat", type.columns, "x", type.vecsize);
  15103. case SPIRType::UInt:
  15104. return join("umat", type.columns, "x", type.vecsize);
  15105. case SPIRType::Half:
  15106. return join("f16mat", type.columns, "x", type.vecsize);
  15107. case SPIRType::Float:
  15108. return join("mat", type.columns, "x", type.vecsize);
  15109. case SPIRType::Double:
  15110. return join("dmat", type.columns, "x", type.vecsize);
  15111. // Matrix types not supported for int64/uint64.
  15112. default:
  15113. return "???";
  15114. }
  15115. }
  15116. }
  15117. void CompilerGLSL::add_variable(unordered_set<string> &variables_primary,
  15118. const unordered_set<string> &variables_secondary, string &name)
  15119. {
  15120. if (name.empty())
  15121. return;
  15122. ParsedIR::sanitize_underscores(name);
  15123. if (ParsedIR::is_globally_reserved_identifier(name, true))
  15124. {
  15125. name.clear();
  15126. return;
  15127. }
  15128. update_name_cache(variables_primary, variables_secondary, name);
  15129. }
  15130. void CompilerGLSL::add_local_variable_name(uint32_t id)
  15131. {
  15132. add_variable(local_variable_names, block_names, ir.meta[id].decoration.alias);
  15133. }
  15134. void CompilerGLSL::add_resource_name(uint32_t id)
  15135. {
  15136. add_variable(resource_names, block_names, ir.meta[id].decoration.alias);
  15137. }
  15138. void CompilerGLSL::add_header_line(const std::string &line)
  15139. {
  15140. header_lines.push_back(line);
  15141. }
  15142. bool CompilerGLSL::has_extension(const std::string &ext) const
  15143. {
  15144. auto itr = find(begin(forced_extensions), end(forced_extensions), ext);
  15145. return itr != end(forced_extensions);
  15146. }
  15147. void CompilerGLSL::require_extension(const std::string &ext)
  15148. {
  15149. if (!has_extension(ext))
  15150. forced_extensions.push_back(ext);
  15151. }
  15152. const SmallVector<std::string> &CompilerGLSL::get_required_extensions() const
  15153. {
  15154. return forced_extensions;
  15155. }
  15156. void CompilerGLSL::require_extension_internal(const string &ext)
  15157. {
  15158. if (backend.supports_extensions && !has_extension(ext))
  15159. {
  15160. forced_extensions.push_back(ext);
  15161. force_recompile();
  15162. }
  15163. }
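// Illustrative usage (the extension name is just an example): a code path that needs
// 64-bit integers might call require_extension_internal("GL_ARB_gpu_shader_int64").
// The name is recorded in forced_extensions (deduplicated through has_extension()) and a
// recompile is forced so the corresponding #extension directive can be picked up by the
// header emission on the next pass. require_extension() records an extension the same
// way, but without forcing a recompile.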
  15164. void CompilerGLSL::flatten_buffer_block(VariableID id)
  15165. {
  15166. auto &var = get<SPIRVariable>(id);
  15167. auto &type = get<SPIRType>(var.basetype);
  15168. auto name = to_name(type.self, false);
  15169. auto &flags = get_decoration_bitset(type.self);
  15170. if (!type.array.empty())
  15171. SPIRV_CROSS_THROW(name + " is an array of UBOs.");
  15172. if (type.basetype != SPIRType::Struct)
  15173. SPIRV_CROSS_THROW(name + " is not a struct.");
  15174. if (!flags.get(DecorationBlock))
  15175. SPIRV_CROSS_THROW(name + " is not a block.");
  15176. if (type.member_types.empty())
  15177. SPIRV_CROSS_THROW(name + " is an empty struct.");
  15178. flattened_buffer_blocks.insert(id);
  15179. }
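// Rough sketch of the intent (hypothetical shader): given a UBO such as
//   layout(binding = 0) uniform UBO { mat4 mvp; vec4 color; };
// flattening lets later emission paths replace the block with a plain uniform array,
// roughly "uniform vec4 UBO[5];", rewriting member accesses into indexed loads for
// targets without uniform block support. This function only validates the block and
// records it in flattened_buffer_blocks; the actual rewriting happens elsewhere.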
  15180. bool CompilerGLSL::builtin_translates_to_nonarray(BuiltIn /*builtin*/) const
  15181. {
  15182. return false; // GLSL itself does not need to translate array builtin types to non-array builtin types
  15183. }
  15184. bool CompilerGLSL::is_user_type_structured(uint32_t /*id*/) const
  15185. {
  15186. return false; // GLSL itself does not have structured user type, but HLSL does with StructuredBuffer and RWStructuredBuffer resources.
  15187. }
  15188. bool CompilerGLSL::check_atomic_image(uint32_t id)
  15189. {
  15190. auto &type = expression_type(id);
  15191. if (type.storage == StorageClassImage)
  15192. {
  15193. if (options.es && options.version < 320)
  15194. require_extension_internal("GL_OES_shader_image_atomic");
  15195. auto *var = maybe_get_backing_variable(id);
  15196. if (var)
  15197. {
  15198. if (has_decoration(var->self, DecorationNonWritable) || has_decoration(var->self, DecorationNonReadable))
  15199. {
  15200. unset_decoration(var->self, DecorationNonWritable);
  15201. unset_decoration(var->self, DecorationNonReadable);
  15202. force_recompile();
  15203. }
  15204. }
  15205. return true;
  15206. }
  15207. else
  15208. return false;
  15209. }
  15210. void CompilerGLSL::add_function_overload(const SPIRFunction &func)
  15211. {
  15212. Hasher hasher;
  15213. for (auto &arg : func.arguments)
  15214. {
  15215. // Parameters can vary with pointer type or not,
  15216. // but that will not change the signature in GLSL/HLSL,
  15217. // so strip the pointer type before hashing.
  15218. uint32_t type_id = get_pointee_type_id(arg.type);
  15219. // Workaround glslang bug. It seems to only consider the base type when resolving overloads.
  15220. if (get<SPIRType>(type_id).op == OpTypeCooperativeMatrixKHR)
  15221. type_id = get<SPIRType>(type_id).parent_type;
  15222. auto &type = get<SPIRType>(type_id);
  15223. if (!combined_image_samplers.empty())
  15224. {
  15225. // If we have combined image samplers, we cannot really trust the image and sampler arguments
  15226. // we pass down to callees, because they may be shuffled around.
  15227. // Ignore these arguments, to make sure that functions need to differ in some other way
  15228. // to be considered different overloads.
  15229. if (type.basetype == SPIRType::SampledImage ||
  15230. (type.basetype == SPIRType::Image && type.image.sampled == 1) || type.basetype == SPIRType::Sampler)
  15231. {
  15232. continue;
  15233. }
  15234. }
  15235. hasher.u32(type_id);
  15236. }
  15237. uint64_t types_hash = hasher.get();
  15238. auto function_name = to_name(func.self);
  15239. auto itr = function_overloads.find(function_name);
  15240. if (itr != end(function_overloads))
  15241. {
  15242. // There exists a function with this name already.
  15243. auto &overloads = itr->second;
  15244. if (overloads.count(types_hash) != 0)
  15245. {
  15246. // Overload conflict, assign a new name.
  15247. add_resource_name(func.self);
  15248. function_overloads[to_name(func.self)].insert(types_hash);
  15249. }
  15250. else
  15251. {
  15252. // Can reuse the name.
  15253. overloads.insert(types_hash);
  15254. }
  15255. }
  15256. else
  15257. {
  15258. // First time we see this function name.
  15259. add_resource_name(func.self);
  15260. function_overloads[to_name(func.self)].insert(types_hash);
  15261. }
  15262. }
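// Illustrative example (hypothetical names): if two OpFunctions both map to the name
// "blend" but take a float and a vec4 respectively, their type hashes differ and both
// keep the name as ordinary GLSL overloads. If the hashes collide instead (for instance
// when the parameters differ only in pointer-ness, which is stripped above), the second
// function is renamed via add_resource_name(), typically to something like "blend_1",
// so the emitted GLSL does not redeclare the same signature twice.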
  15263. void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags)
  15264. {
  15265. if (func.self != ir.default_entry_point)
  15266. add_function_overload(func);
  15267. // Avoid shadow declarations.
  15268. local_variable_names = resource_names;
  15269. string decl;
  15270. auto &type = get<SPIRType>(func.return_type);
  15271. decl += flags_to_qualifiers_glsl(type, 0, return_flags);
  15272. decl += type_to_glsl(type);
  15273. decl += type_to_array_glsl(type, 0);
  15274. decl += " ";
  15275. if (func.self == ir.default_entry_point)
  15276. {
  15277. // If we need complex fallback in GLSL, we just wrap main() in a function
  15278. // and interlock the entire shader ...
  15279. if (interlocked_is_complex)
  15280. decl += "spvMainInterlockedBody";
  15281. else
  15282. decl += "main";
  15283. processing_entry_point = true;
  15284. }
  15285. else
  15286. decl += to_name(func.self);
  15287. decl += "(";
  15288. SmallVector<string> arglist;
  15289. for (auto &arg : func.arguments)
  15290. {
  15291. // Do not pass in separate images or samplers if we're remapping
  15292. // to combined image samplers.
  15293. if (skip_argument(arg.id))
  15294. continue;
  15295. // Might change the variable name if it already exists in this function.
15296. // SPIR-V OpName doesn't have any semantic effect, so it's valid for an implementation
15297. // to use the same name for multiple variables.
  15298. // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
  15299. add_local_variable_name(arg.id);
  15300. arglist.push_back(argument_decl(arg));
  15301. // Hold a pointer to the parameter so we can invalidate the readonly field if needed.
  15302. auto *var = maybe_get<SPIRVariable>(arg.id);
  15303. if (var)
  15304. var->parameter = &arg;
  15305. }
  15306. for (auto &arg : func.shadow_arguments)
  15307. {
  15308. // Might change the variable name if it already exists in this function.
15309. // SPIR-V OpName doesn't have any semantic effect, so it's valid for an implementation
15310. // to use the same name for multiple variables.
  15311. // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
  15312. add_local_variable_name(arg.id);
  15313. arglist.push_back(argument_decl(arg));
  15314. // Hold a pointer to the parameter so we can invalidate the readonly field if needed.
  15315. auto *var = maybe_get<SPIRVariable>(arg.id);
  15316. if (var)
  15317. var->parameter = &arg;
  15318. }
  15319. decl += merge(arglist);
  15320. decl += ")";
  15321. statement(decl);
  15322. }
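// Example of an emitted prototype (illustrative; the names are hypothetical): a regular
// function might produce a statement such as "vec4 apply_fog(vec4 color, float depth)",
// while the entry point becomes "void main()" (or "void spvMainInterlockedBody()" when
// the complex interlock fallback is active), since SPIR-V entry points pass their I/O
// through global variables rather than parameters.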
  15323. void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags)
  15324. {
  15325. // Avoid potential cycles.
  15326. if (func.active)
  15327. return;
  15328. func.active = true;
  15329. // If we depend on a function, emit that function before we emit our own function.
  15330. for (auto block : func.blocks)
  15331. {
  15332. auto &b = get<SPIRBlock>(block);
  15333. for (auto &i : b.ops)
  15334. {
  15335. auto ops = stream(i);
  15336. auto op = static_cast<Op>(i.op);
  15337. if (op == OpFunctionCall)
  15338. {
  15339. // Recursively emit functions which are called.
  15340. uint32_t id = ops[2];
  15341. emit_function(get<SPIRFunction>(id), ir.meta[ops[1]].decoration.decoration_flags);
  15342. }
  15343. }
  15344. }
  15345. if (func.entry_line.file_id != 0)
  15346. emit_line_directive(func.entry_line.file_id, func.entry_line.line_literal);
  15347. emit_function_prototype(func, return_flags);
  15348. begin_scope();
  15349. if (func.self == ir.default_entry_point)
  15350. emit_entry_point_declarations();
  15351. current_function = &func;
  15352. auto &entry_block = get<SPIRBlock>(func.entry_block);
  15353. sort(begin(func.constant_arrays_needed_on_stack), end(func.constant_arrays_needed_on_stack));
  15354. for (auto &array : func.constant_arrays_needed_on_stack)
  15355. {
  15356. auto &c = get<SPIRConstant>(array);
  15357. auto &type = get<SPIRType>(c.constant_type);
  15358. statement(variable_decl(type, join("_", array, "_array_copy")), " = ", constant_expression(c), ";");
  15359. }
  15360. for (auto &v : func.local_variables)
  15361. {
  15362. auto &var = get<SPIRVariable>(v);
  15363. var.deferred_declaration = false;
  15364. if (var.storage == StorageClassTaskPayloadWorkgroupEXT)
  15365. continue;
  15366. if (variable_decl_is_remapped_storage(var, StorageClassWorkgroup))
  15367. {
15368. // Special variable types which cannot have initializers
15369. // need to be declared as standalone variables.
  15370. // Comes from MSL which can push global variables as local variables in main function.
  15371. add_local_variable_name(var.self);
  15372. statement(variable_decl(var), ";");
15373. // "Real" workgroup variables in compute shaders need extra care.
15374. // They need to be initialized by an extra routine, since their initializers can take arbitrary form.
  15375. if (var.storage == StorageClassWorkgroup && var.initializer)
  15376. emit_workgroup_initialization(var);
  15377. var.deferred_declaration = false;
  15378. }
  15379. else if (var.storage == StorageClassPrivate)
  15380. {
  15381. // These variables will not have had their CFG usage analyzed, so move it to the entry block.
  15382. // Comes from MSL which can push global variables as local variables in main function.
  15383. // We could just declare them right now, but we would miss out on an important initialization case which is
  15384. // LUT declaration in MSL.
  15385. // If we don't declare the variable when it is assigned we're forced to go through a helper function
  15386. // which copies elements one by one.
  15387. add_local_variable_name(var.self);
  15388. if (var.initializer)
  15389. {
  15390. statement(variable_decl(var), ";");
  15391. var.deferred_declaration = false;
  15392. }
  15393. else
  15394. {
  15395. auto &dominated = entry_block.dominated_variables;
  15396. if (find(begin(dominated), end(dominated), var.self) == end(dominated))
  15397. entry_block.dominated_variables.push_back(var.self);
  15398. var.deferred_declaration = true;
  15399. }
  15400. }
  15401. else if (var.storage == StorageClassFunction && var.remapped_variable && var.static_expression)
  15402. {
  15403. // No need to declare this variable, it has a static expression.
  15404. var.deferred_declaration = false;
  15405. }
  15406. else if (expression_is_lvalue(v))
  15407. {
  15408. add_local_variable_name(var.self);
15409. // Loop variables should never be declared early; they are emitted explicitly as part of their loop.
  15410. if (var.initializer && !var.loop_variable)
  15411. statement(variable_decl_function_local(var), ";");
  15412. else
  15413. {
  15414. // Don't declare variable until first use to declutter the GLSL output quite a lot.
  15415. // If we don't touch the variable before first branch,
  15416. // declare it then since we need variable declaration to be in top scope.
  15417. var.deferred_declaration = true;
  15418. }
  15419. }
  15420. else
  15421. {
  15422. // HACK: SPIR-V in older glslang output likes to use samplers and images as local variables, but GLSL does not allow this.
  15423. // For these types (non-lvalue), we enforce forwarding through a shadowed variable.
  15424. // This means that when we OpStore to these variables, we just write in the expression ID directly.
  15425. // This breaks any kind of branching, since the variable must be statically assigned.
  15426. // Branching on samplers and images would be pretty much impossible to fake in GLSL.
  15427. var.statically_assigned = true;
  15428. }
  15429. var.loop_variable_enable = false;
  15430. // Loop variables are never declared outside their for-loop, so block any implicit declaration.
  15431. if (var.loop_variable)
  15432. {
  15433. var.deferred_declaration = false;
15434. // Need to reset the static expression so we can fall back to the initializer if need be.
  15435. var.static_expression = 0;
  15436. }
  15437. }
  15438. // Enforce declaration order for regression testing purposes.
  15439. for (auto &block_id : func.blocks)
  15440. {
  15441. auto &block = get<SPIRBlock>(block_id);
  15442. sort(begin(block.dominated_variables), end(block.dominated_variables));
  15443. }
  15444. for (auto &line : current_function->fixup_hooks_in)
  15445. line();
  15446. emit_block_chain(entry_block);
  15447. end_scope();
  15448. processing_entry_point = false;
  15449. statement("");
  15450. // Make sure deferred declaration state for local variables is cleared when we are done with function.
  15451. // We risk declaring Private/Workgroup variables in places we are not supposed to otherwise.
  15452. for (auto &v : func.local_variables)
  15453. {
  15454. auto &var = get<SPIRVariable>(v);
  15455. var.deferred_declaration = false;
  15456. }
  15457. }
  15458. void CompilerGLSL::emit_fixup()
  15459. {
  15460. if (is_vertex_like_shader())
  15461. {
  15462. if (options.vertex.fixup_clipspace)
  15463. {
  15464. const char *suffix = backend.float_literal_suffix ? "f" : "";
  15465. statement("gl_Position.z = 2.0", suffix, " * gl_Position.z - gl_Position.w;");
  15466. }
  15467. if (options.vertex.flip_vert_y)
  15468. statement("gl_Position.y = -gl_Position.y;");
  15469. }
  15470. }
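// Emitted GLSL for the fixups above, assuming both options are enabled and no float
// literal suffix is required:
//   gl_Position.z = 2.0 * gl_Position.z - gl_Position.w;
//   gl_Position.y = -gl_Position.y;
// The first remaps a [0, w] clip-space depth range onto GL's [-w, w]; the second flips
// the Y axis, the usual Vulkan-to-GL viewport correction.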
  15471. void CompilerGLSL::emit_workgroup_initialization(const SPIRVariable &)
  15472. {
  15473. }
  15474. void CompilerGLSL::flush_phi(BlockID from, BlockID to)
  15475. {
  15476. auto &child = get<SPIRBlock>(to);
  15477. if (child.ignore_phi_from_block == from)
  15478. return;
  15479. unordered_set<uint32_t> temporary_phi_variables;
  15480. for (auto itr = begin(child.phi_variables); itr != end(child.phi_variables); ++itr)
  15481. {
  15482. auto &phi = *itr;
  15483. if (phi.parent == from)
  15484. {
  15485. auto &var = get<SPIRVariable>(phi.function_variable);
  15486. // A Phi variable might be a loop variable, so flush to static expression.
  15487. if (var.loop_variable && !var.loop_variable_enable)
  15488. var.static_expression = phi.local_variable;
  15489. else
  15490. {
  15491. flush_variable_declaration(phi.function_variable);
  15492. // Check if we are going to write to a Phi variable that another statement will read from
  15493. // as part of another Phi node in our target block.
  15494. // For this case, we will need to copy phi.function_variable to a temporary, and use that for future reads.
  15495. // This is judged to be extremely rare, so deal with it here using a simple, but suboptimal algorithm.
  15496. bool need_saved_temporary =
  15497. find_if(itr + 1, end(child.phi_variables), [&](const SPIRBlock::Phi &future_phi) -> bool {
  15498. return future_phi.local_variable == ID(phi.function_variable) && future_phi.parent == from;
  15499. }) != end(child.phi_variables);
  15500. if (need_saved_temporary)
  15501. {
  15502. // Need to make sure we declare the phi variable with a copy at the right scope.
  15503. // We cannot safely declare a temporary here since we might be inside a continue block.
  15504. if (!var.allocate_temporary_copy)
  15505. {
  15506. var.allocate_temporary_copy = true;
  15507. force_recompile();
  15508. }
  15509. statement("_", phi.function_variable, "_copy", " = ", to_name(phi.function_variable), ";");
  15510. temporary_phi_variables.insert(phi.function_variable);
  15511. }
15512. // This might be called in a continue block, so make sure we
15513. // use this to emit ESSL 1.0-compliant increments/decrements.
  15514. auto lhs = to_expression(phi.function_variable);
  15515. string rhs;
  15516. if (temporary_phi_variables.count(phi.local_variable))
  15517. rhs = join("_", phi.local_variable, "_copy");
  15518. else
  15519. rhs = to_pointer_expression(phi.local_variable);
  15520. if (!optimize_read_modify_write(get<SPIRType>(var.basetype), lhs, rhs))
  15521. statement(lhs, " = ", rhs, ";");
  15522. }
  15523. register_write(phi.function_variable);
  15524. }
  15525. }
  15526. }
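// For illustration (hypothetical IDs): flushing the Phi edge into a target block usually
// emits a plain copy such as "phi_var = incoming_value;". When a later Phi in the same
// target block still needs the old value of phi_var, a temporary copy "_<id>_copy" is
// written first and subsequent reads are redirected to it, which is what the
// need_saved_temporary path above handles.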
  15527. void CompilerGLSL::branch_to_continue(BlockID from, BlockID to)
  15528. {
  15529. auto &to_block = get<SPIRBlock>(to);
  15530. if (from == to)
  15531. return;
  15532. assert(is_continue(to));
  15533. if (to_block.complex_continue)
  15534. {
  15535. // Just emit the whole block chain as is.
  15536. auto usage_counts = expression_usage_counts;
  15537. emit_block_chain(to_block);
  15538. // Expression usage counts are moot after returning from the continue block.
  15539. expression_usage_counts = usage_counts;
  15540. }
  15541. else
  15542. {
  15543. auto &from_block = get<SPIRBlock>(from);
  15544. bool outside_control_flow = false;
  15545. uint32_t loop_dominator = 0;
  15546. // FIXME: Refactor this to not use the old loop_dominator tracking.
  15547. if (from_block.merge_block)
  15548. {
  15549. // If we are a loop header, we don't set the loop dominator,
  15550. // so just use "self" here.
  15551. loop_dominator = from;
  15552. }
  15553. else if (from_block.loop_dominator != BlockID(SPIRBlock::NoDominator))
  15554. {
  15555. loop_dominator = from_block.loop_dominator;
  15556. }
  15557. if (loop_dominator != 0)
  15558. {
  15559. auto &cfg = get_cfg_for_current_function();
  15560. // For non-complex continue blocks, we implicitly branch to the continue block
  15561. // by having the continue block be part of the loop header in for (; ; continue-block).
  15562. outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(loop_dominator, from);
  15563. }
  15564. // Some simplification for for-loops. We always end up with a useless continue;
  15565. // statement since we branch to a loop block.
15566. // Walk the CFG: if we unconditionally execute the block calling continue (assuming we're in the loop block),
  15567. // we can avoid writing out an explicit continue statement.
  15568. // Similar optimization to return statements if we know we're outside flow control.
  15569. if (!outside_control_flow)
  15570. statement("continue;");
  15571. }
  15572. }
  15573. void CompilerGLSL::branch(BlockID from, BlockID to)
  15574. {
  15575. flush_phi(from, to);
  15576. flush_control_dependent_expressions(from);
  15577. bool to_is_continue = is_continue(to);
  15578. // This is only a continue if we branch to our loop dominator.
  15579. if ((ir.block_meta[to] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) != 0 && get<SPIRBlock>(from).loop_dominator == to)
  15580. {
  15581. // This can happen if we had a complex continue block which was emitted.
  15582. // Once the continue block tries to branch to the loop header, just emit continue;
  15583. // and end the chain here.
  15584. statement("continue;");
  15585. }
  15586. else if (from != to && is_break(to))
  15587. {
  15588. // We cannot break to ourselves, so check explicitly for from != to.
  15589. // This case can trigger if a loop header is all three of these things:
  15590. // - Continue block
  15591. // - Loop header
  15592. // - Break merge target all at once ...
  15593. // Very dirty workaround.
  15594. // Switch constructs are able to break, but they cannot break out of a loop at the same time,
  15595. // yet SPIR-V allows it.
  15596. // Only sensible solution is to make a ladder variable, which we declare at the top of the switch block,
  15597. // write to the ladder here, and defer the break.
  15598. // The loop we're breaking out of must dominate the switch block, or there is no ladder breaking case.
  15599. if (is_loop_break(to))
  15600. {
  15601. for (size_t n = current_emitting_switch_stack.size(); n; n--)
  15602. {
  15603. auto *current_emitting_switch = current_emitting_switch_stack[n - 1];
  15604. if (current_emitting_switch &&
  15605. current_emitting_switch->loop_dominator != BlockID(SPIRBlock::NoDominator) &&
  15606. get<SPIRBlock>(current_emitting_switch->loop_dominator).merge_block == to)
  15607. {
  15608. if (!current_emitting_switch->need_ladder_break)
  15609. {
  15610. force_recompile();
  15611. current_emitting_switch->need_ladder_break = true;
  15612. }
  15613. statement("_", current_emitting_switch->self, "_ladder_break = true;");
  15614. }
  15615. else
  15616. break;
  15617. }
  15618. }
  15619. statement("break;");
  15620. }
  15621. else if (to_is_continue || from == to)
  15622. {
15623. // The from == to case can happen for a do-while loop which branches into itself.
  15624. // We don't mark these cases as continue blocks, but the only possible way to branch into
  15625. // ourselves is through means of continue blocks.
  15626. // If we are merging to a continue block, there is no need to emit the block chain for continue here.
  15627. // We can branch to the continue block after we merge execution.
  15628. // Here we make use of structured control flow rules from spec:
  15629. // 2.11: - the merge block declared by a header block cannot be a merge block declared by any other header block
  15630. // - each header block must strictly dominate its merge block, unless the merge block is unreachable in the CFG
  15631. // If we are branching to a merge block, we must be inside a construct which dominates the merge block.
  15632. auto &block_meta = ir.block_meta[to];
  15633. bool branching_to_merge =
  15634. (block_meta & (ParsedIR::BLOCK_META_SELECTION_MERGE_BIT | ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT |
  15635. ParsedIR::BLOCK_META_LOOP_MERGE_BIT)) != 0;
  15636. if (!to_is_continue || !branching_to_merge)
  15637. branch_to_continue(from, to);
  15638. }
  15639. else if (!is_conditional(to))
  15640. emit_block_chain(get<SPIRBlock>(to));
  15641. // It is important that we check for break before continue.
15642. // A block might serve two purposes: a break block for the inner scope, and
15643. // a continue block in the outer scope.
15644. // The inner scope always takes precedence.
  15645. }
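// In short (illustrative): branching back to the loop header of a complex continue block
// becomes "continue;", branching to a break target becomes "break;" (optionally setting a
// "_<id>_ladder_break" flag first when breaking out of a loop from inside a switch), and
// a plain forward branch simply continues emitting the target block chain inline.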
  15646. void CompilerGLSL::branch(BlockID from, uint32_t cond, BlockID true_block, BlockID false_block)
  15647. {
  15648. auto &from_block = get<SPIRBlock>(from);
  15649. BlockID merge_block = from_block.merge == SPIRBlock::MergeSelection ? from_block.next_block : BlockID(0);
  15650. // If we branch directly to our selection merge target, we don't need a code path.
  15651. bool true_block_needs_code = true_block != merge_block || flush_phi_required(from, true_block);
  15652. bool false_block_needs_code = false_block != merge_block || flush_phi_required(from, false_block);
  15653. if (!true_block_needs_code && !false_block_needs_code)
  15654. return;
  15655. // We might have a loop merge here. Only consider selection flattening constructs.
  15656. // Loop hints are handled explicitly elsewhere.
  15657. if (from_block.hint == SPIRBlock::HintFlatten || from_block.hint == SPIRBlock::HintDontFlatten)
  15658. emit_block_hints(from_block);
  15659. if (true_block_needs_code)
  15660. {
  15661. statement("if (", to_expression(cond), ")");
  15662. begin_scope();
  15663. branch(from, true_block);
  15664. end_scope();
  15665. if (false_block_needs_code)
  15666. {
  15667. statement("else");
  15668. begin_scope();
  15669. branch(from, false_block);
  15670. end_scope();
  15671. }
  15672. }
  15673. else if (false_block_needs_code)
  15674. {
  15675. // Only need false path, use negative conditional.
  15676. statement("if (!", to_enclosed_expression(cond), ")");
  15677. begin_scope();
  15678. branch(from, false_block);
  15679. end_scope();
  15680. }
  15681. }
  15682. // FIXME: This currently cannot handle complex continue blocks
  15683. // as in do-while.
  15684. // This should be seen as a "trivial" continue block.
  15685. string CompilerGLSL::emit_continue_block(uint32_t continue_block, bool follow_true_block, bool follow_false_block)
  15686. {
  15687. auto *block = &get<SPIRBlock>(continue_block);
  15688. // While emitting the continue block, declare_temporary will check this
15689. // to see if we have to emit temporaries.
  15690. current_continue_block = block;
  15691. SmallVector<string> statements;
  15692. // Capture all statements into our list.
  15693. auto *old = redirect_statement;
  15694. redirect_statement = &statements;
15695. // Stamp out all blocks one after the other.
  15696. while ((ir.block_meta[block->self] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) == 0)
  15697. {
  15698. // Write out all instructions we have in this block.
  15699. emit_block_instructions(*block);
  15700. // For plain branchless for/while continue blocks.
  15701. if (block->next_block)
  15702. {
  15703. flush_phi(continue_block, block->next_block);
  15704. block = &get<SPIRBlock>(block->next_block);
  15705. }
15706. // For do-while blocks: the last block will be a select block.
  15707. else if (block->true_block && follow_true_block)
  15708. {
  15709. flush_phi(continue_block, block->true_block);
  15710. block = &get<SPIRBlock>(block->true_block);
  15711. }
  15712. else if (block->false_block && follow_false_block)
  15713. {
  15714. flush_phi(continue_block, block->false_block);
  15715. block = &get<SPIRBlock>(block->false_block);
  15716. }
  15717. else
  15718. {
  15719. SPIRV_CROSS_THROW("Invalid continue block detected!");
  15720. }
  15721. }
  15722. // Restore old pointer.
  15723. redirect_statement = old;
  15724. // Somewhat ugly, strip off the last ';' since we use ',' instead.
  15725. // Ideally, we should select this behavior in statement().
  15726. for (auto &s : statements)
  15727. {
  15728. if (!s.empty() && s.back() == ';')
  15729. s.erase(s.size() - 1, 1);
  15730. }
  15731. current_continue_block = nullptr;
  15732. return merge(statements);
  15733. }
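// For illustration: if the continue construct consists of the statements "i++;" and
// "j += 2;", the trailing semicolons are stripped and the pieces are merged into a
// comma-separated list, "i++, j += 2", ready to be placed into the third clause of an
// emitted "for (...; ...; i++, j += 2)" header.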
  15734. void CompilerGLSL::emit_while_loop_initializers(const SPIRBlock &block)
  15735. {
  15736. // While loops do not take initializers, so declare all of them outside.
  15737. for (auto &loop_var : block.loop_variables)
  15738. {
  15739. auto &var = get<SPIRVariable>(loop_var);
  15740. statement(variable_decl(var), ";");
  15741. }
  15742. }
  15743. string CompilerGLSL::emit_for_loop_initializers(const SPIRBlock &block)
  15744. {
  15745. if (block.loop_variables.empty())
  15746. return "";
  15747. bool same_types = for_loop_initializers_are_same_type(block);
  15748. // We can only declare for loop initializers if all variables are of same type.
  15749. // If we cannot do this, declare individual variables before the loop header.
  15750. // We might have a loop variable candidate which was not assigned to for some reason.
  15751. uint32_t missing_initializers = 0;
  15752. for (auto &variable : block.loop_variables)
  15753. {
  15754. uint32_t expr = get<SPIRVariable>(variable).static_expression;
  15755. // Sometimes loop variables are initialized with OpUndef, but we can just declare
  15756. // a plain variable without initializer in this case.
  15757. if (expr == 0 || ir.ids[expr].get_type() == TypeUndef)
  15758. missing_initializers++;
  15759. }
  15760. if (block.loop_variables.size() == 1 && missing_initializers == 0)
  15761. {
  15762. return variable_decl(get<SPIRVariable>(block.loop_variables.front()));
  15763. }
  15764. else if (!same_types || missing_initializers == uint32_t(block.loop_variables.size()))
  15765. {
  15766. for (auto &loop_var : block.loop_variables)
  15767. statement(variable_decl(get<SPIRVariable>(loop_var)), ";");
  15768. return "";
  15769. }
  15770. else
  15771. {
  15772. // We have a mix of loop variables, either ones with a clear initializer, or ones without.
  15773. // Separate the two streams.
  15774. string expr;
  15775. for (auto &loop_var : block.loop_variables)
  15776. {
  15777. uint32_t static_expr = get<SPIRVariable>(loop_var).static_expression;
  15778. if (static_expr == 0 || ir.ids[static_expr].get_type() == TypeUndef)
  15779. {
  15780. statement(variable_decl(get<SPIRVariable>(loop_var)), ";");
  15781. }
  15782. else
  15783. {
  15784. auto &var = get<SPIRVariable>(loop_var);
  15785. auto &type = get_variable_data_type(var);
  15786. if (expr.empty())
  15787. {
  15788. // For loop initializers are of the form <type id = value, id = value, id = value, etc ...
  15789. expr = join(to_qualifiers_glsl(var.self), type_to_glsl(type), " ");
  15790. }
  15791. else
  15792. {
  15793. expr += ", ";
  15794. // In MSL, being based on C++, the asterisk marking a pointer
  15795. // binds to the identifier, not the type.
  15796. if (type.pointer)
  15797. expr += "* ";
  15798. }
  15799. expr += join(to_name(loop_var), " = ", to_pointer_expression(var.static_expression));
  15800. }
  15801. }
  15802. return expr;
  15803. }
  15804. }
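// For illustration (hypothetical loop variables): when all candidates share a type and
// have static initializers, the returned string looks like "int i = 0, j = 10" and goes
// straight into the for-header. A candidate without an initializer (or one initialized
// with OpUndef) is instead declared on its own line before the loop, while the remaining
// candidates still end up in the header.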
  15805. bool CompilerGLSL::for_loop_initializers_are_same_type(const SPIRBlock &block)
  15806. {
  15807. if (block.loop_variables.size() <= 1)
  15808. return true;
  15809. uint32_t expected = 0;
  15810. Bitset expected_flags;
  15811. for (auto &var : block.loop_variables)
  15812. {
  15813. // Don't care about uninitialized variables as they will not be part of the initializers.
  15814. uint32_t expr = get<SPIRVariable>(var).static_expression;
  15815. if (expr == 0 || ir.ids[expr].get_type() == TypeUndef)
  15816. continue;
  15817. if (expected == 0)
  15818. {
  15819. expected = get<SPIRVariable>(var).basetype;
  15820. expected_flags = get_decoration_bitset(var);
  15821. }
  15822. else if (expected != get<SPIRVariable>(var).basetype)
  15823. return false;
  15824. // Precision flags and things like that must also match.
  15825. if (expected_flags != get_decoration_bitset(var))
  15826. return false;
  15827. }
  15828. return true;
  15829. }
  15830. void CompilerGLSL::emit_block_instructions_with_masked_debug(SPIRBlock &block)
  15831. {
15832. // Have to block debug instructions such as OpLine here, since they would otherwise be treated as statements,
15833. // which breaks loop optimizations.
  15834. // Any line directive would be declared outside the loop body, which would just be confusing either way.
  15835. bool old_block_debug_directives = block_debug_directives;
  15836. block_debug_directives = true;
  15837. emit_block_instructions(block);
  15838. block_debug_directives = old_block_debug_directives;
  15839. }
  15840. bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method)
  15841. {
  15842. SPIRBlock::ContinueBlockType continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
  15843. if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop)
  15844. {
  15845. uint32_t current_count = statement_count;
  15846. // If we're trying to create a true for loop,
  15847. // we need to make sure that all opcodes before branch statement do not actually emit any code.
  15848. // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
  15849. emit_block_instructions_with_masked_debug(block);
  15850. bool condition_is_temporary = forced_temporaries.find(block.condition) == end(forced_temporaries);
  15851. bool flushes_phi = flush_phi_required(block.self, block.true_block) ||
  15852. flush_phi_required(block.self, block.false_block);
  15853. // This can work! We only did trivial things which could be forwarded in block body!
  15854. if (!flushes_phi && current_count == statement_count && condition_is_temporary)
  15855. {
  15856. switch (continue_type)
  15857. {
  15858. case SPIRBlock::ForLoop:
  15859. {
  15860. // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
  15861. flush_undeclared_variables(block);
  15862. // Important that we do this in this order because
  15863. // emitting the continue block can invalidate the condition expression.
  15864. auto initializer = emit_for_loop_initializers(block);
  15865. auto condition = to_expression(block.condition);
  15866. // Condition might have to be inverted.
  15867. if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
  15868. condition = join("!", enclose_expression(condition));
  15869. emit_block_hints(block);
  15870. if (method != SPIRBlock::MergeToSelectContinueForLoop)
  15871. {
  15872. auto continue_block = emit_continue_block(block.continue_block, false, false);
  15873. statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
  15874. }
  15875. else
  15876. statement("for (", initializer, "; ", condition, "; )");
  15877. break;
  15878. }
  15879. case SPIRBlock::WhileLoop:
  15880. {
  15881. // This block may be a dominating block, so make sure we flush undeclared variables before building the while loop header.
  15882. flush_undeclared_variables(block);
  15883. emit_while_loop_initializers(block);
  15884. emit_block_hints(block);
  15885. auto condition = to_expression(block.condition);
  15886. // Condition might have to be inverted.
  15887. if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
  15888. condition = join("!", enclose_expression(condition));
  15889. statement("while (", condition, ")");
  15890. break;
  15891. }
  15892. default:
  15893. block.disable_block_optimization = true;
  15894. force_recompile();
  15895. begin_scope(); // We'll see an end_scope() later.
  15896. return false;
  15897. }
  15898. begin_scope();
  15899. return true;
  15900. }
  15901. else
  15902. {
  15903. block.disable_block_optimization = true;
  15904. force_recompile();
  15905. begin_scope(); // We'll see an end_scope() later.
  15906. return false;
  15907. }
  15908. }
  15909. else if (method == SPIRBlock::MergeToDirectForLoop)
  15910. {
  15911. auto &child = get<SPIRBlock>(block.next_block);
  15912. // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
  15913. flush_undeclared_variables(child);
  15914. uint32_t current_count = statement_count;
  15915. // If we're trying to create a true for loop,
  15916. // we need to make sure that all opcodes before branch statement do not actually emit any code.
  15917. // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
  15918. emit_block_instructions_with_masked_debug(child);
  15919. bool condition_is_temporary = forced_temporaries.find(child.condition) == end(forced_temporaries);
  15920. bool flushes_phi = flush_phi_required(child.self, child.true_block) ||
  15921. flush_phi_required(child.self, child.false_block);
  15922. if (!flushes_phi && current_count == statement_count && condition_is_temporary)
  15923. {
  15924. uint32_t target_block = child.true_block;
  15925. switch (continue_type)
  15926. {
  15927. case SPIRBlock::ForLoop:
  15928. {
  15929. // Important that we do this in this order because
  15930. // emitting the continue block can invalidate the condition expression.
  15931. auto initializer = emit_for_loop_initializers(block);
  15932. auto condition = to_expression(child.condition);
  15933. // Condition might have to be inverted.
  15934. if (execution_is_noop(get<SPIRBlock>(child.true_block), get<SPIRBlock>(block.merge_block)))
  15935. {
  15936. condition = join("!", enclose_expression(condition));
  15937. target_block = child.false_block;
  15938. }
  15939. auto continue_block = emit_continue_block(block.continue_block, false, false);
  15940. emit_block_hints(block);
  15941. statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
  15942. break;
  15943. }
  15944. case SPIRBlock::WhileLoop:
  15945. {
  15946. emit_while_loop_initializers(block);
  15947. emit_block_hints(block);
  15948. auto condition = to_expression(child.condition);
  15949. // Condition might have to be inverted.
  15950. if (execution_is_noop(get<SPIRBlock>(child.true_block), get<SPIRBlock>(block.merge_block)))
  15951. {
  15952. condition = join("!", enclose_expression(condition));
  15953. target_block = child.false_block;
  15954. }
  15955. statement("while (", condition, ")");
  15956. break;
  15957. }
  15958. default:
  15959. block.disable_block_optimization = true;
  15960. force_recompile();
  15961. begin_scope(); // We'll see an end_scope() later.
  15962. return false;
  15963. }
  15964. begin_scope();
  15965. branch(child.self, target_block);
  15966. return true;
  15967. }
  15968. else
  15969. {
  15970. block.disable_block_optimization = true;
  15971. force_recompile();
  15972. begin_scope(); // We'll see an end_scope() later.
  15973. return false;
  15974. }
  15975. }
  15976. else
  15977. return false;
  15978. }
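// Summary of the shapes this can emit (illustrative): when the opcodes before the branch
// are pure forwarding, the header becomes "for (<initializers>; <condition>; <continue-block>)"
// (with an empty third clause for the MergeToSelectContinueForLoop method) or
// "while (<condition>)", negating the condition when the true path is a no-op. Any other
// shape sets disable_block_optimization and forces a recompile, so the block falls back
// to a generic loop on the next pass.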
  15979. void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block)
  15980. {
  15981. for (auto &v : block.dominated_variables)
  15982. flush_variable_declaration(v);
  15983. }
  15984. void CompilerGLSL::emit_hoisted_temporaries(SmallVector<pair<TypeID, ID>> &temporaries)
  15985. {
  15986. // If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header.
  15987. // Need to sort these to ensure that reference output is stable.
  15988. sort(begin(temporaries), end(temporaries),
  15989. [](const pair<TypeID, ID> &a, const pair<TypeID, ID> &b) { return a.second < b.second; });
  15990. for (auto &tmp : temporaries)
  15991. {
  15992. auto &type = get<SPIRType>(tmp.first);
  15993. // There are some rare scenarios where we are asked to declare pointer types as hoisted temporaries.
  15994. // This should be ignored unless we're doing actual variable pointers and backend supports it.
  15995. // Access chains cannot normally be lowered to temporaries in GLSL and HLSL.
  15996. if (type.pointer && !backend.native_pointers)
  15997. continue;
  15998. add_local_variable_name(tmp.second);
  15999. auto &flags = get_decoration_bitset(tmp.second);
  16000. // Not all targets support pointer literals, so don't bother with that case.
  16001. string initializer;
  16002. if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
  16003. initializer = join(" = ", to_zero_initialized_expression(tmp.first));
  16004. statement(flags_to_qualifiers_glsl(type, tmp.second, flags), variable_decl(type, to_name(tmp.second)), initializer, ";");
  16005. hoisted_temporaries.insert(tmp.second);
  16006. forced_temporaries.insert(tmp.second);
  16007. // The temporary might be read from before it's assigned, set up the expression now.
  16008. set<SPIRExpression>(tmp.second, to_name(tmp.second), tmp.first, true);
  16009. // If we have hoisted temporaries in multi-precision contexts, emit that here too ...
  16010. // We will not be able to analyze hoisted-ness for dependent temporaries that we hallucinate here.
  16011. auto mirrored_precision_itr = temporary_to_mirror_precision_alias.find(tmp.second);
  16012. if (mirrored_precision_itr != temporary_to_mirror_precision_alias.end())
  16013. {
  16014. uint32_t mirror_id = mirrored_precision_itr->second;
  16015. auto &mirror_flags = get_decoration_bitset(mirror_id);
  16016. statement(flags_to_qualifiers_glsl(type, mirror_id, mirror_flags),
  16017. variable_decl(type, to_name(mirror_id)),
  16018. initializer, ";");
  16019. // The temporary might be read from before it's assigned, set up the expression now.
  16020. set<SPIRExpression>(mirror_id, to_name(mirror_id), tmp.first, true);
  16021. hoisted_temporaries.insert(mirror_id);
  16022. }
  16023. }
  16024. }
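// For illustration (hypothetical ID): a hoisted temporary of type vec4 with ID 37 is
// declared ahead of the loop roughly as "vec4 _37;" (or "vec4 _37 = vec4(0.0);" when
// force_zero_initialized_variables is set), and a mirrored-precision alias receives a
// second declaration with its own precision qualifiers.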
  16025. void CompilerGLSL::emit_block_chain(SPIRBlock &block)
  16026. {
  16027. SmallVector<BlockID> cleanup_stack;
  16028. BlockID next_block = emit_block_chain_inner(block);
  16029. while (next_block != 0)
  16030. {
  16031. cleanup_stack.push_back(next_block);
  16032. next_block = emit_block_chain_inner(get<SPIRBlock>(next_block));
  16033. }
  16034. while (!cleanup_stack.empty())
  16035. {
  16036. emit_block_chain_cleanup(get<SPIRBlock>(cleanup_stack.back()));
  16037. cleanup_stack.pop_back();
  16038. }
  16039. emit_block_chain_cleanup(block);
  16040. }
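// Descriptive note (inference): emit_block_chain() walks the chain iteratively via
// emit_block_chain_inner(), pushing every follow-on block onto cleanup_stack and then
// running emit_block_chain_cleanup() in reverse order once emission is done, presumably
// to keep long block chains from recursing on the call stack.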
  16041. BlockID CompilerGLSL::emit_block_chain_inner(SPIRBlock &block)
  16042. {
  16043. bool select_branch_to_true_block = false;
  16044. bool select_branch_to_false_block = false;
  16045. bool skip_direct_branch = false;
  16046. bool emitted_loop_header_variables = false;
  16047. bool force_complex_continue_block = false;
  16048. ValueSaver<uint32_t> loop_level_saver(current_loop_level);
  16049. if (block.merge == SPIRBlock::MergeLoop)
  16050. add_loop_level();
  16051. // If we're emitting PHI variables with precision aliases, we have to emit them as hoisted temporaries.
  16052. for (auto var_id : block.dominated_variables)
  16053. {
  16054. auto &var = get<SPIRVariable>(var_id);
  16055. if (var.phi_variable)
  16056. {
  16057. auto mirrored_precision_itr = temporary_to_mirror_precision_alias.find(var_id);
  16058. if (mirrored_precision_itr != temporary_to_mirror_precision_alias.end() &&
  16059. find_if(block.declare_temporary.begin(), block.declare_temporary.end(),
  16060. [mirrored_precision_itr](const std::pair<TypeID, VariableID> &p) {
  16061. return p.second == mirrored_precision_itr->second;
  16062. }) == block.declare_temporary.end())
  16063. {
  16064. block.declare_temporary.push_back({ var.basetype, mirrored_precision_itr->second });
  16065. }
  16066. }
  16067. }
  16068. emit_hoisted_temporaries(block.declare_temporary);
  16069. SPIRBlock::ContinueBlockType continue_type = SPIRBlock::ContinueNone;
  16070. if (block.continue_block)
  16071. {
  16072. continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
  16073. // If we know we cannot emit a loop, mark the block early as a complex loop so we don't force unnecessary recompiles.
  16074. if (continue_type == SPIRBlock::ComplexLoop)
  16075. block.complex_continue = true;
  16076. }
  16077. // If we have loop variables, stop masking out access to the variable now.
  16078. for (auto var_id : block.loop_variables)
  16079. {
  16080. auto &var = get<SPIRVariable>(var_id);
  16081. var.loop_variable_enable = true;
  16082. // We're not going to declare the variable directly, so emit a copy here.
  16083. emit_variable_temporary_copies(var);
  16084. }
  16085. // Remember deferred declaration state. We will restore it before returning.
  16086. assert(block.rearm_dominated_variables.empty());
  16087. block.rearm_dominated_variables.resize(block.dominated_variables.size());
  16088. for (size_t i = 0; i < block.dominated_variables.size(); i++)
  16089. {
  16090. uint32_t var_id = block.dominated_variables[i];
  16091. auto &var = get<SPIRVariable>(var_id);
  16092. block.rearm_dominated_variables[i] = var.deferred_declaration;
  16093. }
  16094. // This is the method often used by spirv-opt to implement loops.
  16095. // The loop header goes straight into the continue block.
  16096. // However, don't attempt this on ESSL 1.0, because if a loop variable is used in a continue block,
  16097. // it *MUST* be used in the continue block. This loop method will not work.
  16098. if (!is_legacy_es() && block_is_loop_candidate(block, SPIRBlock::MergeToSelectContinueForLoop))
  16099. {
  16100. flush_undeclared_variables(block);
  16101. if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectContinueForLoop))
  16102. {
  16103. if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
  16104. select_branch_to_false_block = true;
  16105. else
  16106. select_branch_to_true_block = true;
  16107. emitted_loop_header_variables = true;
  16108. force_complex_continue_block = true;
  16109. }
  16110. }
  16111. // This is the older loop behavior in glslang which branches to loop body directly from the loop header.
  16112. else if (block_is_loop_candidate(block, SPIRBlock::MergeToSelectForLoop))
  16113. {
  16114. flush_undeclared_variables(block);
  16115. if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectForLoop))
  16116. {
16117. // The body of the while loop is actually just the true (or false) block, so always branch there unconditionally.
  16118. if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
  16119. select_branch_to_false_block = true;
  16120. else
  16121. select_branch_to_true_block = true;
  16122. emitted_loop_header_variables = true;
  16123. }
  16124. }
  16125. // This is the newer loop behavior in glslang which branches from Loop header directly to
  16126. // a new block, which in turn has a OpBranchSelection without a selection merge.
  16127. else if (block_is_loop_candidate(block, SPIRBlock::MergeToDirectForLoop))
  16128. {
  16129. flush_undeclared_variables(block);
  16130. if (attempt_emit_loop_header(block, SPIRBlock::MergeToDirectForLoop))
  16131. {
  16132. skip_direct_branch = true;
  16133. emitted_loop_header_variables = true;
  16134. }
  16135. }
  16136. else if (continue_type == SPIRBlock::DoWhileLoop)
  16137. {
  16138. flush_undeclared_variables(block);
  16139. emit_while_loop_initializers(block);
  16140. emitted_loop_header_variables = true;
  16141. // We have some temporaries where the loop header is the dominator.
  16142. // We risk a case where we have code like:
  16143. // for (;;) { create-temporary; break; } consume-temporary;
  16144. // so force-declare temporaries here.
  16145. emit_hoisted_temporaries(block.potential_declare_temporary);
  16146. statement("do");
  16147. begin_scope();
  16148. emit_block_instructions(block);
  16149. }
  16150. else if (block.merge == SPIRBlock::MergeLoop)
  16151. {
  16152. flush_undeclared_variables(block);
  16153. emit_while_loop_initializers(block);
  16154. emitted_loop_header_variables = true;
  16155. // We have a generic loop without any distinguishable pattern like for, while or do while.
  16156. get<SPIRBlock>(block.continue_block).complex_continue = true;
  16157. continue_type = SPIRBlock::ComplexLoop;
  16158. // We have some temporaries where the loop header is the dominator.
  16159. // We risk a case where we have code like:
  16160. // for (;;) { create-temporary; break; } consume-temporary;
  16161. // so force-declare temporaries here.
  16162. emit_hoisted_temporaries(block.potential_declare_temporary);
  16163. emit_block_hints(block);
  16164. statement("for (;;)");
  16165. begin_scope();
  16166. emit_block_instructions(block);
  16167. }
  16168. else
  16169. {
  16170. emit_block_instructions(block);
  16171. }
  16172. // If we didn't successfully emit a loop header and we had loop variable candidates, we have a problem
  16173. // as writes to said loop variables might have been masked out, we need a recompile.
  16174. if (!emitted_loop_header_variables && !block.loop_variables.empty())
  16175. {
  16176. force_recompile_guarantee_forward_progress();
  16177. for (auto var : block.loop_variables)
  16178. get<SPIRVariable>(var).loop_variable = false;
  16179. block.loop_variables.clear();
  16180. }
  16181. flush_undeclared_variables(block);
  16182. bool emit_next_block = true;
  16183. // Handle end of block.
  16184. switch (block.terminator)
  16185. {
  16186. case SPIRBlock::Direct:
  16187. // True when emitting complex continue block.
  16188. if (block.loop_dominator == block.next_block)
  16189. {
  16190. branch(block.self, block.next_block);
  16191. emit_next_block = false;
  16192. }
  16193. // True if MergeToDirectForLoop succeeded.
  16194. else if (skip_direct_branch)
  16195. emit_next_block = false;
  16196. else if (is_continue(block.next_block) || is_break(block.next_block) || is_conditional(block.next_block))
  16197. {
  16198. branch(block.self, block.next_block);
  16199. emit_next_block = false;
  16200. }
  16201. break;
  16202. case SPIRBlock::Select:
  16203. // True if MergeToSelectForLoop or MergeToSelectContinueForLoop succeeded.
  16204. if (select_branch_to_true_block)
  16205. {
  16206. if (force_complex_continue_block)
  16207. {
  16208. assert(block.true_block == block.continue_block);
  16209. // We're going to emit a continue block directly here, so make sure it's marked as complex.
  16210. auto &complex_continue = get<SPIRBlock>(block.continue_block).complex_continue;
  16211. bool old_complex = complex_continue;
  16212. complex_continue = true;
  16213. branch(block.self, block.true_block);
  16214. complex_continue = old_complex;
  16215. }
  16216. else
  16217. branch(block.self, block.true_block);
  16218. }
  16219. else if (select_branch_to_false_block)
  16220. {
  16221. if (force_complex_continue_block)
  16222. {
  16223. assert(block.false_block == block.continue_block);
  16224. // We're going to emit a continue block directly here, so make sure it's marked as complex.
  16225. auto &complex_continue = get<SPIRBlock>(block.continue_block).complex_continue;
  16226. bool old_complex = complex_continue;
  16227. complex_continue = true;
  16228. branch(block.self, block.false_block);
  16229. complex_continue = old_complex;
  16230. }
  16231. else
  16232. branch(block.self, block.false_block);
  16233. }
  16234. else
  16235. branch(block.self, block.condition, block.true_block, block.false_block);
  16236. break;
  16237. case SPIRBlock::MultiSelect:
  16238. {
  16239. auto &type = expression_type(block.condition);
  16240. bool unsigned_case = type.basetype == SPIRType::UInt || type.basetype == SPIRType::UShort ||
  16241. type.basetype == SPIRType::UByte || type.basetype == SPIRType::UInt64;
  16242. if (block.merge == SPIRBlock::MergeNone)
  16243. SPIRV_CROSS_THROW("Switch statement is not structured");
  16244. if (!backend.support_64bit_switch && (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64))
  16245. {
  16246. // SPIR-V spec suggests this is allowed, but we cannot support it in higher level languages.
  16247. SPIRV_CROSS_THROW("Cannot use 64-bit switch selectors.");
  16248. }
  16249. const char *label_suffix = "";
  16250. if (type.basetype == SPIRType::UInt && backend.uint32_t_literal_suffix)
  16251. label_suffix = "u";
  16252. else if (type.basetype == SPIRType::Int64 && backend.support_64bit_switch)
  16253. label_suffix = "l";
  16254. else if (type.basetype == SPIRType::UInt64 && backend.support_64bit_switch)
  16255. label_suffix = "ul";
  16256. else if (type.basetype == SPIRType::UShort)
  16257. label_suffix = backend.uint16_t_literal_suffix;
  16258. else if (type.basetype == SPIRType::Short)
  16259. label_suffix = backend.int16_t_literal_suffix;
  16260. current_emitting_switch_stack.push_back(&block);
  16261. if (block.need_ladder_break)
  16262. statement("bool _", block.self, "_ladder_break = false;");
  16263. // Find all unique case constructs.
  16264. unordered_map<uint32_t, SmallVector<uint64_t>> case_constructs;
  16265. SmallVector<uint32_t> block_declaration_order;
  16266. SmallVector<uint64_t> literals_to_merge;
  16267. // If a switch case branches to the default block for some reason, we can just remove that literal from consideration
  16268. // and let the default: block handle it.
  16269. // 2.11 in SPIR-V spec states that for fall-through cases, there is a very strict declaration order which we can take advantage of here.
  16270. // We only need to consider possible fallthrough if order[i] branches to order[i + 1].
  16271. auto &cases = get_case_list(block);
  16272. for (auto &c : cases)
  16273. {
  16274. if (c.block != block.next_block && c.block != block.default_block)
  16275. {
  16276. if (!case_constructs.count(c.block))
  16277. block_declaration_order.push_back(c.block);
  16278. case_constructs[c.block].push_back(c.value);
  16279. }
  16280. else if (c.block == block.next_block && block.default_block != block.next_block)
  16281. {
  16282. // We might have to flush phi inside specific case labels.
  16283. // If we can piggyback on default:, do so instead.
  16284. literals_to_merge.push_back(c.value);
  16285. }
  16286. }
  16287. // Empty literal array -> default.
  16288. if (block.default_block != block.next_block)
  16289. {
  16290. auto &default_block = get<SPIRBlock>(block.default_block);
  16291. // We need to slide in the default block somewhere in this chain
  16292. // if there are fall-through scenarios since the default is declared separately in OpSwitch.
  16293. // Only consider trivial fall-through cases here.
  16294. size_t num_blocks = block_declaration_order.size();
  16295. bool injected_block = false;
  16296. for (size_t i = 0; i < num_blocks; i++)
  16297. {
  16298. auto &case_block = get<SPIRBlock>(block_declaration_order[i]);
  16299. if (execution_is_direct_branch(case_block, default_block))
  16300. {
  16301. // Fallthrough to default block, we must inject the default block here.
  16302. block_declaration_order.insert(begin(block_declaration_order) + i + 1, block.default_block);
  16303. injected_block = true;
  16304. break;
  16305. }
  16306. else if (execution_is_direct_branch(default_block, case_block))
  16307. {
  16308. // Default case is falling through to another case label, we must inject the default block here.
  16309. block_declaration_order.insert(begin(block_declaration_order) + i, block.default_block);
  16310. injected_block = true;
  16311. break;
  16312. }
  16313. }
  16314. // Order does not matter.
  16315. if (!injected_block)
  16316. block_declaration_order.push_back(block.default_block);
  16317. else if (is_legacy_es())
  16318. SPIRV_CROSS_THROW("Default case label fallthrough to other case label is not supported in ESSL 1.0.");
  16319. case_constructs[block.default_block] = {};
  16320. }
  16321. size_t num_blocks = block_declaration_order.size();
  16322. const auto to_case_label = [](uint64_t literal, uint32_t width, bool is_unsigned_case) -> string
  16323. {
  16324. if (is_unsigned_case)
  16325. return convert_to_string(literal);
  16326. // For smaller cases, the literals are compiled as 32 bit wide
  16327. // literals so we don't need to care for all sizes specifically.
  16328. if (width <= 32)
  16329. {
  16330. return convert_to_string(int64_t(int32_t(literal)));
  16331. }
  16332. return convert_to_string(int64_t(literal));
  16333. };
  16334. const auto to_legacy_case_label = [&](uint32_t condition, const SmallVector<uint64_t> &labels,
  16335. const char *suffix) -> string {
  16336. string ret;
  16337. size_t count = labels.size();
  16338. for (size_t i = 0; i < count; i++)
  16339. {
  16340. if (i)
  16341. ret += " || ";
  16342. ret += join(count > 1 ? "(" : "", to_enclosed_expression(condition), " == ", labels[i], suffix,
  16343. count > 1 ? ")" : "");
  16344. }
  16345. return ret;
  16346. };
  16347. // We need to deal with a complex scenario for OpPhi. If we have case-fallthrough and Phi in the picture,
  16348. // we need to flush phi nodes outside the switch block in a branch,
  16349. // and skip any Phi handling inside the case label to make fall-through work as expected.
  16350. // This kind of code-gen is super awkward and it's a last resort. Normally we would want to handle this
  16351. // inside the case label if at all possible.
  16352. for (size_t i = 1; backend.support_case_fallthrough && i < num_blocks; i++)
  16353. {
  16354. if (flush_phi_required(block.self, block_declaration_order[i]) &&
  16355. flush_phi_required(block_declaration_order[i - 1], block_declaration_order[i]))
  16356. {
  16357. uint32_t target_block = block_declaration_order[i];
  16358. // Make sure we flush Phi, it might have been marked to be ignored earlier.
  16359. get<SPIRBlock>(target_block).ignore_phi_from_block = 0;
  16360. auto &literals = case_constructs[target_block];
  16361. if (literals.empty())
  16362. {
  16363. // Oh boy, gotta make a complete negative test instead! o.o
  16364. // Find all possible literals that would *not* make us enter the default block.
  16365. // If none of those literals match, we flush Phi ...
  16366. SmallVector<string> conditions;
  16367. for (size_t j = 0; j < num_blocks; j++)
  16368. {
  16369. auto &negative_literals = case_constructs[block_declaration_order[j]];
  16370. for (auto &case_label : negative_literals)
  16371. conditions.push_back(join(to_enclosed_expression(block.condition),
  16372. " != ", to_case_label(case_label, type.width, unsigned_case)));
  16373. }
  16374. statement("if (", merge(conditions, " && "), ")");
  16375. begin_scope();
  16376. flush_phi(block.self, target_block);
  16377. end_scope();
  16378. }
  16379. else
  16380. {
  16381. SmallVector<string> conditions;
  16382. conditions.reserve(literals.size());
  16383. for (auto &case_label : literals)
  16384. conditions.push_back(join(to_enclosed_expression(block.condition),
  16385. " == ", to_case_label(case_label, type.width, unsigned_case)));
  16386. statement("if (", merge(conditions, " || "), ")");
  16387. begin_scope();
  16388. flush_phi(block.self, target_block);
  16389. end_scope();
  16390. }
  16391. // Mark the block so that we don't flush Phi from header to case label.
  16392. get<SPIRBlock>(target_block).ignore_phi_from_block = block.self;
  16393. }
  16394. }
  16395. // If there is only one default block, and no cases, this is a case where SPIRV-opt decided to emulate
  16396. // non-structured exits with the help of a switch block.
  16397. // This is buggy on FXC, so just emit the logical equivalent of a do { } while(false), which is more idiomatic.
  16398. bool block_like_switch = cases.empty();
  16399. // If this is true, the switch is completely meaningless, and we should just avoid it.
  16400. bool collapsed_switch = block_like_switch && block.default_block == block.next_block;
  16401. if (!collapsed_switch)
  16402. {
  16403. if (block_like_switch || is_legacy())
  16404. {
  16405. // ESSL 1.0 is not guaranteed to support do/while.
  16406. if (is_legacy_es())
  16407. {
  16408. uint32_t counter = statement_count;
  16409. statement("for (int spvDummy", counter, " = 0; spvDummy", counter, " < 1; spvDummy", counter,
  16410. "++)");
  16411. }
  16412. else
  16413. statement("do");
  16414. }
  16415. else
  16416. {
  16417. emit_block_hints(block);
  16418. statement("switch (", to_unpacked_expression(block.condition), ")");
  16419. }
  16420. begin_scope();
  16421. }
  16422. for (size_t i = 0; i < num_blocks; i++)
  16423. {
  16424. uint32_t target_block = block_declaration_order[i];
  16425. auto &literals = case_constructs[target_block];
  16426. if (literals.empty())
  16427. {
  16428. // Default case.
  16429. if (!block_like_switch)
  16430. {
  16431. if (is_legacy())
  16432. statement("else");
  16433. else
  16434. statement("default:");
  16435. }
  16436. }
  16437. else
  16438. {
  16439. if (is_legacy())
  16440. {
  16441. statement((i ? "else " : ""), "if (", to_legacy_case_label(block.condition, literals, label_suffix),
  16442. ")");
  16443. }
  16444. else
  16445. {
  16446. for (auto &case_literal : literals)
  16447. {
  16448. // The case label value must be sign-extended properly in SPIR-V, so we can assume 32-bit values here.
  16449. statement("case ", to_case_label(case_literal, type.width, unsigned_case), label_suffix, ":");
  16450. }
  16451. }
  16452. }
  16453. auto &case_block = get<SPIRBlock>(target_block);
  16454. if (backend.support_case_fallthrough && i + 1 < num_blocks &&
  16455. execution_is_direct_branch(case_block, get<SPIRBlock>(block_declaration_order[i + 1])))
  16456. {
  16457. // We will fall through here, so just terminate the block chain early.
  16458. // We still need to deal with Phi potentially.
  16459. // No need for a stack-like thing here since we only do fall-through when there is a
// single trivial branch to the fall-through target.
  16461. current_emitting_switch_fallthrough = true;
  16462. }
  16463. else
  16464. current_emitting_switch_fallthrough = false;
  16465. if (!block_like_switch)
  16466. begin_scope();
  16467. branch(block.self, target_block);
  16468. if (!block_like_switch)
  16469. end_scope();
  16470. current_emitting_switch_fallthrough = false;
  16471. }
  16472. // Might still have to flush phi variables if we branch from loop header directly to merge target.
  16473. // This is supposed to emit all cases where we branch from header to merge block directly.
// There are two main scenarios where we cannot rely on default fallthrough.
  16475. // - There is an explicit default: label already.
  16476. // In this case, literals_to_merge need to form their own "default" case, so that we avoid executing that block.
  16477. // - Header -> Merge requires flushing PHI. In this case, we need to collect all cases and flush PHI there.
  16478. bool header_merge_requires_phi = flush_phi_required(block.self, block.next_block);
  16479. bool need_fallthrough_block = block.default_block == block.next_block || !literals_to_merge.empty();
  16480. if (!collapsed_switch && ((header_merge_requires_phi && need_fallthrough_block) || !literals_to_merge.empty()))
  16481. {
  16482. for (auto &case_literal : literals_to_merge)
  16483. statement("case ", to_case_label(case_literal, type.width, unsigned_case), label_suffix, ":");
  16484. if (block.default_block == block.next_block)
  16485. {
  16486. if (is_legacy())
  16487. statement("else");
  16488. else
  16489. statement("default:");
  16490. }
  16491. begin_scope();
  16492. flush_phi(block.self, block.next_block);
  16493. statement("break;");
  16494. end_scope();
  16495. }
  16496. if (!collapsed_switch)
  16497. {
  16498. if ((block_like_switch || is_legacy()) && !is_legacy_es())
  16499. end_scope_decl("while(false)");
  16500. else
  16501. end_scope();
  16502. }
  16503. else
  16504. flush_phi(block.self, block.next_block);
  16505. if (block.need_ladder_break)
  16506. {
  16507. statement("if (_", block.self, "_ladder_break)");
  16508. begin_scope();
  16509. statement("break;");
  16510. end_scope();
  16511. }
  16512. current_emitting_switch_stack.pop_back();
  16513. break;
  16514. }
  16515. case SPIRBlock::Return:
  16516. {
  16517. for (auto &line : current_function->fixup_hooks_out)
  16518. line();
  16519. if (processing_entry_point)
  16520. emit_fixup();
  16521. auto &cfg = get_cfg_for_current_function();
  16522. if (block.return_value)
  16523. {
  16524. auto &type = expression_type(block.return_value);
  16525. if (!type.array.empty() && !backend.can_return_array)
  16526. {
  16527. // If we cannot return arrays, we will have a special out argument we can write to instead.
// The backend is responsible for setting this up, and redirecting the return values as appropriate.
  16529. if (ir.ids[block.return_value].get_type() != TypeUndef)
  16530. {
  16531. emit_array_copy("spvReturnValue", 0, block.return_value, StorageClassFunction,
  16532. get_expression_effective_storage_class(block.return_value));
  16533. }
  16534. if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) ||
  16535. block.loop_dominator != BlockID(SPIRBlock::NoDominator))
  16536. {
  16537. statement("return;");
  16538. }
  16539. }
  16540. else
  16541. {
  16542. // OpReturnValue can return Undef, so don't emit anything for this case.
  16543. if (ir.ids[block.return_value].get_type() != TypeUndef)
  16544. statement("return ", to_unpacked_expression(block.return_value), ";");
  16545. }
  16546. }
  16547. else if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) ||
  16548. block.loop_dominator != BlockID(SPIRBlock::NoDominator))
  16549. {
  16550. // If this block is the very final block and not called from control flow,
  16551. // we do not need an explicit return which looks out of place. Just end the function here.
  16552. // In the very weird case of for(;;) { return; } executing return is unconditional,
  16553. // but we actually need a return here ...
  16554. statement("return;");
  16555. }
  16556. break;
  16557. }
  16558. // If the Kill is terminating a block with a (probably synthetic) return value, emit a return value statement.
  16559. case SPIRBlock::Kill:
  16560. statement(backend.discard_literal, ";");
  16561. if (block.return_value)
  16562. statement("return ", to_unpacked_expression(block.return_value), ";");
  16563. break;
  16564. case SPIRBlock::Unreachable:
  16565. {
  16566. // If the entry point ends with unreachable and has a return value, insert a return
  16567. // statement to avoid potential compiler errors from non-void functions without a return value.
  16568. if (block.return_value)
  16569. {
  16570. statement("return ", to_unpacked_expression(block.return_value), ";");
  16571. break;
  16572. }
  16573. // Avoid emitting false fallthrough, which can happen for
  16574. // if (cond) break; else discard; inside a case label.
  16575. // Discard is not always implementable as a terminator.
  16576. auto &cfg = get_cfg_for_current_function();
  16577. bool inner_dominator_is_switch = false;
  16578. ID id = block.self;
  16579. while (id)
  16580. {
  16581. auto &iter_block = get<SPIRBlock>(id);
  16582. if (iter_block.terminator == SPIRBlock::MultiSelect ||
  16583. iter_block.merge == SPIRBlock::MergeLoop)
  16584. {
  16585. ID next_block = iter_block.merge == SPIRBlock::MergeLoop ?
  16586. iter_block.merge_block : iter_block.next_block;
  16587. bool outside_construct = next_block && cfg.find_common_dominator(next_block, block.self) == next_block;
  16588. if (!outside_construct)
  16589. {
  16590. inner_dominator_is_switch = iter_block.terminator == SPIRBlock::MultiSelect;
  16591. break;
  16592. }
  16593. }
  16594. if (cfg.get_preceding_edges(id).empty())
  16595. break;
  16596. id = cfg.get_immediate_dominator(id);
  16597. }
  16598. if (inner_dominator_is_switch)
  16599. statement("break; // unreachable workaround");
  16600. emit_next_block = false;
  16601. break;
  16602. }
  16603. case SPIRBlock::IgnoreIntersection:
  16604. statement("ignoreIntersectionEXT;");
  16605. break;
  16606. case SPIRBlock::TerminateRay:
  16607. statement("terminateRayEXT;");
  16608. break;
  16609. case SPIRBlock::EmitMeshTasks:
  16610. emit_mesh_tasks(block);
  16611. break;
  16612. default:
  16613. SPIRV_CROSS_THROW("Unimplemented block terminator.");
  16614. }
  16615. BlockID trailing_block_id = 0;
  16616. if (block.next_block && emit_next_block)
  16617. {
  16618. // If we hit this case, we're dealing with an unconditional branch, which means we will output
  16619. // that block after this. If we had selection merge, we already flushed phi variables.
  16620. if (block.merge != SPIRBlock::MergeSelection)
  16621. {
  16622. flush_phi(block.self, block.next_block);
  16623. // For a direct branch, need to remember to invalidate expressions in the next linear block instead.
  16624. get<SPIRBlock>(block.next_block).invalidate_expressions.clear();
  16625. std::swap(get<SPIRBlock>(block.next_block).invalidate_expressions, block.invalidate_expressions);
  16626. }
  16627. // For switch fallthrough cases, we terminate the chain here, but we still need to handle Phi.
  16628. if (!current_emitting_switch_fallthrough)
  16629. {
  16630. // For merge selects we might have ignored the fact that a merge target
  16631. // could have been a break; or continue;
  16632. // We will need to deal with it here.
  16633. if (is_loop_break(block.next_block))
  16634. {
  16635. // Cannot check for just break, because switch statements will also use break.
  16636. assert(block.merge == SPIRBlock::MergeSelection);
  16637. statement("break;");
  16638. }
  16639. else if (is_continue(block.next_block))
  16640. {
  16641. assert(block.merge == SPIRBlock::MergeSelection);
  16642. branch_to_continue(block.self, block.next_block);
  16643. }
  16644. else if (BlockID(block.self) != block.next_block)
  16645. {
  16646. // Recursing here is quite scary since it's quite easy to stack overflow if
  16647. // the SPIR-V is constructed a particular way.
  16648. // We have to simulate the tail call ourselves.
  16649. if (block.merge != SPIRBlock::MergeLoop)
  16650. trailing_block_id = block.next_block;
  16651. else
  16652. emit_block_chain(get<SPIRBlock>(block.next_block));
  16653. }
  16654. }
  16655. }
  16656. if (block.merge == SPIRBlock::MergeLoop)
  16657. {
  16658. if (continue_type == SPIRBlock::DoWhileLoop)
  16659. {
  16660. // Make sure that we run the continue block to get the expressions set, but this
  16661. // should become an empty string.
  16662. // We have no fallbacks if we cannot forward everything to temporaries ...
  16663. const auto &continue_block = get<SPIRBlock>(block.continue_block);
  16664. bool positive_test = execution_is_noop(get<SPIRBlock>(continue_block.true_block),
  16665. get<SPIRBlock>(continue_block.loop_dominator));
  16666. uint32_t current_count = statement_count;
  16667. auto statements = emit_continue_block(block.continue_block, positive_test, !positive_test);
  16668. if (statement_count != current_count)
  16669. {
  16670. // The DoWhile block has side effects, force ComplexLoop pattern next pass.
  16671. get<SPIRBlock>(block.continue_block).complex_continue = true;
  16672. force_recompile();
  16673. }
  16674. // Might have to invert the do-while test here.
  16675. auto condition = to_expression(continue_block.condition);
  16676. if (!positive_test)
  16677. condition = join("!", enclose_expression(condition));
  16678. end_scope_decl(join("while (", condition, ")"));
  16679. }
  16680. else
  16681. end_scope();
  16682. loop_level_saver.release();
  16683. // We cannot break out of two loops at once, so don't check for break; here.
  16684. // Using block.self as the "from" block isn't quite right, but it has the same scope
  16685. // and dominance structure, so it's fine.
  16686. if (is_continue(block.merge_block))
  16687. branch_to_continue(block.self, block.merge_block);
  16688. else
  16689. trailing_block_id = block.merge_block;
  16690. }
  16691. return trailing_block_id;
  16692. }
  16693. void CompilerGLSL::emit_block_chain_cleanup(SPIRBlock &block)
  16694. {
  16695. // Forget about control dependent expressions now.
  16696. block.invalidate_expressions.clear();
  16697. // After we return, we must be out of scope, so if we somehow have to re-emit this block,
  16698. // re-declare variables if necessary.
  16699. // We only need one array here for rearm_dominated_variables,
// since it should be impossible for the same block to be re-emitted in the same chain twice.
  16701. assert(block.rearm_dominated_variables.size() == block.dominated_variables.size());
  16702. for (size_t i = 0; i < block.dominated_variables.size(); i++)
  16703. {
  16704. uint32_t var = block.dominated_variables[i];
  16705. get<SPIRVariable>(var).deferred_declaration = block.rearm_dominated_variables[i];
  16706. }
  16707. block.rearm_dominated_variables.clear();
  16708. // Just like for deferred declaration, we need to forget about loop variable enable
  16709. // if our block chain is reinstantiated later.
  16710. for (auto &var_id : block.loop_variables)
  16711. get<SPIRVariable>(var_id).loop_variable_enable = false;
  16712. }
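// Scope helpers. begin_scope() emits "{" and bumps the indent level; the end_scope*()
// variants pop the indent and emit "}", optionally followed by a trailer such as
// "while (...)" or a declarator name for struct/block declarations.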
  16713. void CompilerGLSL::begin_scope()
  16714. {
  16715. statement("{");
  16716. indent++;
  16717. }
  16718. void CompilerGLSL::end_scope()
  16719. {
  16720. if (!indent)
  16721. SPIRV_CROSS_THROW("Popping empty indent stack.");
  16722. indent--;
  16723. statement("}");
  16724. }
  16725. void CompilerGLSL::end_scope(const string &trailer)
  16726. {
  16727. if (!indent)
  16728. SPIRV_CROSS_THROW("Popping empty indent stack.");
  16729. indent--;
  16730. statement("}", trailer);
  16731. }
  16732. void CompilerGLSL::end_scope_decl()
  16733. {
  16734. if (!indent)
  16735. SPIRV_CROSS_THROW("Popping empty indent stack.");
  16736. indent--;
  16737. statement("};");
  16738. }
  16739. void CompilerGLSL::end_scope_decl(const string &decl)
  16740. {
  16741. if (!indent)
  16742. SPIRV_CROSS_THROW("Popping empty indent stack.");
  16743. indent--;
  16744. statement("} ", decl, ";");
  16745. }
  16746. void CompilerGLSL::check_function_call_constraints(const uint32_t *args, uint32_t length)
  16747. {
  16748. // If our variable is remapped, and we rely on type-remapping information as
  16749. // well, then we cannot pass the variable as a function parameter.
  16750. // Fixing this is non-trivial without stamping out variants of the same function,
  16751. // so for now warn about this and suggest workarounds instead.
  16752. for (uint32_t i = 0; i < length; i++)
  16753. {
  16754. auto *var = maybe_get<SPIRVariable>(args[i]);
  16755. if (!var || !var->remapped_variable)
  16756. continue;
  16757. auto &type = get<SPIRType>(var->basetype);
  16758. if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData)
  16759. {
  16760. SPIRV_CROSS_THROW("Tried passing a remapped subpassInput variable to a function. "
  16761. "This will not work correctly because type-remapping information is lost. "
  16762. "To workaround, please consider not passing the subpass input as a function parameter, "
  16763. "or use in/out variables instead which do not need type remapping information.");
  16764. }
  16765. }
  16766. }
  16767. const Instruction *CompilerGLSL::get_next_instruction_in_block(const Instruction &instr)
  16768. {
  16769. // FIXME: This is kind of hacky. There should be a cleaner way.
  16770. auto offset = uint32_t(&instr - current_emitting_block->ops.data());
  16771. if ((offset + 1) < current_emitting_block->ops.size())
  16772. return &current_emitting_block->ops[offset + 1];
  16773. else
  16774. return nullptr;
  16775. }
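// Keep only the memory-semantics bits that name an actual memory class
// (atomic counter, image, workgroup, uniform, cross-workgroup, subgroup);
// acquire/release and other ordering bits are masked away.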
  16776. uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics)
  16777. {
  16778. return semantics & (MemorySemanticsAtomicCounterMemoryMask | MemorySemanticsImageMemoryMask |
  16779. MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask |
  16780. MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask);
  16781. }
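// Base implementation of an array copy: emit a plain assignment from rhs_id to the
// destination, which is either the explicit expr string or the expression for lhs_id.
// Backends where plain array assignment is not possible override this hook.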
  16782. bool CompilerGLSL::emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rhs_id, StorageClass, StorageClass)
  16783. {
  16784. string lhs;
  16785. if (expr)
  16786. lhs = expr;
  16787. else
  16788. lhs = to_expression(lhs_id);
  16789. statement(lhs, " = ", to_expression(rhs_id), ";");
  16790. return true;
  16791. }
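// Only relevant when backend.force_gl_in_out_block is set: a whole-array store to the
// gl_SampleMask builtin output cannot be assigned directly, so unroll it into a
// per-element loop and bitcast each element to int.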
  16792. bool CompilerGLSL::unroll_array_to_complex_store(uint32_t target_id, uint32_t source_id)
  16793. {
  16794. if (!backend.force_gl_in_out_block)
  16795. return false;
  16796. // This path is only relevant for GL backends.
  16797. auto *var = maybe_get<SPIRVariable>(target_id);
  16798. if (!var || var->storage != StorageClassOutput)
  16799. return false;
  16800. if (!is_builtin_variable(*var) || BuiltIn(get_decoration(var->self, DecorationBuiltIn)) != BuiltInSampleMask)
  16801. return false;
  16802. auto &type = expression_type(source_id);
  16803. string array_expr;
  16804. if (type.array_size_literal.back())
  16805. {
  16806. array_expr = convert_to_string(type.array.back());
  16807. if (type.array.back() == 0)
  16808. SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array.");
  16809. }
  16810. else
  16811. array_expr = to_expression(type.array.back());
  16812. SPIRType target_type { OpTypeInt };
  16813. target_type.basetype = SPIRType::Int;
  16814. statement("for (int i = 0; i < int(", array_expr, "); i++)");
  16815. begin_scope();
  16816. statement(to_expression(target_id), "[i] = ",
  16817. bitcast_expression(target_type, type.basetype, join(to_expression(source_id), "[i]")),
  16818. ";");
  16819. end_scope();
  16820. return true;
  16821. }
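// Counterpart for loads: when an entire builtin or tessellation input array is loaded,
// copy it element by element into a local temporary (e.g. from gl_in[i]) and redirect
// the expression to that temporary.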
  16822. void CompilerGLSL::unroll_array_from_complex_load(uint32_t target_id, uint32_t source_id, std::string &expr)
  16823. {
  16824. if (!backend.force_gl_in_out_block)
  16825. return;
  16826. // This path is only relevant for GL backends.
  16827. auto *var = maybe_get<SPIRVariable>(source_id);
  16828. if (!var)
  16829. return;
  16830. if (var->storage != StorageClassInput && var->storage != StorageClassOutput)
  16831. return;
  16832. auto &type = get_variable_data_type(*var);
  16833. if (type.array.empty())
  16834. return;
  16835. auto builtin = BuiltIn(get_decoration(var->self, DecorationBuiltIn));
  16836. bool is_builtin = is_builtin_variable(*var) &&
  16837. (builtin == BuiltInPointSize ||
  16838. builtin == BuiltInPosition ||
  16839. builtin == BuiltInSampleMask);
  16840. bool is_tess = is_tessellation_shader();
  16841. bool is_patch = has_decoration(var->self, DecorationPatch);
  16842. bool is_sample_mask = is_builtin && builtin == BuiltInSampleMask;
// Tessellation input arrays are special in that they are unsized, so we cannot directly copy from them.
  16844. // We must unroll the array load.
  16845. // For builtins, we couldn't catch this case normally,
  16846. // because this is resolved in the OpAccessChain in most cases.
  16847. // If we load the entire array, we have no choice but to unroll here.
  16848. if (!is_patch && (is_builtin || is_tess))
  16849. {
  16850. auto new_expr = join("_", target_id, "_unrolled");
  16851. statement(variable_decl(type, new_expr, target_id), ";");
  16852. string array_expr;
  16853. if (type.array_size_literal.back())
  16854. {
  16855. array_expr = convert_to_string(type.array.back());
  16856. if (type.array.back() == 0)
  16857. SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array.");
  16858. }
  16859. else
  16860. array_expr = to_expression(type.array.back());
  16861. // The array size might be a specialization constant, so use a for-loop instead.
  16862. statement("for (int i = 0; i < int(", array_expr, "); i++)");
  16863. begin_scope();
  16864. if (is_builtin && !is_sample_mask)
  16865. statement(new_expr, "[i] = gl_in[i].", expr, ";");
  16866. else if (is_sample_mask)
  16867. {
  16868. SPIRType target_type { OpTypeInt };
  16869. target_type.basetype = SPIRType::Int;
  16870. statement(new_expr, "[i] = ", bitcast_expression(target_type, type.basetype, join(expr, "[i]")), ";");
  16871. }
  16872. else
  16873. statement(new_expr, "[i] = ", expr, "[i];");
  16874. end_scope();
  16875. expr = std::move(new_expr);
  16876. }
  16877. }
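// Loads from standalone builtin variables: the GLSL builtin's type (e.g. gl_InstanceID
// is int) may differ from the type the SPIR-V load expects (often uint), so bitcast
// the loaded expression when the base types do not match.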
  16878. void CompilerGLSL::cast_from_variable_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type)
  16879. {
  16880. // We will handle array cases elsewhere.
  16881. if (!expr_type.array.empty())
  16882. return;
  16883. auto *var = maybe_get_backing_variable(source_id);
  16884. if (var)
  16885. source_id = var->self;
  16886. // Only interested in standalone builtin variables.
  16887. if (!has_decoration(source_id, DecorationBuiltIn))
  16888. {
  16889. // Except for int attributes in legacy GLSL, which are cast from float.
  16890. if (is_legacy() && expr_type.basetype == SPIRType::Int && var && var->storage == StorageClassInput)
  16891. expr = join(type_to_glsl(expr_type), "(", expr, ")");
  16892. return;
  16893. }
  16894. auto builtin = static_cast<BuiltIn>(get_decoration(source_id, DecorationBuiltIn));
  16895. auto expected_type = expr_type.basetype;
  16896. // TODO: Fill in for more builtins.
  16897. switch (builtin)
  16898. {
  16899. case BuiltInLayer:
  16900. case BuiltInPrimitiveId:
  16901. case BuiltInViewportIndex:
  16902. case BuiltInInstanceId:
  16903. case BuiltInInstanceIndex:
  16904. case BuiltInVertexId:
  16905. case BuiltInVertexIndex:
  16906. case BuiltInSampleId:
  16907. case BuiltInBaseVertex:
  16908. case BuiltInBaseInstance:
  16909. case BuiltInDrawIndex:
  16910. case BuiltInFragStencilRefEXT:
  16911. case BuiltInInstanceCustomIndexNV:
  16912. case BuiltInSampleMask:
  16913. case BuiltInPrimitiveShadingRateKHR:
  16914. case BuiltInShadingRateKHR:
  16915. expected_type = SPIRType::Int;
  16916. break;
  16917. case BuiltInGlobalInvocationId:
  16918. case BuiltInLocalInvocationId:
  16919. case BuiltInWorkgroupId:
  16920. case BuiltInLocalInvocationIndex:
  16921. case BuiltInWorkgroupSize:
  16922. case BuiltInNumWorkgroups:
  16923. case BuiltInIncomingRayFlagsNV:
  16924. case BuiltInLaunchIdNV:
  16925. case BuiltInLaunchSizeNV:
  16926. case BuiltInPrimitiveTriangleIndicesEXT:
  16927. case BuiltInPrimitiveLineIndicesEXT:
  16928. case BuiltInPrimitivePointIndicesEXT:
  16929. expected_type = SPIRType::UInt;
  16930. break;
  16931. default:
  16932. break;
  16933. }
  16934. if (expected_type != expr_type.basetype)
  16935. expr = bitcast_expression(expr_type, expected_type, expr);
  16936. }
  16937. SPIRType::BaseType CompilerGLSL::get_builtin_basetype(BuiltIn builtin, SPIRType::BaseType default_type)
  16938. {
  16939. // TODO: Fill in for more builtins.
  16940. switch (builtin)
  16941. {
  16942. case BuiltInLayer:
  16943. case BuiltInPrimitiveId:
  16944. case BuiltInViewportIndex:
  16945. case BuiltInFragStencilRefEXT:
  16946. case BuiltInSampleMask:
  16947. case BuiltInPrimitiveShadingRateKHR:
  16948. case BuiltInShadingRateKHR:
  16949. return SPIRType::Int;
  16950. default:
  16951. return default_type;
  16952. }
  16953. }
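// Mirror of cast_from_variable_load for stores: if the value written to a standalone
// builtin does not match the builtin's expected base type, bitcast it before the store.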
  16954. void CompilerGLSL::cast_to_variable_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type)
  16955. {
  16956. auto *var = maybe_get_backing_variable(target_id);
  16957. if (var)
  16958. target_id = var->self;
  16959. // Only interested in standalone builtin variables.
  16960. if (!has_decoration(target_id, DecorationBuiltIn))
  16961. return;
  16962. auto builtin = static_cast<BuiltIn>(get_decoration(target_id, DecorationBuiltIn));
  16963. auto expected_type = get_builtin_basetype(builtin, expr_type.basetype);
  16964. if (expected_type != expr_type.basetype)
  16965. {
  16966. auto type = expr_type;
  16967. type.basetype = expected_type;
  16968. expr = bitcast_expression(type, expr_type.basetype, expr);
  16969. }
  16970. }
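// Wrap the resource index of an arrayed resource access in the nonuniform qualifier,
// e.g. tex[idx] becomes tex[nonuniformEXT(idx)], by locating the first balanced
// bracket pair in the already-emitted expression string.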
  16971. void CompilerGLSL::convert_non_uniform_expression(string &expr, uint32_t ptr_id)
  16972. {
  16973. if (*backend.nonuniform_qualifier == '\0')
  16974. return;
  16975. auto *var = maybe_get_backing_variable(ptr_id);
  16976. if (!var)
  16977. return;
  16978. if (var->storage != StorageClassUniformConstant &&
  16979. var->storage != StorageClassStorageBuffer &&
  16980. var->storage != StorageClassUniform)
  16981. return;
  16982. auto &backing_type = get<SPIRType>(var->basetype);
  16983. if (backing_type.array.empty())
  16984. return;
  16985. // If we get here, we know we're accessing an arrayed resource which
  16986. // might require nonuniform qualifier.
  16987. auto start_array_index = expr.find_first_of('[');
  16988. if (start_array_index == string::npos)
  16989. return;
  16990. // We've opened a bracket, track expressions until we can close the bracket.
  16991. // This must be our resource index.
  16992. size_t end_array_index = string::npos;
  16993. unsigned bracket_count = 1;
  16994. for (size_t index = start_array_index + 1; index < expr.size(); index++)
  16995. {
  16996. if (expr[index] == ']')
  16997. {
  16998. if (--bracket_count == 0)
  16999. {
  17000. end_array_index = index;
  17001. break;
  17002. }
  17003. }
  17004. else if (expr[index] == '[')
  17005. bracket_count++;
  17006. }
  17007. assert(bracket_count == 0);
  17008. // Doesn't really make sense to declare a non-arrayed image with nonuniformEXT, but there's
  17009. // nothing we can do here to express that.
  17010. if (start_array_index == string::npos || end_array_index == string::npos || end_array_index < start_array_index)
  17011. return;
  17012. start_array_index++;
  17013. expr = join(expr.substr(0, start_array_index), backend.nonuniform_qualifier, "(",
  17014. expr.substr(start_array_index, end_array_index - start_array_index), ")",
  17015. expr.substr(end_array_index, string::npos));
  17016. }
  17017. void CompilerGLSL::emit_block_hints(const SPIRBlock &block)
  17018. {
  17019. if ((options.es && options.version < 310) || (!options.es && options.version < 140))
  17020. return;
  17021. switch (block.hint)
  17022. {
  17023. case SPIRBlock::HintFlatten:
  17024. require_extension_internal("GL_EXT_control_flow_attributes");
  17025. statement("SPIRV_CROSS_FLATTEN");
  17026. break;
  17027. case SPIRBlock::HintDontFlatten:
  17028. require_extension_internal("GL_EXT_control_flow_attributes");
  17029. statement("SPIRV_CROSS_BRANCH");
  17030. break;
  17031. case SPIRBlock::HintUnroll:
  17032. require_extension_internal("GL_EXT_control_flow_attributes");
  17033. statement("SPIRV_CROSS_UNROLL");
  17034. break;
  17035. case SPIRBlock::HintDontUnroll:
  17036. require_extension_internal("GL_EXT_control_flow_attributes");
  17037. statement("SPIRV_CROSS_LOOP");
  17038. break;
  17039. default:
  17040. break;
  17041. }
  17042. }
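// Name-cache bookkeeping between compilation passes: preserve_alias_on_reset()
// remembers the current name of an id so reset_name_caches() can restore it after
// clearing all the per-pass naming caches.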
  17043. void CompilerGLSL::preserve_alias_on_reset(uint32_t id)
  17044. {
  17045. preserved_aliases[id] = get_name(id);
  17046. }
  17047. void CompilerGLSL::reset_name_caches()
  17048. {
  17049. for (auto &preserved : preserved_aliases)
  17050. set_name(preserved.first, preserved.second);
  17051. preserved_aliases.clear();
  17052. resource_names.clear();
  17053. block_input_names.clear();
  17054. block_output_names.clear();
  17055. block_ubo_names.clear();
  17056. block_ssbo_names.clear();
  17057. block_names.clear();
  17058. function_overloads.clear();
  17059. }
  17060. void CompilerGLSL::fixup_anonymous_struct_names(std::unordered_set<uint32_t> &visited, const SPIRType &type)
  17061. {
  17062. if (visited.count(type.self))
  17063. return;
  17064. visited.insert(type.self);
  17065. for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
  17066. {
  17067. auto &mbr_type = get<SPIRType>(type.member_types[i]);
  17068. if (mbr_type.basetype == SPIRType::Struct)
  17069. {
  17070. // If there are multiple aliases, the output might be somewhat unpredictable,
  17071. // but the only real alternative in that case is to do nothing, which isn't any better.
  17072. // This check should be fine in practice.
  17073. if (get_name(mbr_type.self).empty() && !get_member_name(type.self, i).empty())
  17074. {
  17075. auto anon_name = join("anon_", get_member_name(type.self, i));
  17076. ParsedIR::sanitize_underscores(anon_name);
  17077. set_name(mbr_type.self, anon_name);
  17078. }
  17079. fixup_anonymous_struct_names(visited, mbr_type);
  17080. }
  17081. }
  17082. }
  17083. void CompilerGLSL::fixup_anonymous_struct_names()
  17084. {
  17085. // HLSL codegen can often end up emitting anonymous structs inside blocks, which
  17086. // breaks GL linking since all names must match ...
  17087. // Try to emit sensible code, so attempt to find such structs and emit anon_$member.
  17088. // Breaks exponential explosion with weird type trees.
  17089. std::unordered_set<uint32_t> visited;
  17090. ir.for_each_typed_id<SPIRType>([&](uint32_t, SPIRType &type) {
  17091. if (type.basetype == SPIRType::Struct &&
  17092. (has_decoration(type.self, DecorationBlock) ||
  17093. has_decoration(type.self, DecorationBufferBlock)))
  17094. {
  17095. fixup_anonymous_struct_names(visited, type);
  17096. }
  17097. });
  17098. }
  17099. void CompilerGLSL::fixup_type_alias()
  17100. {
  17101. // Due to how some backends work, the "master" type of type_alias must be a block-like type if it exists.
  17102. ir.for_each_typed_id<SPIRType>([&](uint32_t self, SPIRType &type) {
  17103. if (!type.type_alias)
  17104. return;
  17105. if (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock))
  17106. {
  17107. // Top-level block types should never alias anything else.
  17108. type.type_alias = 0;
  17109. }
  17110. else if (type_is_block_like(type) && type.self == ID(self))
  17111. {
  17112. // A block-like type is any type which contains Offset decoration, but not top-level blocks,
  17113. // i.e. blocks which are placed inside buffers.
  17114. // Become the master.
  17115. ir.for_each_typed_id<SPIRType>([&](uint32_t other_id, SPIRType &other_type) {
  17116. if (other_id == self)
  17117. return;
  17118. if (other_type.type_alias == type.type_alias)
  17119. other_type.type_alias = self;
  17120. });
  17121. this->get<SPIRType>(type.type_alias).type_alias = self;
  17122. type.type_alias = 0;
  17123. }
  17124. });
  17125. }
  17126. void CompilerGLSL::reorder_type_alias()
  17127. {
  17128. // Reorder declaration of types so that the master of the type alias is always emitted first.
// We need this in case a type B depends on type A (A must come before in the vector), but A is an alias of a type ABuffer, which
// means declaration of A doesn't happen (yet), and order would be B, ABuffer and not ABuffer, B. Fix this up here.
  17131. auto loop_lock = ir.create_loop_hard_lock();
  17132. auto &type_ids = ir.ids_for_type[TypeType];
  17133. for (auto alias_itr = begin(type_ids); alias_itr != end(type_ids); ++alias_itr)
  17134. {
  17135. auto &type = get<SPIRType>(*alias_itr);
  17136. if (type.type_alias != TypeID(0) &&
  17137. !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
  17138. {
  17139. // We will skip declaring this type, so make sure the type_alias type comes before.
  17140. auto master_itr = find(begin(type_ids), end(type_ids), ID(type.type_alias));
  17141. assert(master_itr != end(type_ids));
  17142. if (alias_itr < master_itr)
  17143. {
  17144. // Must also swap the type order for the constant-type joined array.
  17145. auto &joined_types = ir.ids_for_constant_undef_or_type;
  17146. auto alt_alias_itr = find(begin(joined_types), end(joined_types), *alias_itr);
  17147. auto alt_master_itr = find(begin(joined_types), end(joined_types), *master_itr);
  17148. assert(alt_alias_itr != end(joined_types));
  17149. assert(alt_master_itr != end(joined_types));
  17150. swap(*alias_itr, *master_itr);
  17151. swap(*alt_alias_itr, *alt_master_itr);
  17152. }
  17153. }
  17154. }
  17155. }
  17156. void CompilerGLSL::emit_line_directive(uint32_t file_id, uint32_t line_literal)
  17157. {
  17158. // If we are redirecting statements, ignore the line directive.
  17159. // Common case here is continue blocks.
  17160. if (redirect_statement)
  17161. return;
  17162. // If we're emitting code in a sensitive context such as condition blocks in for loops, don't emit
  17163. // any line directives, because it's not possible.
  17164. if (block_debug_directives)
  17165. return;
  17166. if (options.emit_line_directives)
  17167. {
  17168. require_extension_internal("GL_GOOGLE_cpp_style_line_directive");
  17169. statement_no_indent("#line ", line_literal, " \"", get<SPIRString>(file_id).str, "\"");
  17170. }
  17171. }
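// Handle the non-semantic shader debug-info instructions we can express in GLSL:
// DebugLine is forwarded to emit_line_directive(), and DebugSource registers the
// source file name string so later #line directives can reference it.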
  17172. void CompilerGLSL::emit_non_semantic_shader_debug_info(uint32_t, uint32_t result_id, uint32_t eop,
  17173. const uint32_t *args, uint32_t)
  17174. {
  17175. if (!options.emit_line_directives)
  17176. return;
  17177. switch (eop)
  17178. {
  17179. case SPIRExtension::DebugLine:
  17180. {
  17181. // We're missing line end and columns here, but I don't think we can emit those in any meaningful way.
  17182. emit_line_directive(args[0], get<SPIRConstant>(args[1]).scalar());
  17183. break;
  17184. }
  17185. case SPIRExtension::DebugSource:
  17186. {
  17187. // Forward the string declaration here. We ignore the optional text operand.
  17188. auto &str = get<SPIRString>(args[0]).str;
  17189. set<SPIRString>(result_id, str);
  17190. break;
  17191. }
  17192. default:
  17193. break;
  17194. }
  17195. }
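// Implements logical copies (OpCopyLogical): recursively walk arrays and struct
// members, then build access chains for both sides so emit_store_statement() can
// resolve any packing or transpose differences between the two layouts.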
  17196. void CompilerGLSL::emit_copy_logical_type(uint32_t lhs_id, uint32_t lhs_type_id, uint32_t rhs_id, uint32_t rhs_type_id,
  17197. SmallVector<uint32_t> chain)
  17198. {
  17199. // Fully unroll all member/array indices one by one.
  17200. auto &lhs_type = get<SPIRType>(lhs_type_id);
  17201. auto &rhs_type = get<SPIRType>(rhs_type_id);
  17202. if (!lhs_type.array.empty())
  17203. {
  17204. // Could use a loop here to support specialization constants, but it gets rather complicated with nested array types,
  17205. // and this is a rather obscure opcode anyways, keep it simple unless we are forced to.
  17206. uint32_t array_size = to_array_size_literal(lhs_type);
  17207. chain.push_back(0);
  17208. for (uint32_t i = 0; i < array_size; i++)
  17209. {
  17210. chain.back() = i;
  17211. emit_copy_logical_type(lhs_id, lhs_type.parent_type, rhs_id, rhs_type.parent_type, chain);
  17212. }
  17213. }
  17214. else if (lhs_type.basetype == SPIRType::Struct)
  17215. {
  17216. chain.push_back(0);
  17217. uint32_t member_count = uint32_t(lhs_type.member_types.size());
  17218. for (uint32_t i = 0; i < member_count; i++)
  17219. {
  17220. chain.back() = i;
  17221. emit_copy_logical_type(lhs_id, lhs_type.member_types[i], rhs_id, rhs_type.member_types[i], chain);
  17222. }
  17223. }
  17224. else
  17225. {
  17226. // Need to handle unpack/packing fixups since this can differ wildly between the logical types,
  17227. // particularly in MSL.
  17228. // To deal with this, we emit access chains and go through emit_store_statement
  17229. // to deal with all the special cases we can encounter.
  17230. AccessChainMeta lhs_meta, rhs_meta;
  17231. auto lhs = access_chain_internal(lhs_id, chain.data(), uint32_t(chain.size()),
  17232. ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &lhs_meta);
  17233. auto rhs = access_chain_internal(rhs_id, chain.data(), uint32_t(chain.size()),
  17234. ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &rhs_meta);
  17235. uint32_t id = ir.increase_bound_by(2);
  17236. lhs_id = id;
  17237. rhs_id = id + 1;
  17238. {
  17239. auto &lhs_expr = set<SPIRExpression>(lhs_id, std::move(lhs), lhs_type_id, true);
  17240. lhs_expr.need_transpose = lhs_meta.need_transpose;
  17241. if (lhs_meta.storage_is_packed)
  17242. set_extended_decoration(lhs_id, SPIRVCrossDecorationPhysicalTypePacked);
  17243. if (lhs_meta.storage_physical_type != 0)
  17244. set_extended_decoration(lhs_id, SPIRVCrossDecorationPhysicalTypeID, lhs_meta.storage_physical_type);
  17245. forwarded_temporaries.insert(lhs_id);
  17246. suppressed_usage_tracking.insert(lhs_id);
  17247. }
  17248. {
  17249. auto &rhs_expr = set<SPIRExpression>(rhs_id, std::move(rhs), rhs_type_id, true);
  17250. rhs_expr.need_transpose = rhs_meta.need_transpose;
  17251. if (rhs_meta.storage_is_packed)
  17252. set_extended_decoration(rhs_id, SPIRVCrossDecorationPhysicalTypePacked);
  17253. if (rhs_meta.storage_physical_type != 0)
  17254. set_extended_decoration(rhs_id, SPIRVCrossDecorationPhysicalTypeID, rhs_meta.storage_physical_type);
  17255. forwarded_temporaries.insert(rhs_id);
  17256. suppressed_usage_tracking.insert(rhs_id);
  17257. }
  17258. emit_store_statement(lhs_id, rhs_id);
  17259. }
  17260. }
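// Returns true if this subpass input's InputAttachmentIndex has been remapped to a
// color output in subpass_to_framebuffer_fetch_attachment, i.e. it will be read via
// framebuffer fetch instead of as a real subpass input.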
  17261. bool CompilerGLSL::subpass_input_is_framebuffer_fetch(uint32_t id) const
  17262. {
  17263. if (!has_decoration(id, DecorationInputAttachmentIndex))
  17264. return false;
  17265. uint32_t input_attachment_index = get_decoration(id, DecorationInputAttachmentIndex);
  17266. for (auto &remap : subpass_to_framebuffer_fetch_attachment)
  17267. if (remap.first == input_attachment_index)
  17268. return true;
  17269. return false;
  17270. }
  17271. const SPIRVariable *CompilerGLSL::find_subpass_input_by_attachment_index(uint32_t index) const
  17272. {
  17273. const SPIRVariable *ret = nullptr;
  17274. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
  17275. if (has_decoration(var.self, DecorationInputAttachmentIndex) &&
  17276. get_decoration(var.self, DecorationInputAttachmentIndex) == index)
  17277. {
  17278. ret = &var;
  17279. }
  17280. });
  17281. return ret;
  17282. }
  17283. const SPIRVariable *CompilerGLSL::find_color_output_by_location(uint32_t location) const
  17284. {
  17285. const SPIRVariable *ret = nullptr;
  17286. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
  17287. if (var.storage == StorageClassOutput && get_decoration(var.self, DecorationLocation) == location)
  17288. ret = &var;
  17289. });
  17290. return ret;
  17291. }
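// For framebuffer fetch, add an entry-point fixup which copies the current color
// output value into the remapped subpass input variable: gl_LastFragData[location]
// on legacy GLSL, otherwise the (inout) fragment output itself.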
  17292. void CompilerGLSL::emit_inout_fragment_outputs_copy_to_subpass_inputs()
  17293. {
  17294. for (auto &remap : subpass_to_framebuffer_fetch_attachment)
  17295. {
  17296. auto *subpass_var = find_subpass_input_by_attachment_index(remap.first);
  17297. auto *output_var = find_color_output_by_location(remap.second);
  17298. if (!subpass_var)
  17299. continue;
  17300. if (!output_var)
  17301. SPIRV_CROSS_THROW("Need to declare the corresponding fragment output variable to be able "
  17302. "to read from it.");
  17303. if (is_array(get<SPIRType>(output_var->basetype)))
  17304. SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_framebuffer_fetch with arrays of color outputs.");
  17305. auto &func = get<SPIRFunction>(get_entry_point().self);
  17306. func.fixup_hooks_in.push_back([=]() {
  17307. if (is_legacy())
  17308. {
  17309. statement(to_expression(subpass_var->self), " = ", "gl_LastFragData[",
  17310. get_decoration(output_var->self, DecorationLocation), "];");
  17311. }
  17312. else
  17313. {
  17314. uint32_t num_rt_components = this->get<SPIRType>(output_var->basetype).vecsize;
  17315. statement(to_expression(subpass_var->self), vector_swizzle(num_rt_components, 0), " = ",
  17316. to_expression(output_var->self), ";");
  17317. }
  17318. });
  17319. }
  17320. }
  17321. bool CompilerGLSL::variable_is_depth_or_compare(VariableID id) const
  17322. {
  17323. return is_depth_image(get<SPIRType>(get<SPIRVariable>(id).basetype), id);
  17324. }
  17325. const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extension_name(Candidate c)
  17326. {
  17327. static const char *const retval[CandidateCount] = { "GL_KHR_shader_subgroup_ballot",
  17328. "GL_KHR_shader_subgroup_basic",
  17329. "GL_KHR_shader_subgroup_vote",
  17330. "GL_KHR_shader_subgroup_arithmetic",
  17331. "GL_NV_gpu_shader_5",
  17332. "GL_NV_shader_thread_group",
  17333. "GL_NV_shader_thread_shuffle",
  17334. "GL_ARB_shader_ballot",
  17335. "GL_ARB_shader_group_vote",
  17336. "GL_AMD_gcn_shader" };
  17337. return retval[c];
  17338. }
  17339. SmallVector<std::string> CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_names(Candidate c)
  17340. {
  17341. switch (c)
  17342. {
  17343. case ARB_shader_ballot:
  17344. return { "GL_ARB_shader_int64" };
  17345. case AMD_gcn_shader:
  17346. return { "GL_AMD_gpu_shader_int64", "GL_NV_gpu_shader5" };
  17347. default:
  17348. return {};
  17349. }
  17350. }
  17351. const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_predicate(Candidate c)
  17352. {
  17353. switch (c)
  17354. {
  17355. case ARB_shader_ballot:
  17356. return "defined(GL_ARB_shader_int64)";
  17357. case AMD_gcn_shader:
  17358. return "(defined(GL_AMD_gpu_shader_int64) || defined(GL_NV_gpu_shader5))";
  17359. default:
  17360. return "";
  17361. }
  17362. }
  17363. CompilerGLSL::ShaderSubgroupSupportHelper::FeatureVector CompilerGLSL::ShaderSubgroupSupportHelper::
  17364. get_feature_dependencies(Feature feature)
  17365. {
  17366. switch (feature)
  17367. {
  17368. case SubgroupAllEqualT:
  17369. return { SubgroupBroadcast_First, SubgroupAll_Any_AllEqualBool };
  17370. case SubgroupElect:
  17371. return { SubgroupBallotFindLSB_MSB, SubgroupBallot, SubgroupInvocationID };
  17372. case SubgroupInverseBallot_InclBitCount_ExclBitCout:
  17373. return { SubgroupMask };
  17374. case SubgroupBallotBitCount:
  17375. return { SubgroupBallot };
  17376. case SubgroupArithmeticIAddReduce:
  17377. case SubgroupArithmeticIAddInclusiveScan:
  17378. case SubgroupArithmeticFAddReduce:
  17379. case SubgroupArithmeticFAddInclusiveScan:
  17380. case SubgroupArithmeticIMulReduce:
  17381. case SubgroupArithmeticIMulInclusiveScan:
  17382. case SubgroupArithmeticFMulReduce:
  17383. case SubgroupArithmeticFMulInclusiveScan:
  17384. return { SubgroupSize, SubgroupBallot, SubgroupBallotBitCount, SubgroupMask, SubgroupBallotBitExtract };
  17385. case SubgroupArithmeticIAddExclusiveScan:
  17386. case SubgroupArithmeticFAddExclusiveScan:
  17387. case SubgroupArithmeticIMulExclusiveScan:
  17388. case SubgroupArithmeticFMulExclusiveScan:
  17389. return { SubgroupSize, SubgroupBallot, SubgroupBallotBitCount,
  17390. SubgroupMask, SubgroupElect, SubgroupBallotBitExtract };
  17391. default:
  17392. return {};
  17393. }
  17394. }
  17395. CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::
  17396. get_feature_dependency_mask(Feature feature)
  17397. {
  17398. return build_mask(get_feature_dependencies(feature));
  17399. }
  17400. bool CompilerGLSL::ShaderSubgroupSupportHelper::can_feature_be_implemented_without_extensions(Feature feature)
  17401. {
  17402. static const bool retval[FeatureCount] = {
  17403. false, false, false, false, false, false,
true, // SubgroupBallotFindLSB_MSB
  17405. false, false, false, false,
  17406. true, // SubgroupMemBarrier - replaced with workgroup memory barriers
  17407. false, false, true, false,
  17408. false, false, false, false, false, false, // iadd, fadd
false, false, false, false, false, false, // imul, fmul
  17410. };
  17411. return retval[feature];
  17412. }
  17413. CompilerGLSL::ShaderSubgroupSupportHelper::Candidate CompilerGLSL::ShaderSubgroupSupportHelper::
  17414. get_KHR_extension_for_feature(Feature feature)
  17415. {
  17416. static const Candidate extensions[FeatureCount] = {
  17417. KHR_shader_subgroup_ballot, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic,
  17418. KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_vote,
  17419. KHR_shader_subgroup_vote, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic,
  17420. KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot,
  17421. KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic,
  17422. KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic,
  17423. KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic,
  17424. KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic,
  17425. };
  17426. return extensions[feature];
  17427. }
  17428. void CompilerGLSL::ShaderSubgroupSupportHelper::request_feature(Feature feature)
  17429. {
  17430. feature_mask |= (FeatureMask(1) << feature) | get_feature_dependency_mask(feature);
  17431. }
  17432. bool CompilerGLSL::ShaderSubgroupSupportHelper::is_feature_requested(Feature feature) const
  17433. {
  17434. return (feature_mask & (1u << feature)) != 0;
  17435. }
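// Tally how many requested features (including their dependencies) each candidate
// extension could satisfy. The resulting weights are later used to prefer the
// candidate extensions that cover the most requested functionality.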
  17436. CompilerGLSL::ShaderSubgroupSupportHelper::Result CompilerGLSL::ShaderSubgroupSupportHelper::resolve() const
  17437. {
  17438. Result res;
  17439. for (uint32_t i = 0u; i < FeatureCount; ++i)
  17440. {
  17441. if (feature_mask & (1u << i))
  17442. {
  17443. auto feature = static_cast<Feature>(i);
  17444. std::unordered_set<uint32_t> unique_candidates;
  17445. auto candidates = get_candidates_for_feature(feature);
  17446. unique_candidates.insert(candidates.begin(), candidates.end());
  17447. auto deps = get_feature_dependencies(feature);
  17448. for (Feature d : deps)
  17449. {
  17450. candidates = get_candidates_for_feature(d);
  17451. if (!candidates.empty())
  17452. unique_candidates.insert(candidates.begin(), candidates.end());
  17453. }
  17454. for (uint32_t c : unique_candidates)
  17455. ++res.weights[static_cast<Candidate>(c)];
  17456. }
  17457. }
  17458. return res;
  17459. }
  17460. CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper::
  17461. get_candidates_for_feature(Feature ft, const Result &r)
  17462. {
  17463. auto c = get_candidates_for_feature(ft);
  17464. auto cmp = [&r](Candidate a, Candidate b) {
  17465. if (r.weights[a] == r.weights[b])
  17466. return a < b; // Prefer candidates with lower enum value
  17467. return r.weights[a] > r.weights[b];
  17468. };
  17469. std::sort(c.begin(), c.end(), cmp);
  17470. return c;
  17471. }
  17472. CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper::
  17473. get_candidates_for_feature(Feature feature)
  17474. {
  17475. switch (feature)
  17476. {
  17477. case SubgroupMask:
  17478. return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot };
  17479. case SubgroupSize:
  17480. return { KHR_shader_subgroup_basic, NV_shader_thread_group, AMD_gcn_shader, ARB_shader_ballot };
  17481. case SubgroupInvocationID:
  17482. return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot };
  17483. case SubgroupID:
  17484. return { KHR_shader_subgroup_basic, NV_shader_thread_group };
  17485. case NumSubgroups:
  17486. return { KHR_shader_subgroup_basic, NV_shader_thread_group };
  17487. case SubgroupBroadcast_First:
  17488. return { KHR_shader_subgroup_ballot, NV_shader_thread_shuffle, ARB_shader_ballot };
  17489. case SubgroupBallotFindLSB_MSB:
  17490. return { KHR_shader_subgroup_ballot, NV_shader_thread_group };
  17491. case SubgroupAll_Any_AllEqualBool:
  17492. return { KHR_shader_subgroup_vote, NV_gpu_shader_5, ARB_shader_group_vote, AMD_gcn_shader };
  17493. case SubgroupAllEqualT:
  17494. return {}; // depends on other features only
  17495. case SubgroupElect:
  17496. return {}; // depends on other features only
  17497. case SubgroupBallot:
  17498. return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot };
  17499. case SubgroupBarrier:
  17500. return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot, AMD_gcn_shader };
  17501. case SubgroupMemBarrier:
  17502. return { KHR_shader_subgroup_basic };
  17503. case SubgroupInverseBallot_InclBitCount_ExclBitCout:
  17504. return {};
  17505. case SubgroupBallotBitExtract:
  17506. return { NV_shader_thread_group };
  17507. case SubgroupBallotBitCount:
  17508. return {};
  17509. case SubgroupArithmeticIAddReduce:
  17510. case SubgroupArithmeticIAddExclusiveScan:
  17511. case SubgroupArithmeticIAddInclusiveScan:
  17512. case SubgroupArithmeticFAddReduce:
  17513. case SubgroupArithmeticFAddExclusiveScan:
  17514. case SubgroupArithmeticFAddInclusiveScan:
  17515. case SubgroupArithmeticIMulReduce:
  17516. case SubgroupArithmeticIMulExclusiveScan:
  17517. case SubgroupArithmeticIMulInclusiveScan:
  17518. case SubgroupArithmeticFMulReduce:
  17519. case SubgroupArithmeticFMulExclusiveScan:
  17520. case SubgroupArithmeticFMulInclusiveScan:
  17521. return { KHR_shader_subgroup_arithmetic, NV_shader_thread_shuffle };
  17522. default:
  17523. return {};
  17524. }
  17525. }
  17526. CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::build_mask(
  17527. const SmallVector<Feature> &features)
  17528. {
  17529. FeatureMask mask = 0;
  17530. for (Feature f : features)
  17531. mask |= FeatureMask(1) << f;
  17532. return mask;
  17533. }
  17534. CompilerGLSL::ShaderSubgroupSupportHelper::Result::Result()
  17535. {
  17536. for (auto &weight : weights)
  17537. weight = 0;
// Make sure KHR_shader_subgroup extensions are always preferred.
  17539. const uint32_t big_num = FeatureCount;
  17540. weights[KHR_shader_subgroup_ballot] = big_num;
  17541. weights[KHR_shader_subgroup_basic] = big_num;
  17542. weights[KHR_shader_subgroup_vote] = big_num;
  17543. weights[KHR_shader_subgroup_arithmetic] = big_num;
  17544. }
  17545. void CompilerGLSL::request_workaround_wrapper_overload(TypeID id)
  17546. {
  17547. // Must be ordered to maintain deterministic output, so vector is appropriate.
  17548. if (find(begin(workaround_ubo_load_overload_types), end(workaround_ubo_load_overload_types), id) ==
  17549. end(workaround_ubo_load_overload_types))
  17550. {
  17551. force_recompile();
  17552. workaround_ubo_load_overload_types.push_back(id);
  17553. }
  17554. }
  17555. void CompilerGLSL::rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr)
  17556. {
  17557. // Loading row-major matrices from UBOs on older AMD Windows OpenGL drivers is problematic.
// To load these types correctly, we must first wrap them in a dummy function whose only purpose is to
  17559. // ensure row_major decoration is actually respected.
  17560. auto *var = maybe_get_backing_variable(ptr);
  17561. if (!var)
  17562. return;
  17563. auto &backing_type = get<SPIRType>(var->basetype);
  17564. bool is_ubo = backing_type.basetype == SPIRType::Struct && backing_type.storage == StorageClassUniform &&
  17565. has_decoration(backing_type.self, DecorationBlock);
  17566. if (!is_ubo)
  17567. return;
  17568. auto *type = &get<SPIRType>(loaded_type);
  17569. bool rewrite = false;
  17570. bool relaxed = options.es;
  17571. if (is_matrix(*type))
  17572. {
  17573. // To avoid adding a lot of unnecessary meta tracking to forward the row_major state,
  17574. // we will simply look at the base struct itself. It is exceptionally rare to mix and match row-major/col-major state.
  17575. // If there is any row-major action going on, we apply the workaround.
  17576. // It is harmless to apply the workaround to column-major matrices, so this is still a valid solution.
// If an access chain occurred, the workaround is not required, so loading vectors or scalars does not need the workaround.
  17578. type = &backing_type;
  17579. }
  17580. else
  17581. {
  17582. // If we're loading a composite, we don't have overloads like these.
  17583. relaxed = false;
  17584. }
  17585. if (type->basetype == SPIRType::Struct)
  17586. {
  17587. // If we're loading a struct where any member is a row-major matrix, apply the workaround.
  17588. for (uint32_t i = 0; i < uint32_t(type->member_types.size()); i++)
  17589. {
  17590. auto decorations = combined_decoration_for_member(*type, i);
  17591. if (decorations.get(DecorationRowMajor))
  17592. rewrite = true;
  17593. // Since we decide on a per-struct basis, only use mediump wrapper if all candidates are mediump.
  17594. if (!decorations.get(DecorationRelaxedPrecision))
  17595. relaxed = false;
  17596. }
  17597. }
  17598. if (rewrite)
  17599. {
  17600. request_workaround_wrapper_overload(loaded_type);
  17601. expr = join("spvWorkaroundRowMajor", (relaxed ? "MP" : ""), "(", expr, ")");
  17602. }
  17603. }
  17604. void CompilerGLSL::mask_stage_output_by_location(uint32_t location, uint32_t component)
  17605. {
  17606. masked_output_locations.insert({ location, component });
  17607. }
  17608. void CompilerGLSL::mask_stage_output_by_builtin(BuiltIn builtin)
  17609. {
  17610. masked_output_builtins.insert(builtin);
  17611. }
  17612. bool CompilerGLSL::is_stage_output_variable_masked(const SPIRVariable &var) const
  17613. {
  17614. auto &type = get<SPIRType>(var.basetype);
  17615. bool is_block = has_decoration(type.self, DecorationBlock);
  17616. // Blocks by themselves are never masked. Must be masked per-member.
  17617. if (is_block)
  17618. return false;
  17619. bool is_builtin = has_decoration(var.self, DecorationBuiltIn);
  17620. if (is_builtin)
  17621. {
  17622. return is_stage_output_builtin_masked(BuiltIn(get_decoration(var.self, DecorationBuiltIn)));
  17623. }
  17624. else
  17625. {
  17626. if (!has_decoration(var.self, DecorationLocation))
  17627. return false;
  17628. return is_stage_output_location_masked(
  17629. get_decoration(var.self, DecorationLocation),
  17630. get_decoration(var.self, DecorationComponent));
  17631. }
  17632. }
  17633. bool CompilerGLSL::is_stage_output_block_member_masked(const SPIRVariable &var, uint32_t index, bool strip_array) const
  17634. {
  17635. auto &type = get<SPIRType>(var.basetype);
  17636. bool is_block = has_decoration(type.self, DecorationBlock);
  17637. if (!is_block)
  17638. return false;
  17639. BuiltIn builtin = BuiltInMax;
  17640. if (is_member_builtin(type, index, &builtin))
  17641. {
  17642. return is_stage_output_builtin_masked(builtin);
  17643. }
  17644. else
  17645. {
  17646. uint32_t location = get_declared_member_location(var, index, strip_array);
  17647. uint32_t component = get_member_decoration(type.self, index, DecorationComponent);
  17648. return is_stage_output_location_masked(location, component);
  17649. }
  17650. }
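// A variable is per-primitive if it is decorated PerPrimitiveEXT itself, or if it is
// a block in which every member carries the decoration.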
bool CompilerGLSL::is_per_primitive_variable(const SPIRVariable &var) const
{
	if (has_decoration(var.self, DecorationPerPrimitiveEXT))
		return true;

	auto &type = get<SPIRType>(var.basetype);
	if (!has_decoration(type.self, DecorationBlock))
		return false;

	for (uint32_t i = 0, n = uint32_t(type.member_types.size()); i < n; i++)
		if (!has_member_decoration(type.self, i, DecorationPerPrimitiveEXT))
			return false;

	return true;
}

bool CompilerGLSL::is_stage_output_location_masked(uint32_t location, uint32_t component) const
{
	return masked_output_locations.count({ location, component }) != 0;
}

bool CompilerGLSL::is_stage_output_builtin_masked(BuiltIn builtin) const
{
	return masked_output_builtins.count(builtin) != 0;
}
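
// Returns the location of a block member, taken from an explicit member Location decoration
// when present, otherwise accumulated from the start of the block.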
uint32_t CompilerGLSL::get_declared_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const
{
	auto &block_type = get<SPIRType>(var.basetype);
	if (has_member_decoration(block_type.self, mbr_idx, DecorationLocation))
		return get_member_decoration(block_type.self, mbr_idx, DecorationLocation);
	else
		return get_accumulated_member_location(var, mbr_idx, strip_array);
}

uint32_t CompilerGLSL::get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const
{
	auto &type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var);
	uint32_t location = get_decoration(var.self, DecorationLocation);

	for (uint32_t i = 0; i < mbr_idx; i++)
	{
		auto &mbr_type = get<SPIRType>(type.member_types[i]);

		// Start counting from any place we have a new location decoration.
		if (has_member_decoration(type.self, i, DecorationLocation))
			location = get_member_decoration(type.self, i, DecorationLocation);

		uint32_t location_count = type_to_location_count(mbr_type);
		location += location_count;
	}

	return location;
}

StorageClass CompilerGLSL::get_expression_effective_storage_class(uint32_t ptr)
{
	auto *var = maybe_get_backing_variable(ptr);

	// If the expression has been lowered to a temporary, we need to use the Generic storage class.
	// We're looking for the effective storage class of a given expression.
	// An access chain or forwarded OpLoads from such access chains
	// will generally have the storage class of the underlying variable, but if the load was not forwarded
	// we have lost any address space qualifiers.
	bool forced_temporary = ir.ids[ptr].get_type() == TypeExpression && !get<SPIRExpression>(ptr).access_chain &&
	                        (forced_temporaries.count(ptr) != 0 || forwarded_temporaries.count(ptr) == 0);

	if (var && !forced_temporary)
	{
		if (variable_decl_is_remapped_storage(*var, StorageClassWorkgroup))
			return StorageClassWorkgroup;
		if (variable_decl_is_remapped_storage(*var, StorageClassStorageBuffer))
			return StorageClassStorageBuffer;

		// Normalize SSBOs to StorageBuffer here.
		if (var->storage == StorageClassUniform &&
		    has_decoration(get<SPIRType>(var->basetype).self, DecorationBufferBlock))
			return StorageClassStorageBuffer;
		else
			return var->storage;
	}
	else
		return expression_type(ptr).storage;
}
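
// Returns the number of locations a type consumes:
// structs sum their members, matrices take one location per column,
// and arrays multiply the count by each array dimension.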
uint32_t CompilerGLSL::type_to_location_count(const SPIRType &type) const
{
	uint32_t count;
	if (type.basetype == SPIRType::Struct)
	{
		uint32_t mbr_count = uint32_t(type.member_types.size());
		count = 0;
		for (uint32_t i = 0; i < mbr_count; i++)
			count += type_to_location_count(get<SPIRType>(type.member_types[i]));
	}
	else
	{
		count = type.columns > 1 ? type.columns : 1;
	}

	uint32_t dim_count = uint32_t(type.array.size());
	for (uint32_t i = 0; i < dim_count; i++)
		count *= to_array_size_literal(type, i);

	return count;
}

std::string CompilerGLSL::format_float(float value) const
{
	if (float_formatter)
		return float_formatter->format_float(value);

	// default behavior
	return convert_to_string(value, current_locale_radix_character);
}

std::string CompilerGLSL::format_double(double value) const
{
	if (float_formatter)
		return float_formatter->format_double(value);

	// default behavior
	return convert_to_string(value, current_locale_radix_character);
}
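
// If id refers to a non-specialization integer constant with a symbolic alias in [mapping_start, mapping_end),
// emit that alias; otherwise fall back to a plain int(...) cast of the expression.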
std::string CompilerGLSL::to_pretty_expression_if_int_constant(
		uint32_t id,
		const GlslConstantNameMapping *mapping_start, const GlslConstantNameMapping *mapping_end,
		bool register_expression_read)
{
	auto *c = maybe_get<SPIRConstant>(id);
	if (c && !c->specialization)
	{
		auto value = c->scalar();
		auto pretty_name = std::find_if(mapping_start, mapping_end,
		                                [value](const GlslConstantNameMapping &mapping) { return mapping.value == value; });
		if (pretty_name != mapping_end)
			return pretty_name->alias;
	}

	return join("int(", to_expression(id, register_expression_read), ")");
}
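
// Computes the effective FPFastMathMode mask for an operation, taking per-operation decorations,
// float_controls1 execution modes, legacy NoContraction, and float_controls2 per-width defaults into account.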
uint32_t CompilerGLSL::get_fp_fast_math_flags_for_op(uint32_t result_type, uint32_t id) const
{
	uint32_t fp_flags = ~0;
	if (!type_is_floating_point(get<SPIRType>(result_type)))
		return fp_flags;

	auto &ep = get_entry_point();

	// Per-operation flag supersedes all defaults.
	if (id != 0 && has_decoration(id, DecorationFPFastMathMode))
		return get_decoration(id, DecorationFPFastMathMode);

	// Handle float_controls1 execution modes.
	uint32_t width = get<SPIRType>(result_type).width;
	bool szinp = false;
	switch (width)
	{
	case 8:
		szinp = ep.signed_zero_inf_nan_preserve_8;
		break;
	case 16:
		szinp = ep.signed_zero_inf_nan_preserve_16;
		break;
	case 32:
		szinp = ep.signed_zero_inf_nan_preserve_32;
		break;
	case 64:
		szinp = ep.signed_zero_inf_nan_preserve_64;
		break;
	default:
		break;
	}

	if (szinp)
		fp_flags &= ~(FPFastMathModeNSZMask | FPFastMathModeNotInfMask | FPFastMathModeNotNaNMask);

	// Legacy NoContraction deals with any kind of transform to the expression.
	if (id != 0 && has_decoration(id, DecorationNoContraction))
		fp_flags &= ~(FPFastMathModeAllowContractMask | FPFastMathModeAllowTransformMask | FPFastMathModeAllowReassocMask);

	// Handle float_controls2 execution modes.
	bool found_default = false;
	for (auto &fp_pair : ep.fp_fast_math_defaults)
	{
		if (get<SPIRType>(fp_pair.first).width == width && fp_pair.second)
		{
			fp_flags &= get<SPIRConstant>(fp_pair.second).scalar();
			found_default = true;
		}
	}

	// From SPV_KHR_float_controls2:
	// "This definition implies that, if the entry point set any FPFastMathDefault execution mode
	// then any type for which a default is not set uses no fast math flags
	// (although this can still be overridden on a per-operation basis).
	// Modules must not mix setting fast math modes explicitly using this extension and relying on older API defaults."
	if (!found_default && !ep.fp_fast_math_defaults.empty())
		fp_flags = 0;

	return fp_flags;
}
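
// An operation behaves like legacy NoContraction when any of the contract/transform/reassociate
// fast-math flags have been cleared for it.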
bool CompilerGLSL::has_legacy_nocontract(uint32_t result_type, uint32_t id) const
{
	const auto fp_flags = FPFastMathModeAllowContractMask |
	                      FPFastMathModeAllowTransformMask |
	                      FPFastMathModeAllowReassocMask;
	return (get_fp_fast_math_flags_for_op(result_type, id) & fp_flags) != fp_flags;
  17828. }