spirv_msl.cpp (575 KB, ~14,308 lines)

430914310143111431214313143141431514316143171431814319143201432114322143231432414325143261432714328143291433014331143321433314334143351433614337143381433914340143411434214343143441434514346143471434814349143501435114352143531435414355143561435714358143591436014361143621436314364143651436614367143681436914370143711437214373143741437514376143771437814379143801438114382143831438414385143861438714388143891439014391143921439314394143951439614397143981439914400144011440214403144041440514406144071440814409144101441114412144131441414415144161441714418144191442014421144221442314424144251442614427144281442914430144311443214433144341443514436144371443814439144401444114442144431444414445144461444714448144491445014451144521445314454144551445614457144581445914460144611446214463144641446514466144671446814469144701447114472144731447414475144761447714478144791448014481144821448314484144851448614487144881448914490144911449214493144941449514496144971449814499145001450114502145031450414505145061450714508145091451014511145121451314514145151451614517145181451914520145211452214523145241452514526145271452814529145301453114532145331453414535145361453714538145391454014541145421454314544145451454614547145481454914550145511455214553145541455514556145571455814559145601456114562145631456414565145661456714568145691457014571145721457314574145751457614577145781457914580145811458214583145841458514586145871458814589145901459114592145931459414595145961459714598145991460014601146021460314604146051460614607146081460914610146111461214613146141461514616146171461814619146201462114622146231462414625146261462714628146291463014631146321463314634146351463614637146381463914640146411464214643146441464514646146471464814649146501465114652146531465414655146561465714658146591466014661146621466314664146651466614667146681466914670146711467214673146741467514676146771467814679146801468114682146831468414685146861468714688146891469014691146921469314694146951469614697146981469914700147011470214703147041470514706147071470814709147101471114712147131471414715147161471714718147191472014721147221472314724147251472614727147281472914730147311473214733147341473514736147371473814739147401474114742147431474414745147461474714748147491475014751147521475314754147551475614757147581475914760147611476214763147641476514766147671476814769147701477114772147731477414775147761477714778147791478014781147821478314784147851478614787147881478914790147911479214793147941479514796147971479814799148001480114802148031480414805148061480714808148091481014811148121481314814148151481614817148181481914820148211482214823148241482514826148271482814829148301483114832148331483414835148361483714838148391484014841148421484314844148451484614847148481484914850148511485214853148541485514856148571485814859148601486114862148631486414865148661486714868148691487014871148721487314874148751487614877148781487914880148811488214883148841488514886148871488814889148901489114892148931489414895148961489714898148991490014901149021490314904149051490614907149081490914910149111491214913149141491514916149171491814919149201492114922149231492414925149261492714928149291493014931149321493314934149351493614937149381493914940149411494214943149441494514946149471494814949149501495114952149531495414955149561495714958149591496014961149621496314964149651496614967149681496914970149711497214973149741497514976149771497814979149801498114982149831498414985149861498714988149891499014991149921499314994149951499614997149981499915000150011500215003150041500515006150071500815009150101501115012150131501415015150161501715018150191
502015021150221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211542215423154241542515426154271542815429154301543115432154331543415435154361543715438154391544015441154421544315444154451544615447154481544915450154511545215453154541545515456154571545815459154601546115462154631546415465154661546715468154691547015471154721547315474154751547615477154781547915480154811548215483154841548515486154871548815489154901549115492154931549415495154961549715498154991550015501155021550315504155051550615507155081550915510155111551215513155141551515516155171551815519155201552115522155231552415525155261552715528155291553015531155321553315534155351553615537155381553915540155411554215543155441554515546155471554815549155501555115552155531555415555155561555715558155591556015561155621556315564155651556615567155681556915570155711557215573155741557515576155771557815579155801558115582155831558415585155861558715588155891559015591155921559315594155951559615597155981559915600156011560215603156041560515606156071560815609156101561115612156131561415615156161561715618156191562015621156221562315624156251562615627156281562915630156311563215633156341563515636156371563815639156401564115642156431564415645156461564715648156491565015651156521565315654156551565615657156581565915660156611566215663156641566515666156671566815669156701567115672156731567415675156761567715678156791568015681156821568315684156851568615687156881568915690156911569215693156941569515696156971569815699157001570115702157031570415705157061570715708157091571015711157121571315714157151571615717157181571915720157211572215723157241572515726157271572815729157301
57311573215733157341573515736157371573815739157401574115742157431574415745157461574715748157491575015751157521575315754157551575615757157581575915760157611576215763157641576515766157671576815769157701577115772157731577415775157761577715778157791578015781157821578315784157851578615787157881578915790157911579215793157941579515796157971579815799158001580115802158031580415805158061580715808158091581015811158121581315814158151581615817158181581915820158211582215823158241582515826158271582815829158301583115832158331583415835158361583715838158391584015841158421584315844158451584615847158481584915850158511585215853158541585515856158571585815859158601586115862158631586415865158661586715868158691587015871158721587315874158751587615877158781587915880158811588215883158841588515886158871588815889158901589115892158931589415895158961589715898158991590015901159021590315904159051590615907159081590915910159111591215913159141591515916159171591815919159201592115922159231592415925159261592715928159291593015931159321593315934159351593615937159381593915940159411594215943159441594515946159471594815949159501595115952159531595415955159561595715958159591596015961159621596315964159651596615967159681596915970159711597215973159741597515976159771597815979159801598115982159831598415985159861598715988159891599015991159921599315994159951599615997159981599916000160011600216003160041600516006160071600816009160101601116012160131601416015160161601716018160191602016021160221602316024160251602616027160281602916030160311603216033160341603516036160371603816039160401604116042160431604416045160461604716048160491605016051160521605316054160551605616057160581605916060160611606216063160641606516066160671606816069160701607116072160731607416075160761607716078160791608016081160821608316084160851608616087160881608916090160911609216093160941609516096160971609816099161001610116102161031610416105161061610716108161091611016111161121611316114161151611616117161181611916120161211612216123161241612516126161271612816129161301613116132161331613416135161361613716138161391614016141161421614316144161451614616147161481614916150161511615216153161541615516156161571615816159161601616116162161631616416165161661616716168
/*
 * Copyright 2016-2021 The Brenwill Workshop Ltd.
 * SPDX-License-Identifier: Apache-2.0 OR MIT
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * At your option, you may choose to accept this material under either:
 *  1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or
 *  2. The MIT License, found at <http://opensource.org/licenses/MIT>.
 */

#include "spirv_msl.hpp"
#include "GLSL.std.450.h"

#include <algorithm>
#include <assert.h>
#include <numeric>

using namespace spv;
using namespace SPIRV_CROSS_NAMESPACE;
using namespace std;

static const uint32_t k_unknown_location = ~0u;
static const uint32_t k_unknown_component = ~0u;
static const char *force_inline = "static inline __attribute__((always_inline))";
CompilerMSL::CompilerMSL(std::vector<uint32_t> spirv_)
    : CompilerGLSL(std::move(spirv_))
{
}
CompilerMSL::CompilerMSL(const uint32_t *ir_, size_t word_count)
    : CompilerGLSL(ir_, word_count)
{
}

CompilerMSL::CompilerMSL(const ParsedIR &ir_)
    : CompilerGLSL(ir_)
{
}

CompilerMSL::CompilerMSL(ParsedIR &&ir_)
    : CompilerGLSL(std::move(ir_))
{
}

void CompilerMSL::add_msl_shader_input(const MSLShaderInput &si)
{
    inputs_by_location[{si.location, si.component}] = si;
    if (si.builtin != BuiltInMax && !inputs_by_builtin.count(si.builtin))
        inputs_by_builtin[si.builtin] = si;
}
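
// Usage sketch (illustrative only; the field names are assumed from the
// MSLShaderInput declaration in spirv_msl.hpp): the app describes a
// per-vertex input at location 0 before compiling.
//
//   MSLShaderInput si = {};
//   si.location = 0;
//   si.component = 0;
//   si.builtin = spv::BuiltInMax; // a plain user attribute, not a builtin
//   compiler.add_msl_shader_input(si);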

void CompilerMSL::add_msl_resource_binding(const MSLResourceBinding &binding)
{
    StageSetBinding tuple = { binding.stage, binding.desc_set, binding.binding };
    resource_bindings[tuple] = { binding, false };

    // If we might need to pad argument buffer members to positionally align
    // arg buffer indexes, also maintain a lookup by argument buffer index.
    if (msl_options.pad_argument_buffer_resources)
    {
        StageSetBinding arg_idx_tuple = { binding.stage, binding.desc_set, k_unknown_component };

#define ADD_ARG_IDX_TO_BINDING_NUM_LOOKUP(rez) \
    arg_idx_tuple.binding = binding.msl_##rez; \
    resource_arg_buff_idx_to_binding_number[arg_idx_tuple] = binding.binding

        switch (binding.basetype)
        {
        case SPIRType::Void:
        case SPIRType::Boolean:
        case SPIRType::SByte:
        case SPIRType::UByte:
        case SPIRType::Short:
        case SPIRType::UShort:
        case SPIRType::Int:
        case SPIRType::UInt:
        case SPIRType::Int64:
        case SPIRType::UInt64:
        case SPIRType::AtomicCounter:
        case SPIRType::Half:
        case SPIRType::Float:
        case SPIRType::Double:
            ADD_ARG_IDX_TO_BINDING_NUM_LOOKUP(buffer);
            break;
        case SPIRType::Image:
            ADD_ARG_IDX_TO_BINDING_NUM_LOOKUP(texture);
            break;
        case SPIRType::Sampler:
            ADD_ARG_IDX_TO_BINDING_NUM_LOOKUP(sampler);
            break;
        case SPIRType::SampledImage:
            ADD_ARG_IDX_TO_BINDING_NUM_LOOKUP(texture);
            ADD_ARG_IDX_TO_BINDING_NUM_LOOKUP(sampler);
            break;
        default:
            SPIRV_CROSS_THROW("Unexpected argument buffer resource base type. When padding argument buffer elements, "
                              "all descriptor set resources must be supplied with a base type by the app.");
        }
#undef ADD_ARG_IDX_TO_BINDING_NUM_LOOKUP
    }
}
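
// Usage sketch (illustrative; field names assumed from MSLResourceBinding in
// spirv_msl.hpp): remap a combined image/sampler at set 0, binding 1 to Metal
// texture slot 1 and sampler slot 0 for the fragment stage.
//
//   MSLResourceBinding rb = {};
//   rb.stage = spv::ExecutionModelFragment;
//   rb.basetype = SPIRType::SampledImage; // required when pad_argument_buffer_resources is set
//   rb.desc_set = 0;
//   rb.binding = 1;
//   rb.msl_texture = 1;
//   rb.msl_sampler = 0;
//   compiler.add_msl_resource_binding(rb);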

void CompilerMSL::add_dynamic_buffer(uint32_t desc_set, uint32_t binding, uint32_t index)
{
    SetBindingPair pair = { desc_set, binding };
    buffers_requiring_dynamic_offset[pair] = { index, 0 };
}

void CompilerMSL::add_inline_uniform_block(uint32_t desc_set, uint32_t binding)
{
    SetBindingPair pair = { desc_set, binding };
    inline_uniform_blocks.insert(pair);
}
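
// Usage sketch (illustrative): a buffer with a dynamic offset at set 1,
// binding 2, whose offset the app stores at slot 0 of the spvDynamicOffsets
// buffer created in build_implicit_builtins() below; inline uniform blocks
// are registered the same way, by set and binding.
//
//   compiler.add_dynamic_buffer(1, 2, 0);
//   compiler.add_inline_uniform_block(0, 4);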

void CompilerMSL::add_discrete_descriptor_set(uint32_t desc_set)
{
    if (desc_set < kMaxArgumentBuffers)
        argument_buffer_discrete_mask |= 1u << desc_set;
}

void CompilerMSL::set_argument_buffer_device_address_space(uint32_t desc_set, bool device_storage)
{
    if (desc_set < kMaxArgumentBuffers)
    {
        if (device_storage)
            argument_buffer_device_storage_mask |= 1u << desc_set;
        else
            argument_buffer_device_storage_mask &= ~(1u << desc_set);
    }
}
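
// Both setters above treat the descriptor set index as a bit position in a
// mask, which is why sets at or above kMaxArgumentBuffers are silently
// ignored. Sketch (illustrative): keep set 3 as discrete resources, and put
// the argument buffer for set 0 in the device address space (e.g. because it
// holds writable buffers) instead of the default constant space.
//
//   compiler.add_discrete_descriptor_set(3);
//   compiler.set_argument_buffer_device_address_space(0, true);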

bool CompilerMSL::is_msl_shader_input_used(uint32_t location)
{
    // Don't report internal location allocations to app.
    return location_inputs_in_use.count(location) != 0 &&
           location_inputs_in_use_fallback.count(location) == 0;
}

uint32_t CompilerMSL::get_automatic_builtin_input_location(spv::BuiltIn builtin) const
{
    auto itr = builtin_to_automatic_input_location.find(builtin);
    if (itr == builtin_to_automatic_input_location.end())
        return k_unknown_location;
    else
        return itr->second;
}

bool CompilerMSL::is_msl_resource_binding_used(ExecutionModel model, uint32_t desc_set, uint32_t binding) const
{
    StageSetBinding tuple = { model, desc_set, binding };
    auto itr = resource_bindings.find(tuple);
    return itr != end(resource_bindings) && itr->second.second;
}

// Returns the size of the array of resources used by the variable with the specified id.
// The returned value is retrieved from the resource binding added using add_msl_resource_binding().
uint32_t CompilerMSL::get_resource_array_size(uint32_t id) const
{
    StageSetBinding tuple = { get_entry_point().model, get_decoration(id, DecorationDescriptorSet),
                              get_decoration(id, DecorationBinding) };
    auto itr = resource_bindings.find(tuple);
    return itr != end(resource_bindings) ? itr->second.first.count : 0;
}
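
// These queries are intended to be issued after compile(): the bool stored
// alongside each MSLResourceBinding is flipped when the binding is actually
// consumed during compilation. Sketch (illustrative):
//
//   std::string msl = compiler.compile();
//   if (!compiler.is_msl_resource_binding_used(spv::ExecutionModelFragment, 0, 1))
//       ; // the remapping for set 0, binding 1 was never referenced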

uint32_t CompilerMSL::get_automatic_msl_resource_binding(uint32_t id) const
{
    return get_extended_decoration(id, SPIRVCrossDecorationResourceIndexPrimary);
}

uint32_t CompilerMSL::get_automatic_msl_resource_binding_secondary(uint32_t id) const
{
    return get_extended_decoration(id, SPIRVCrossDecorationResourceIndexSecondary);
}

uint32_t CompilerMSL::get_automatic_msl_resource_binding_tertiary(uint32_t id) const
{
    return get_extended_decoration(id, SPIRVCrossDecorationResourceIndexTertiary);
}

uint32_t CompilerMSL::get_automatic_msl_resource_binding_quaternary(uint32_t id) const
{
    return get_extended_decoration(id, SPIRVCrossDecorationResourceIndexQuaternary);
}

void CompilerMSL::set_fragment_output_components(uint32_t location, uint32_t components)
{
    fragment_output_components[location] = components;
}

bool CompilerMSL::builtin_translates_to_nonarray(spv::BuiltIn builtin) const
{
    return (builtin == BuiltInSampleMask);
}

void CompilerMSL::build_implicit_builtins()
{
    bool need_sample_pos = active_input_builtins.get(BuiltInSamplePosition);
    bool need_vertex_params = capture_output_to_buffer && get_execution_model() == ExecutionModelVertex &&
                              !msl_options.vertex_for_tessellation;
    bool need_tesc_params = get_execution_model() == ExecutionModelTessellationControl;
    bool need_subgroup_mask =
        active_input_builtins.get(BuiltInSubgroupEqMask) || active_input_builtins.get(BuiltInSubgroupGeMask) ||
        active_input_builtins.get(BuiltInSubgroupGtMask) || active_input_builtins.get(BuiltInSubgroupLeMask) ||
        active_input_builtins.get(BuiltInSubgroupLtMask);
    bool need_subgroup_ge_mask = !msl_options.is_ios() && (active_input_builtins.get(BuiltInSubgroupGeMask) ||
                                                           active_input_builtins.get(BuiltInSubgroupGtMask));
    bool need_multiview = get_execution_model() == ExecutionModelVertex && !msl_options.view_index_from_device_index &&
                          msl_options.multiview_layered_rendering &&
                          (msl_options.multiview || active_input_builtins.get(BuiltInViewIndex));
    bool need_dispatch_base =
        msl_options.dispatch_base && get_execution_model() == ExecutionModelGLCompute &&
        (active_input_builtins.get(BuiltInWorkgroupId) || active_input_builtins.get(BuiltInGlobalInvocationId));
    bool need_grid_params = get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation;
    bool need_vertex_base_params =
        need_grid_params &&
        (active_input_builtins.get(BuiltInVertexId) || active_input_builtins.get(BuiltInVertexIndex) ||
         active_input_builtins.get(BuiltInBaseVertex) || active_input_builtins.get(BuiltInInstanceId) ||
         active_input_builtins.get(BuiltInInstanceIndex) || active_input_builtins.get(BuiltInBaseInstance));
    bool need_local_invocation_index = msl_options.emulate_subgroups && active_input_builtins.get(BuiltInSubgroupId);
    bool need_workgroup_size = msl_options.emulate_subgroups && active_input_builtins.get(BuiltInNumSubgroups);

    if (need_subpass_input || need_sample_pos || need_subgroup_mask || need_vertex_params || need_tesc_params ||
        need_multiview || need_dispatch_base || need_vertex_base_params || need_grid_params || needs_sample_id ||
        needs_subgroup_invocation_id || needs_subgroup_size || has_additional_fixed_sample_mask() ||
        need_local_invocation_index || need_workgroup_size)
    {
        bool has_frag_coord = false;
        bool has_sample_id = false;
        bool has_vertex_idx = false;
        bool has_base_vertex = false;
        bool has_instance_idx = false;
        bool has_base_instance = false;
        bool has_invocation_id = false;
        bool has_primitive_id = false;
        bool has_subgroup_invocation_id = false;
        bool has_subgroup_size = false;
        bool has_view_idx = false;
        bool has_layer = false;
        bool has_local_invocation_index = false;
        bool has_workgroup_size = false;
        uint32_t workgroup_id_type = 0;

        ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
            if (var.storage != StorageClassInput && var.storage != StorageClassOutput)
                return;
            if (!interface_variable_exists_in_entry_point(var.self))
                return;
            if (!has_decoration(var.self, DecorationBuiltIn))
                return;

            BuiltIn builtin = ir.meta[var.self].decoration.builtin_type;

            if (var.storage == StorageClassOutput)
            {
                if (has_additional_fixed_sample_mask() && builtin == BuiltInSampleMask)
                {
                    builtin_sample_mask_id = var.self;
                    mark_implicit_builtin(StorageClassOutput, BuiltInSampleMask, var.self);
                    does_shader_write_sample_mask = true;
                }
            }

            if (var.storage != StorageClassInput)
                return;

            // Use Metal's native frame-buffer fetch API for subpass inputs.
            if (need_subpass_input && (!msl_options.use_framebuffer_fetch_subpasses))
            {
                switch (builtin)
                {
                case BuiltInFragCoord:
                    mark_implicit_builtin(StorageClassInput, BuiltInFragCoord, var.self);
                    builtin_frag_coord_id = var.self;
                    has_frag_coord = true;
                    break;
                case BuiltInLayer:
                    if (!msl_options.arrayed_subpass_input || msl_options.multiview)
                        break;
                    mark_implicit_builtin(StorageClassInput, BuiltInLayer, var.self);
                    builtin_layer_id = var.self;
                    has_layer = true;
                    break;
                case BuiltInViewIndex:
                    if (!msl_options.multiview)
                        break;
                    mark_implicit_builtin(StorageClassInput, BuiltInViewIndex, var.self);
                    builtin_view_idx_id = var.self;
                    has_view_idx = true;
                    break;
                default:
                    break;
                }
            }

            if ((need_sample_pos || needs_sample_id) && builtin == BuiltInSampleId)
            {
                builtin_sample_id_id = var.self;
                mark_implicit_builtin(StorageClassInput, BuiltInSampleId, var.self);
                has_sample_id = true;
            }

            if (need_vertex_params)
            {
                switch (builtin)
                {
                case BuiltInVertexIndex:
                    builtin_vertex_idx_id = var.self;
                    mark_implicit_builtin(StorageClassInput, BuiltInVertexIndex, var.self);
                    has_vertex_idx = true;
                    break;
                case BuiltInBaseVertex:
                    builtin_base_vertex_id = var.self;
                    mark_implicit_builtin(StorageClassInput, BuiltInBaseVertex, var.self);
                    has_base_vertex = true;
                    break;
                case BuiltInInstanceIndex:
                    builtin_instance_idx_id = var.self;
                    mark_implicit_builtin(StorageClassInput, BuiltInInstanceIndex, var.self);
                    has_instance_idx = true;
                    break;
                case BuiltInBaseInstance:
                    builtin_base_instance_id = var.self;
                    mark_implicit_builtin(StorageClassInput, BuiltInBaseInstance, var.self);
                    has_base_instance = true;
                    break;
                default:
                    break;
                }
            }

            if (need_tesc_params)
            {
                switch (builtin)
                {
                case BuiltInInvocationId:
                    builtin_invocation_id_id = var.self;
                    mark_implicit_builtin(StorageClassInput, BuiltInInvocationId, var.self);
                    has_invocation_id = true;
                    break;
                case BuiltInPrimitiveId:
                    builtin_primitive_id_id = var.self;
                    mark_implicit_builtin(StorageClassInput, BuiltInPrimitiveId, var.self);
                    has_primitive_id = true;
                    break;
                default:
                    break;
                }
            }

            if ((need_subgroup_mask || needs_subgroup_invocation_id) && builtin == BuiltInSubgroupLocalInvocationId)
            {
                builtin_subgroup_invocation_id_id = var.self;
                mark_implicit_builtin(StorageClassInput, BuiltInSubgroupLocalInvocationId, var.self);
                has_subgroup_invocation_id = true;
            }

            if ((need_subgroup_ge_mask || needs_subgroup_size) && builtin == BuiltInSubgroupSize)
            {
                builtin_subgroup_size_id = var.self;
                mark_implicit_builtin(StorageClassInput, BuiltInSubgroupSize, var.self);
                has_subgroup_size = true;
            }

            if (need_multiview)
            {
                switch (builtin)
                {
                case BuiltInInstanceIndex:
                    // The view index here is derived from the instance index.
                    builtin_instance_idx_id = var.self;
                    mark_implicit_builtin(StorageClassInput, BuiltInInstanceIndex, var.self);
                    has_instance_idx = true;
                    break;
                case BuiltInBaseInstance:
                    // If a non-zero base instance is used, we need to adjust for it when calculating the view index.
                    builtin_base_instance_id = var.self;
                    mark_implicit_builtin(StorageClassInput, BuiltInBaseInstance, var.self);
                    has_base_instance = true;
                    break;
                case BuiltInViewIndex:
                    builtin_view_idx_id = var.self;
                    mark_implicit_builtin(StorageClassInput, BuiltInViewIndex, var.self);
                    has_view_idx = true;
                    break;
                default:
                    break;
                }
            }

            if (need_local_invocation_index && builtin == BuiltInLocalInvocationIndex)
            {
                builtin_local_invocation_index_id = var.self;
                mark_implicit_builtin(StorageClassInput, BuiltInLocalInvocationIndex, var.self);
                has_local_invocation_index = true;
            }

            if (need_workgroup_size && builtin == BuiltInLocalInvocationId)
            {
                builtin_workgroup_size_id = var.self;
                mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var.self);
                has_workgroup_size = true;
            }

            // The base workgroup needs to have the same type and vector size
            // as the workgroup or invocation ID, so keep track of the type that
            // was used.
            if (need_dispatch_base && workgroup_id_type == 0 &&
                (builtin == BuiltInWorkgroupId || builtin == BuiltInGlobalInvocationId))
                workgroup_id_type = var.basetype;
        });

        // Use Metal's native frame-buffer fetch API for subpass inputs.
        if ((!has_frag_coord || (msl_options.multiview && !has_view_idx) ||
             (msl_options.arrayed_subpass_input && !msl_options.multiview && !has_layer)) &&
            (!msl_options.use_framebuffer_fetch_subpasses) && need_subpass_input)
        {
            if (!has_frag_coord)
            {
                uint32_t offset = ir.increase_bound_by(3);
                uint32_t type_id = offset;
                uint32_t type_ptr_id = offset + 1;
                uint32_t var_id = offset + 2;

                // Create gl_FragCoord.
                SPIRType vec4_type;
                vec4_type.basetype = SPIRType::Float;
                vec4_type.width = 32;
                vec4_type.vecsize = 4;
                set<SPIRType>(type_id, vec4_type);

                SPIRType vec4_type_ptr;
                vec4_type_ptr = vec4_type;
                vec4_type_ptr.pointer = true;
                vec4_type_ptr.pointer_depth++;
                vec4_type_ptr.parent_type = type_id;
                vec4_type_ptr.storage = StorageClassInput;
                auto &ptr_type = set<SPIRType>(type_ptr_id, vec4_type_ptr);
                ptr_type.self = type_id;

                set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
                set_decoration(var_id, DecorationBuiltIn, BuiltInFragCoord);
                builtin_frag_coord_id = var_id;
                mark_implicit_builtin(StorageClassInput, BuiltInFragCoord, var_id);
            }

            if (!has_layer && msl_options.arrayed_subpass_input && !msl_options.multiview)
            {
                uint32_t offset = ir.increase_bound_by(2);
                uint32_t type_ptr_id = offset;
                uint32_t var_id = offset + 1;

                // Create gl_Layer.
                SPIRType uint_type_ptr;
                uint_type_ptr = get_uint_type();
                uint_type_ptr.pointer = true;
                uint_type_ptr.pointer_depth++;
                uint_type_ptr.parent_type = get_uint_type_id();
                uint_type_ptr.storage = StorageClassInput;
                auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
                ptr_type.self = get_uint_type_id();

                set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
                set_decoration(var_id, DecorationBuiltIn, BuiltInLayer);
                builtin_layer_id = var_id;
                mark_implicit_builtin(StorageClassInput, BuiltInLayer, var_id);
            }

            if (!has_view_idx && msl_options.multiview)
            {
                uint32_t offset = ir.increase_bound_by(2);
                uint32_t type_ptr_id = offset;
                uint32_t var_id = offset + 1;

                // Create gl_ViewIndex.
                SPIRType uint_type_ptr;
                uint_type_ptr = get_uint_type();
                uint_type_ptr.pointer = true;
                uint_type_ptr.pointer_depth++;
                uint_type_ptr.parent_type = get_uint_type_id();
                uint_type_ptr.storage = StorageClassInput;
                auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
                ptr_type.self = get_uint_type_id();

                set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
                set_decoration(var_id, DecorationBuiltIn, BuiltInViewIndex);
                builtin_view_idx_id = var_id;
                mark_implicit_builtin(StorageClassInput, BuiltInViewIndex, var_id);
            }
        }

        if (!has_sample_id && (need_sample_pos || needs_sample_id))
        {
            uint32_t offset = ir.increase_bound_by(2);
            uint32_t type_ptr_id = offset;
            uint32_t var_id = offset + 1;

            // Create gl_SampleID.
            SPIRType uint_type_ptr;
            uint_type_ptr = get_uint_type();
            uint_type_ptr.pointer = true;
            uint_type_ptr.pointer_depth++;
            uint_type_ptr.parent_type = get_uint_type_id();
            uint_type_ptr.storage = StorageClassInput;
            auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
            ptr_type.self = get_uint_type_id();

            set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
            set_decoration(var_id, DecorationBuiltIn, BuiltInSampleId);
            builtin_sample_id_id = var_id;
            mark_implicit_builtin(StorageClassInput, BuiltInSampleId, var_id);
        }

        if ((need_vertex_params && (!has_vertex_idx || !has_base_vertex || !has_instance_idx || !has_base_instance)) ||
            (need_multiview && (!has_instance_idx || !has_base_instance || !has_view_idx)))
        {
            uint32_t type_ptr_id = ir.increase_bound_by(1);

            SPIRType uint_type_ptr;
            uint_type_ptr = get_uint_type();
            uint_type_ptr.pointer = true;
            uint_type_ptr.pointer_depth++;
            uint_type_ptr.parent_type = get_uint_type_id();
            uint_type_ptr.storage = StorageClassInput;
            auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
            ptr_type.self = get_uint_type_id();

            if (need_vertex_params && !has_vertex_idx)
            {
                uint32_t var_id = ir.increase_bound_by(1);

                // Create gl_VertexIndex.
                set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
                set_decoration(var_id, DecorationBuiltIn, BuiltInVertexIndex);
                builtin_vertex_idx_id = var_id;
                mark_implicit_builtin(StorageClassInput, BuiltInVertexIndex, var_id);
            }

            if (need_vertex_params && !has_base_vertex)
            {
                uint32_t var_id = ir.increase_bound_by(1);

                // Create gl_BaseVertex.
                set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
                set_decoration(var_id, DecorationBuiltIn, BuiltInBaseVertex);
                builtin_base_vertex_id = var_id;
                mark_implicit_builtin(StorageClassInput, BuiltInBaseVertex, var_id);
            }

            if (!has_instance_idx) // Needed by both multiview and tessellation
            {
                uint32_t var_id = ir.increase_bound_by(1);

                // Create gl_InstanceIndex.
                set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
                set_decoration(var_id, DecorationBuiltIn, BuiltInInstanceIndex);
                builtin_instance_idx_id = var_id;
                mark_implicit_builtin(StorageClassInput, BuiltInInstanceIndex, var_id);
            }

            if (!has_base_instance) // Needed by both multiview and tessellation
            {
                uint32_t var_id = ir.increase_bound_by(1);

                // Create gl_BaseInstance.
                set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
                set_decoration(var_id, DecorationBuiltIn, BuiltInBaseInstance);
                builtin_base_instance_id = var_id;
                mark_implicit_builtin(StorageClassInput, BuiltInBaseInstance, var_id);
            }

            if (need_multiview)
            {
                // Multiview shaders are not allowed to write to gl_Layer, ostensibly because
                // it is implicitly written from gl_ViewIndex, but we have to do that explicitly.
                // Note that we can't just abuse gl_ViewIndex for this purpose: it's an input, but
                // gl_Layer is an output in vertex-pipeline shaders.
                uint32_t type_ptr_out_id = ir.increase_bound_by(2);
                SPIRType uint_type_ptr_out;
                uint_type_ptr_out = get_uint_type();
                uint_type_ptr_out.pointer = true;
                uint_type_ptr_out.pointer_depth++;
                uint_type_ptr_out.parent_type = get_uint_type_id();
                uint_type_ptr_out.storage = StorageClassOutput;
                auto &ptr_out_type = set<SPIRType>(type_ptr_out_id, uint_type_ptr_out);
                ptr_out_type.self = get_uint_type_id();

                uint32_t var_id = type_ptr_out_id + 1;
                set<SPIRVariable>(var_id, type_ptr_out_id, StorageClassOutput);
                set_decoration(var_id, DecorationBuiltIn, BuiltInLayer);
                builtin_layer_id = var_id;
                mark_implicit_builtin(StorageClassOutput, BuiltInLayer, var_id);
            }

            if (need_multiview && !has_view_idx)
            {
                uint32_t var_id = ir.increase_bound_by(1);

                // Create gl_ViewIndex.
                set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
                set_decoration(var_id, DecorationBuiltIn, BuiltInViewIndex);
                builtin_view_idx_id = var_id;
                mark_implicit_builtin(StorageClassInput, BuiltInViewIndex, var_id);
            }
        }

        if ((need_tesc_params && (msl_options.multi_patch_workgroup || !has_invocation_id || !has_primitive_id)) ||
            need_grid_params)
        {
            uint32_t type_ptr_id = ir.increase_bound_by(1);

            SPIRType uint_type_ptr;
            uint_type_ptr = get_uint_type();
            uint_type_ptr.pointer = true;
            uint_type_ptr.pointer_depth++;
            uint_type_ptr.parent_type = get_uint_type_id();
            uint_type_ptr.storage = StorageClassInput;
            auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
            ptr_type.self = get_uint_type_id();

            if (msl_options.multi_patch_workgroup || need_grid_params)
            {
                uint32_t var_id = ir.increase_bound_by(1);

                // Create gl_GlobalInvocationID.
                set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
                set_decoration(var_id, DecorationBuiltIn, BuiltInGlobalInvocationId);
                builtin_invocation_id_id = var_id;
                mark_implicit_builtin(StorageClassInput, BuiltInGlobalInvocationId, var_id);
            }
            else if (need_tesc_params && !has_invocation_id)
            {
                uint32_t var_id = ir.increase_bound_by(1);

                // Create gl_InvocationID.
                set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
                set_decoration(var_id, DecorationBuiltIn, BuiltInInvocationId);
                builtin_invocation_id_id = var_id;
                mark_implicit_builtin(StorageClassInput, BuiltInInvocationId, var_id);
            }

            if (need_tesc_params && !has_primitive_id)
            {
                uint32_t var_id = ir.increase_bound_by(1);

                // Create gl_PrimitiveID.
                set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
                set_decoration(var_id, DecorationBuiltIn, BuiltInPrimitiveId);
                builtin_primitive_id_id = var_id;
                mark_implicit_builtin(StorageClassInput, BuiltInPrimitiveId, var_id);
            }

            if (need_grid_params)
            {
                uint32_t var_id = ir.increase_bound_by(1);

                set<SPIRVariable>(var_id, build_extended_vector_type(get_uint_type_id(), 3), StorageClassInput);
                set_extended_decoration(var_id, SPIRVCrossDecorationBuiltInStageInputSize);
                get_entry_point().interface_variables.push_back(var_id);
                set_name(var_id, "spvStageInputSize");
                builtin_stage_input_size_id = var_id;
            }
        }

        if (!has_subgroup_invocation_id && (need_subgroup_mask || needs_subgroup_invocation_id))
        {
            uint32_t offset = ir.increase_bound_by(2);
            uint32_t type_ptr_id = offset;
            uint32_t var_id = offset + 1;

            // Create gl_SubgroupInvocationID.
            SPIRType uint_type_ptr;
            uint_type_ptr = get_uint_type();
            uint_type_ptr.pointer = true;
            uint_type_ptr.pointer_depth++;
            uint_type_ptr.parent_type = get_uint_type_id();
            uint_type_ptr.storage = StorageClassInput;
            auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
            ptr_type.self = get_uint_type_id();

            set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
            set_decoration(var_id, DecorationBuiltIn, BuiltInSubgroupLocalInvocationId);
            builtin_subgroup_invocation_id_id = var_id;
            mark_implicit_builtin(StorageClassInput, BuiltInSubgroupLocalInvocationId, var_id);
        }

        if (!has_subgroup_size && (need_subgroup_ge_mask || needs_subgroup_size))
        {
            uint32_t offset = ir.increase_bound_by(2);
            uint32_t type_ptr_id = offset;
            uint32_t var_id = offset + 1;

            // Create gl_SubgroupSize.
            SPIRType uint_type_ptr;
            uint_type_ptr = get_uint_type();
            uint_type_ptr.pointer = true;
            uint_type_ptr.pointer_depth++;
            uint_type_ptr.parent_type = get_uint_type_id();
            uint_type_ptr.storage = StorageClassInput;
            auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
            ptr_type.self = get_uint_type_id();

            set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
            set_decoration(var_id, DecorationBuiltIn, BuiltInSubgroupSize);
            builtin_subgroup_size_id = var_id;
            mark_implicit_builtin(StorageClassInput, BuiltInSubgroupSize, var_id);
        }

        if (need_dispatch_base || need_vertex_base_params)
        {
            if (workgroup_id_type == 0)
                workgroup_id_type = build_extended_vector_type(get_uint_type_id(), 3);

            uint32_t var_id;
            if (msl_options.supports_msl_version(1, 2))
            {
                // If we have MSL 1.2, we can (ab)use the [[grid_origin]] builtin
                // to convey this information and save a buffer slot.
                uint32_t offset = ir.increase_bound_by(1);
                var_id = offset;

                set<SPIRVariable>(var_id, workgroup_id_type, StorageClassInput);
                set_extended_decoration(var_id, SPIRVCrossDecorationBuiltInDispatchBase);
                get_entry_point().interface_variables.push_back(var_id);
            }
            else
            {
                // Otherwise, we need to fall back to a good ol' fashioned buffer.
                uint32_t offset = ir.increase_bound_by(2);
                var_id = offset;
                uint32_t type_id = offset + 1;

                SPIRType var_type = get<SPIRType>(workgroup_id_type);
                var_type.storage = StorageClassUniform;
                set<SPIRType>(type_id, var_type);

                set<SPIRVariable>(var_id, type_id, StorageClassUniform);
                // This should never match anything.
                set_decoration(var_id, DecorationDescriptorSet, ~(5u));
                set_decoration(var_id, DecorationBinding, msl_options.indirect_params_buffer_index);
                set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary,
                                        msl_options.indirect_params_buffer_index);
            }
            set_name(var_id, "spvDispatchBase");
            builtin_dispatch_base_id = var_id;
        }

        if (has_additional_fixed_sample_mask() && !does_shader_write_sample_mask)
        {
            uint32_t offset = ir.increase_bound_by(2);
            uint32_t var_id = offset + 1;

            // Create gl_SampleMask.
            SPIRType uint_type_ptr_out;
            uint_type_ptr_out = get_uint_type();
            uint_type_ptr_out.pointer = true;
            uint_type_ptr_out.pointer_depth++;
            uint_type_ptr_out.parent_type = get_uint_type_id();
            uint_type_ptr_out.storage = StorageClassOutput;

            auto &ptr_out_type = set<SPIRType>(offset, uint_type_ptr_out);
            ptr_out_type.self = get_uint_type_id();
            set<SPIRVariable>(var_id, offset, StorageClassOutput);
            set_decoration(var_id, DecorationBuiltIn, BuiltInSampleMask);
            builtin_sample_mask_id = var_id;
            mark_implicit_builtin(StorageClassOutput, BuiltInSampleMask, var_id);
        }

        if (need_local_invocation_index && !has_local_invocation_index)
        {
            uint32_t offset = ir.increase_bound_by(2);
            uint32_t type_ptr_id = offset;
            uint32_t var_id = offset + 1;

            // Create gl_LocalInvocationIndex.
            SPIRType uint_type_ptr;
            uint_type_ptr = get_uint_type();
            uint_type_ptr.pointer = true;
            uint_type_ptr.pointer_depth++;
            uint_type_ptr.parent_type = get_uint_type_id();
            uint_type_ptr.storage = StorageClassInput;

            auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
            ptr_type.self = get_uint_type_id();
            set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
            set_decoration(var_id, DecorationBuiltIn, BuiltInLocalInvocationIndex);
            builtin_local_invocation_index_id = var_id;
            mark_implicit_builtin(StorageClassInput, BuiltInLocalInvocationIndex, var_id);
        }

        if (need_workgroup_size && !has_workgroup_size)
        {
            uint32_t offset = ir.increase_bound_by(2);
            uint32_t type_ptr_id = offset;
            uint32_t var_id = offset + 1;

            // Create gl_WorkgroupSize.
            uint32_t type_id = build_extended_vector_type(get_uint_type_id(), 3);
            SPIRType uint_type_ptr = get<SPIRType>(type_id);
            uint_type_ptr.pointer = true;
            uint_type_ptr.pointer_depth++;
            uint_type_ptr.parent_type = type_id;
            uint_type_ptr.storage = StorageClassInput;

            auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
            ptr_type.self = type_id;
            set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
            set_decoration(var_id, DecorationBuiltIn, BuiltInWorkgroupSize);
            builtin_workgroup_size_id = var_id;
            mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var_id);
        }
    }

    if (needs_swizzle_buffer_def)
    {
        uint32_t var_id = build_constant_uint_array_pointer();
        set_name(var_id, "spvSwizzleConstants");
        // This should never match anything.
        set_decoration(var_id, DecorationDescriptorSet, kSwizzleBufferBinding);
        set_decoration(var_id, DecorationBinding, msl_options.swizzle_buffer_index);
        set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, msl_options.swizzle_buffer_index);
        swizzle_buffer_id = var_id;
    }

    if (!buffers_requiring_array_length.empty())
    {
        uint32_t var_id = build_constant_uint_array_pointer();
        set_name(var_id, "spvBufferSizeConstants");
        // This should never match anything.
        set_decoration(var_id, DecorationDescriptorSet, kBufferSizeBufferBinding);
        set_decoration(var_id, DecorationBinding, msl_options.buffer_size_buffer_index);
        set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, msl_options.buffer_size_buffer_index);
        buffer_size_buffer_id = var_id;
    }

    if (needs_view_mask_buffer())
    {
        uint32_t var_id = build_constant_uint_array_pointer();
        set_name(var_id, "spvViewMask");
        // This should never match anything.
        set_decoration(var_id, DecorationDescriptorSet, ~(4u));
        set_decoration(var_id, DecorationBinding, msl_options.view_mask_buffer_index);
        set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, msl_options.view_mask_buffer_index);
        view_mask_buffer_id = var_id;
    }

    if (!buffers_requiring_dynamic_offset.empty())
    {
        uint32_t var_id = build_constant_uint_array_pointer();
        set_name(var_id, "spvDynamicOffsets");
        // This should never match anything.
        set_decoration(var_id, DecorationDescriptorSet, ~(5u));
        set_decoration(var_id, DecorationBinding, msl_options.dynamic_offsets_buffer_index);
        set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary,
                                msl_options.dynamic_offsets_buffer_index);
        dynamic_offsets_buffer_id = var_id;
    }

    // If we're returning a struct from a vertex-like entry point, we must return a position attribute.
    bool need_position =
        (get_execution_model() == ExecutionModelVertex ||
         get_execution_model() == ExecutionModelTessellationEvaluation) &&
        !capture_output_to_buffer && !get_is_rasterization_disabled() &&
        !active_output_builtins.get(BuiltInPosition);

    if (need_position)
    {
        // If we can get away with returning void from entry point, we don't need to care.
        // If there is at least one other stage output, we need to return [[position]],
        // so we need to create one if it doesn't appear in the SPIR-V. Before adding the
        // implicit variable, check if it actually exists already, but just has not been used
        // or initialized, and if so, mark it as active, and do not create the implicit variable.
        bool has_output = false;
        ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
            if (var.storage == StorageClassOutput && interface_variable_exists_in_entry_point(var.self))
            {
                has_output = true;

                // Check if the var is the Position builtin
                if (has_decoration(var.self, DecorationBuiltIn) && get_decoration(var.self, DecorationBuiltIn) == BuiltInPosition)
                    active_output_builtins.set(BuiltInPosition);

                // If the var is a struct, check whether any member is the Position builtin
                auto &var_type = get_variable_element_type(var);
                if (var_type.basetype == SPIRType::Struct)
                {
                    auto mbr_cnt = var_type.member_types.size();
                    for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
                    {
                        auto builtin = BuiltInMax;
                        bool is_builtin = is_member_builtin(var_type, mbr_idx, &builtin);
                        if (is_builtin && builtin == BuiltInPosition)
                            active_output_builtins.set(BuiltInPosition);
                    }
                }
            }
        });
        need_position = has_output && !active_output_builtins.get(BuiltInPosition);
    }

    if (need_position)
    {
        uint32_t offset = ir.increase_bound_by(3);
        uint32_t type_id = offset;
        uint32_t type_ptr_id = offset + 1;
        uint32_t var_id = offset + 2;

        // Create gl_Position.
        SPIRType vec4_type;
        vec4_type.basetype = SPIRType::Float;
        vec4_type.width = 32;
        vec4_type.vecsize = 4;
        set<SPIRType>(type_id, vec4_type);

        SPIRType vec4_type_ptr;
        vec4_type_ptr = vec4_type;
        vec4_type_ptr.pointer = true;
        vec4_type_ptr.pointer_depth++;
        vec4_type_ptr.parent_type = type_id;
        vec4_type_ptr.storage = StorageClassOutput;
        auto &ptr_type = set<SPIRType>(type_ptr_id, vec4_type_ptr);
        ptr_type.self = type_id;

        set<SPIRVariable>(var_id, type_ptr_id, StorageClassOutput);
        set_decoration(var_id, DecorationBuiltIn, BuiltInPosition);
        mark_implicit_builtin(StorageClassOutput, BuiltInPosition, var_id);
    }
}
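
// A note on the id-allocation idiom used throughout build_implicit_builtins():
// fresh IR ids are reserved with ir.increase_bound_by(N) and handed out
// consecutively to the type, the pointer type and the variable. The recurring
// pattern, reduced to a minimal sketch:
//
//   uint32_t offset = ir.increase_bound_by(2);
//   uint32_t type_ptr_id = offset;
//   uint32_t var_id = offset + 1;
//
//   SPIRType ptr = get_uint_type();    // value type: uint
//   ptr.pointer = true;                // wrap it in a pointer type
//   ptr.pointer_depth++;
//   ptr.parent_type = get_uint_type_id();
//   ptr.storage = StorageClassInput;
//   set<SPIRType>(type_ptr_id, ptr).self = get_uint_type_id();
//
//   set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);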

// Checks if the specified builtin variable (e.g. gl_InstanceIndex) is marked as active.
// If not, it marks it as active and forces a recompilation.
// This might be used when the optimization of inactive builtins was too optimistic (e.g. when "spvOut" is emitted).
void CompilerMSL::ensure_builtin(spv::StorageClass storage, spv::BuiltIn builtin)
{
    Bitset *active_builtins = nullptr;
    switch (storage)
    {
    case StorageClassInput:
        active_builtins = &active_input_builtins;
        break;
    case StorageClassOutput:
        active_builtins = &active_output_builtins;
        break;
    default:
        break;
    }

    // At this point, the specified builtin variable must have already been declared in the entry point.
    // If not, mark as active and force recompile.
    if (active_builtins != nullptr && !active_builtins->get(builtin))
    {
        active_builtins->set(builtin);
        force_recompile();
    }
}

void CompilerMSL::mark_implicit_builtin(StorageClass storage, BuiltIn builtin, uint32_t id)
{
    Bitset *active_builtins = nullptr;
    switch (storage)
    {
    case StorageClassInput:
        active_builtins = &active_input_builtins;
        break;
    case StorageClassOutput:
        active_builtins = &active_output_builtins;
        break;
    default:
        break;
    }

    assert(active_builtins != nullptr);
    active_builtins->set(builtin);

    auto &var = get_entry_point().interface_variables;
    if (find(begin(var), end(var), VariableID(id)) == end(var))
        var.push_back(id);
}

uint32_t CompilerMSL::build_constant_uint_array_pointer()
{
    uint32_t offset = ir.increase_bound_by(3);
    uint32_t type_ptr_id = offset;
    uint32_t type_ptr_ptr_id = offset + 1;
    uint32_t var_id = offset + 2;

    // Create a buffer to hold extra data, including the swizzle constants.
    SPIRType uint_type_pointer = get_uint_type();
    uint_type_pointer.pointer = true;
    uint_type_pointer.pointer_depth++;
    uint_type_pointer.parent_type = get_uint_type_id();
    uint_type_pointer.storage = StorageClassUniform;
    set<SPIRType>(type_ptr_id, uint_type_pointer);
    set_decoration(type_ptr_id, DecorationArrayStride, 4);

    SPIRType uint_type_pointer2 = uint_type_pointer;
    uint_type_pointer2.pointer_depth++;
    uint_type_pointer2.parent_type = type_ptr_id;
    set<SPIRType>(type_ptr_ptr_id, uint_type_pointer2);

    set<SPIRVariable>(var_id, type_ptr_ptr_id, StorageClassUniformConstant);
    return var_id;
}
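
// In the generated MSL, the variable created here surfaces as a plain constant
// uint array pointer in the entry point signature, along the lines of the
// sketch below (the exact slot comes from msl_options, e.g. swizzle_buffer_index):
//
//   fragment float4 main0(..., constant uint *spvSwizzleConstants [[buffer(30)]])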

static string create_sampler_address(const char *prefix, MSLSamplerAddress addr)
{
    switch (addr)
    {
    case MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE:
        return join(prefix, "address::clamp_to_edge");
    case MSL_SAMPLER_ADDRESS_CLAMP_TO_ZERO:
        return join(prefix, "address::clamp_to_zero");
    case MSL_SAMPLER_ADDRESS_CLAMP_TO_BORDER:
        return join(prefix, "address::clamp_to_border");
    case MSL_SAMPLER_ADDRESS_REPEAT:
        return join(prefix, "address::repeat");
    case MSL_SAMPLER_ADDRESS_MIRRORED_REPEAT:
        return join(prefix, "address::mirrored_repeat");
    default:
        SPIRV_CROSS_THROW("Invalid sampler addressing mode.");
    }
}
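
// For example, create_sampler_address("s_", MSL_SAMPLER_ADDRESS_REPEAT) yields
// "s_address::repeat", while the "" prefix used when all three axes share one
// mode yields the combined "address::repeat" form.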

SPIRType &CompilerMSL::get_stage_in_struct_type()
{
    auto &si_var = get<SPIRVariable>(stage_in_var_id);
    return get_variable_data_type(si_var);
}

SPIRType &CompilerMSL::get_stage_out_struct_type()
{
    auto &so_var = get<SPIRVariable>(stage_out_var_id);
    return get_variable_data_type(so_var);
}

SPIRType &CompilerMSL::get_patch_stage_in_struct_type()
{
    auto &si_var = get<SPIRVariable>(patch_stage_in_var_id);
    return get_variable_data_type(si_var);
}

SPIRType &CompilerMSL::get_patch_stage_out_struct_type()
{
    auto &so_var = get<SPIRVariable>(patch_stage_out_var_id);
    return get_variable_data_type(so_var);
}

std::string CompilerMSL::get_tess_factor_struct_name()
{
    if (get_entry_point().flags.get(ExecutionModeTriangles))
        return "MTLTriangleTessellationFactorsHalf";
    return "MTLQuadTessellationFactorsHalf";
}
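
// For reference, these names correspond to the tessellation-factor structs
// from the Metal Shading Language (reproduced here; see the MSL specification):
//
//   struct MTLTriangleTessellationFactorsHalf
//   {
//       half edgeTessellationFactor[3];
//       half insideTessellationFactor;
//   };
//
//   struct MTLQuadTessellationFactorsHalf
//   {
//       half edgeTessellationFactor[4];
//       half insideTessellationFactor[2];
//   };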

SPIRType &CompilerMSL::get_uint_type()
{
    return get<SPIRType>(get_uint_type_id());
}

uint32_t CompilerMSL::get_uint_type_id()
{
    if (uint_type_id != 0)
        return uint_type_id;

    uint_type_id = ir.increase_bound_by(1);

    SPIRType type;
    type.basetype = SPIRType::UInt;
    type.width = 32;
    set<SPIRType>(uint_type_id, type);
    return uint_type_id;
}
  955. void CompilerMSL::emit_entry_point_declarations()
  956. {
  957. // FIXME: Get test coverage here ...
  958. // Constant arrays of non-primitive types (i.e. matrices) won't link properly into Metal libraries
  959. declare_complex_constant_arrays();
  960. // Emit constexpr samplers here.
	for (auto &samp : constexpr_samplers_by_id)
	{
		auto &var = get<SPIRVariable>(samp.first);
		auto &type = get<SPIRType>(var.basetype);
		if (type.basetype == SPIRType::Sampler)
			add_resource_name(samp.first);

		SmallVector<string> args;
		auto &s = samp.second;

		if (s.coord != MSL_SAMPLER_COORD_NORMALIZED)
			args.push_back("coord::pixel");

		if (s.min_filter == s.mag_filter)
		{
			if (s.min_filter != MSL_SAMPLER_FILTER_NEAREST)
				args.push_back("filter::linear");
		}
		else
		{
			if (s.min_filter != MSL_SAMPLER_FILTER_NEAREST)
				args.push_back("min_filter::linear");
			if (s.mag_filter != MSL_SAMPLER_FILTER_NEAREST)
				args.push_back("mag_filter::linear");
		}

		switch (s.mip_filter)
		{
		case MSL_SAMPLER_MIP_FILTER_NONE:
			// Default
			break;
		case MSL_SAMPLER_MIP_FILTER_NEAREST:
			args.push_back("mip_filter::nearest");
			break;
		case MSL_SAMPLER_MIP_FILTER_LINEAR:
			args.push_back("mip_filter::linear");
			break;
		default:
			SPIRV_CROSS_THROW("Invalid mip filter.");
		}

		if (s.s_address == s.t_address && s.s_address == s.r_address)
		{
			if (s.s_address != MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE)
				args.push_back(create_sampler_address("", s.s_address));
		}
		else
		{
			if (s.s_address != MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE)
				args.push_back(create_sampler_address("s_", s.s_address));
			if (s.t_address != MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE)
				args.push_back(create_sampler_address("t_", s.t_address));
			if (s.r_address != MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE)
				args.push_back(create_sampler_address("r_", s.r_address));
		}

		if (s.compare_enable)
		{
			switch (s.compare_func)
			{
			case MSL_SAMPLER_COMPARE_FUNC_ALWAYS:
				args.push_back("compare_func::always");
				break;
			case MSL_SAMPLER_COMPARE_FUNC_NEVER:
				args.push_back("compare_func::never");
				break;
			case MSL_SAMPLER_COMPARE_FUNC_EQUAL:
				args.push_back("compare_func::equal");
				break;
			case MSL_SAMPLER_COMPARE_FUNC_NOT_EQUAL:
				args.push_back("compare_func::not_equal");
				break;
			case MSL_SAMPLER_COMPARE_FUNC_LESS:
				args.push_back("compare_func::less");
				break;
			case MSL_SAMPLER_COMPARE_FUNC_LESS_EQUAL:
				args.push_back("compare_func::less_equal");
				break;
			case MSL_SAMPLER_COMPARE_FUNC_GREATER:
				args.push_back("compare_func::greater");
				break;
			case MSL_SAMPLER_COMPARE_FUNC_GREATER_EQUAL:
				args.push_back("compare_func::greater_equal");
				break;
			default:
				SPIRV_CROSS_THROW("Invalid sampler compare function.");
			}
		}

		if (s.s_address == MSL_SAMPLER_ADDRESS_CLAMP_TO_BORDER || s.t_address == MSL_SAMPLER_ADDRESS_CLAMP_TO_BORDER ||
		    s.r_address == MSL_SAMPLER_ADDRESS_CLAMP_TO_BORDER)
		{
			switch (s.border_color)
			{
			case MSL_SAMPLER_BORDER_COLOR_OPAQUE_BLACK:
				args.push_back("border_color::opaque_black");
				break;
			case MSL_SAMPLER_BORDER_COLOR_OPAQUE_WHITE:
				args.push_back("border_color::opaque_white");
				break;
			case MSL_SAMPLER_BORDER_COLOR_TRANSPARENT_BLACK:
				args.push_back("border_color::transparent_black");
				break;
			default:
				SPIRV_CROSS_THROW("Invalid sampler border color.");
			}
		}

		if (s.anisotropy_enable)
			args.push_back(join("max_anisotropy(", s.max_anisotropy, ")"));
		if (s.lod_clamp_enable)
		{
			args.push_back(join("lod_clamp(", convert_to_string(s.lod_clamp_min, current_locale_radix_character), ", ",
			                    convert_to_string(s.lod_clamp_max, current_locale_radix_character), ")"));
		}

		// If we would emit no arguments, then omit the parentheses entirely. Otherwise,
		// we'll wind up with a "most vexing parse" situation.
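		// e.g. "constexpr sampler s();" would be parsed as a function declaration,
		// so with no arguments we emit "constexpr sampler s;" instead.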
		if (args.empty())
			statement("constexpr sampler ",
			          type.basetype == SPIRType::SampledImage ? to_sampler_expression(samp.first) : to_name(samp.first),
			          ";");
		else
			statement("constexpr sampler ",
			          type.basetype == SPIRType::SampledImage ? to_sampler_expression(samp.first) : to_name(samp.first),
			          "(", merge(args), ");");
	}

	// Emit dynamic buffers here.
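	// Each dynamically-offset buffer is rebased off its argument buffer member at the
	// entry point, roughly like (names illustrative):
	//   device MyBuffer& myBuf = *(device MyBuffer*)((device char*)argBuf0.m_myBuf + dynOffsets[0]);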
	for (auto &dynamic_buffer : buffers_requiring_dynamic_offset)
	{
		if (!dynamic_buffer.second.second)
		{
			// Could happen if no buffer was used at requested binding point.
			continue;
		}

		const auto &var = get<SPIRVariable>(dynamic_buffer.second.second);
		uint32_t var_id = var.self;
		const auto &type = get_variable_data_type(var);
		string name = to_name(var.self);
		uint32_t desc_set = get_decoration(var.self, DecorationDescriptorSet);
		uint32_t arg_id = argument_buffer_ids[desc_set];
		uint32_t base_index = dynamic_buffer.second.first;

		if (!type.array.empty())
		{
			// This is complicated, because we need to support arrays of arrays.
			// And it's even worse if the outermost dimension is a runtime array, because now
			// all this complicated goop has to go into the shader itself. (FIXME)
			if (!type.array[type.array.size() - 1])
				SPIRV_CROSS_THROW("Runtime arrays with dynamic offsets are not supported yet.");
			else
			{
				is_using_builtin_array = true;
				statement(get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(var_id), name,
				          type_to_array_glsl(type), " =");

				uint32_t dim = uint32_t(type.array.size());
				uint32_t j = 0;
				for (SmallVector<uint32_t> indices(type.array.size());
				     indices[type.array.size() - 1] < to_array_size_literal(type); j++)
				{
					while (dim > 0)
					{
						begin_scope();
						--dim;
					}

					string arrays;
					for (uint32_t i = uint32_t(type.array.size()); i; --i)
						arrays += join("[", indices[i - 1], "]");
					statement("(", get_argument_address_space(var), " ", type_to_glsl(type), "* ",
					          to_restrict(var_id, false), ")((", get_argument_address_space(var), " char* ",
					          to_restrict(var_id, false), ")", to_name(arg_id), ".", ensure_valid_name(name, "m"),
					          arrays, " + ", to_name(dynamic_offsets_buffer_id), "[", base_index + j, "]),");

					while (++indices[dim] >= to_array_size_literal(type, dim) && dim < type.array.size() - 1)
					{
						end_scope(",");
						indices[dim++] = 0;
					}
				}
				end_scope_decl();
				statement_no_indent("");
				is_using_builtin_array = false;
			}
		}
		else
		{
			statement(get_argument_address_space(var), " auto& ", to_restrict(var_id), name, " = *(",
			          get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(var_id, false), ")((",
			          get_argument_address_space(var), " char* ", to_restrict(var_id, false), ")", to_name(arg_id), ".",
			          ensure_valid_name(name, "m"), " + ", to_name(dynamic_offsets_buffer_id), "[", base_index, "]);");
		}
	}

	// Emit buffer arrays here.
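	// An array of N discrete buffer bindings becomes a local pointer array built from the
	// suffixed per-element arguments, roughly (names illustrative):
	//   device MyBuffer* myBufs[] = { myBufs_0, myBufs_1 };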
	for (uint32_t array_id : buffer_arrays)
	{
		const auto &var = get<SPIRVariable>(array_id);
		const auto &type = get_variable_data_type(var);
		const auto &buffer_type = get_variable_element_type(var);
		string name = to_name(array_id);
		statement(get_argument_address_space(var), " ", type_to_glsl(buffer_type), "* ", to_restrict(array_id), name,
		          "[] =");
		begin_scope();
		for (uint32_t i = 0; i < to_array_size_literal(type); ++i)
			statement(name, "_", i, ",");
		end_scope_decl();
		statement_no_indent("");
	}
	// For some reason, without this, we end up emitting the arrays twice.
	buffer_arrays.clear();

	// Emit disabled fragment outputs.
	std::sort(disabled_frag_outputs.begin(), disabled_frag_outputs.end());
	for (uint32_t var_id : disabled_frag_outputs)
	{
		auto &var = get<SPIRVariable>(var_id);
		add_local_variable_name(var_id);
		statement(variable_decl(var), ";");
		var.deferred_declaration = false;
	}
}
string CompilerMSL::compile()
{
	replace_illegal_entry_point_names();
	ir.fixup_reserved_names();

	// Do not deal with GLES-isms like precision, older extensions and such.
	options.vulkan_semantics = true;
	options.es = false;
	options.version = 450;
	backend.null_pointer_literal = "nullptr";
	backend.float_literal_suffix = false;
	backend.uint32_t_literal_suffix = true;
	backend.int16_t_literal_suffix = "";
	backend.uint16_t_literal_suffix = "";
	backend.basic_int_type = "int";
	backend.basic_uint_type = "uint";
	backend.basic_int8_type = "char";
	backend.basic_uint8_type = "uchar";
	backend.basic_int16_type = "short";
	backend.basic_uint16_type = "ushort";
	backend.discard_literal = "discard_fragment()";
	backend.demote_literal = "discard_fragment()";
	backend.boolean_mix_function = "select";
	backend.swizzle_is_function = false;
	backend.shared_is_implied = false;
	backend.use_initializer_list = true;
	backend.use_typed_initializer_list = true;
	backend.native_row_major_matrix = false;
	backend.unsized_array_supported = false;
	backend.can_declare_arrays_inline = false;
	backend.allow_truncated_access_chain = true;
	backend.comparison_image_samples_scalar = true;
	backend.native_pointers = true;
	backend.nonuniform_qualifier = "";
	backend.support_small_type_sampling_result = true;
	backend.supports_empty_struct = true;

	// Allow Metal to use the array<T> template unless we force it off.
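	// e.g. an array a helper function must return by value can then be wrapped in an
	// array template type; plain C arrays cannot be returned, so force_native_arrays
	// instead makes us copy through output parameters.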
	backend.can_return_array = !msl_options.force_native_arrays;
	backend.array_is_value_type = !msl_options.force_native_arrays;
	// Arrays which are part of buffer objects are never considered to be native arrays.
	backend.buffer_offset_array_is_value_type = false;
	backend.support_pointer_to_pointer = true;

	capture_output_to_buffer = msl_options.capture_output_to_buffer;
	is_rasterization_disabled = msl_options.disable_rasterization || capture_output_to_buffer;

	// Initialize array here rather than constructor, MSVC 2013 workaround.
	for (auto &id : next_metal_resource_ids)
		id = 0;

	fixup_type_alias();
	replace_illegal_names();
	sync_entry_point_aliases_and_names();

	build_function_control_flow_graphs_and_analyze();
	update_active_builtins();
	analyze_image_and_sampler_usage();
	analyze_sampled_image_usage();
	analyze_interlocked_resource_usage();
	preprocess_op_codes();
	build_implicit_builtins();

	fixup_image_load_store_access();

	set_enabled_interface_variables(get_active_interface_variables());
	if (msl_options.force_active_argument_buffer_resources)
		activate_argument_buffer_resources();

	if (swizzle_buffer_id)
		active_interface_variables.insert(swizzle_buffer_id);
	if (buffer_size_buffer_id)
		active_interface_variables.insert(buffer_size_buffer_id);
	if (view_mask_buffer_id)
		active_interface_variables.insert(view_mask_buffer_id);
	if (dynamic_offsets_buffer_id)
		active_interface_variables.insert(dynamic_offsets_buffer_id);
	if (builtin_layer_id)
		active_interface_variables.insert(builtin_layer_id);
	if (builtin_dispatch_base_id && !msl_options.supports_msl_version(1, 2))
		active_interface_variables.insert(builtin_dispatch_base_id);
	if (builtin_sample_mask_id)
		active_interface_variables.insert(builtin_sample_mask_id);

	// Create structs to hold input, output and uniform variables.
	// Do output first to ensure out. is declared at top of entry function.
	qual_pos_var_name = "";
	stage_out_var_id = add_interface_block(StorageClassOutput);
	patch_stage_out_var_id = add_interface_block(StorageClassOutput, true);
	stage_in_var_id = add_interface_block(StorageClassInput);
	if (get_execution_model() == ExecutionModelTessellationEvaluation)
		patch_stage_in_var_id = add_interface_block(StorageClassInput, true);

	if (get_execution_model() == ExecutionModelTessellationControl)
		stage_out_ptr_var_id = add_interface_block_pointer(stage_out_var_id, StorageClassOutput);
	if (is_tessellation_shader())
		stage_in_ptr_var_id = add_interface_block_pointer(stage_in_var_id, StorageClassInput);

	// Metal vertex functions that define no output must disable rasterization and return void.
	if (!stage_out_var_id)
		is_rasterization_disabled = true;

	// Convert the use of global variables to recursively-passed function parameters
	localize_global_variables();
	extract_global_variables_from_functions();

	// Mark any non-stage-in structs to be tightly packed.
	mark_packable_structs();
	reorder_type_alias();

	// Add fixup hooks required by shader inputs and outputs. This needs to happen before
	// the loop, so the hooks aren't added multiple times.
	fix_up_shader_inputs_outputs();

	// If we are using argument buffers, we create argument buffer structures for them here.
	// These buffers will be used in the entry point, not the individual resources.
	if (msl_options.argument_buffers)
	{
		if (!msl_options.supports_msl_version(2, 0))
			SPIRV_CROSS_THROW("Argument buffers can only be used with MSL 2.0 and up.");
		analyze_argument_buffers();
	}
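	// Each descriptor set then surfaces in the entry point as a single struct-typed
	// [[buffer(N)]] argument containing its resources, sketched roughly as (illustrative):
	//   struct spvDescriptorSetBuffer0 { texture2d<float> tex [[id(0)]]; sampler smp [[id(1)]]; };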
	uint32_t pass_count = 0;
	do
	{
		if (pass_count >= 3)
			SPIRV_CROSS_THROW("Over 3 compilation loops detected. Must be a bug!");

		reset();

		// Start bindings at zero.
		next_metal_resource_index_buffer = 0;
		next_metal_resource_index_texture = 0;
		next_metal_resource_index_sampler = 0;
		for (auto &id : next_metal_resource_ids)
			id = 0;

		// Move constructor for this type is broken on GCC 4.9 ...
		buffer.reset();

		emit_header();
		emit_custom_templates();
		emit_custom_functions();
		emit_specialization_constants_and_structs();
		emit_resources();
		emit_function(get<SPIRFunction>(ir.default_entry_point), Bitset());

		pass_count++;
	} while (is_forcing_recompilation());

	return buffer.str();
}
// Register the need to output any custom functions.
void CompilerMSL::preprocess_op_codes()
{
	OpCodePreprocessor preproc(*this);
	traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), preproc);

	suppress_missing_prototypes = preproc.suppress_missing_prototypes;

	if (preproc.uses_atomics)
	{
		add_header_line("#include <metal_atomic>");
		add_pragma_line("#pragma clang diagnostic ignored \"-Wunused-variable\"");
	}

	// Before MSL 2.1 (2.2 for textures), Metal vertex functions that write to
	// resources must disable rasterization and return void.
	if (preproc.uses_resource_write)
		is_rasterization_disabled = true;

	// Tessellation control shaders are run as compute functions in Metal, and so
	// must capture their output to a buffer.
	if (get_execution_model() == ExecutionModelTessellationControl ||
	    (get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation))
	{
		is_rasterization_disabled = true;
		capture_output_to_buffer = true;
	}

	if (preproc.needs_subgroup_invocation_id)
		needs_subgroup_invocation_id = true;
	if (preproc.needs_subgroup_size)
		needs_subgroup_size = true;
	// build_implicit_builtins() hasn't run yet, and in fact, this needs to execute
	// before then so that gl_SampleID will get added; so we also need to check if
	// that function would add gl_FragCoord.
	if (preproc.needs_sample_id || msl_options.force_sample_rate_shading ||
	    (is_sample_rate() && (active_input_builtins.get(BuiltInFragCoord) ||
	                          (need_subpass_input && !msl_options.use_framebuffer_fetch_subpasses))))
		needs_sample_id = true;

	if (is_intersection_query())
	{
		add_header_line("#if __METAL_VERSION__ >= 230");
		add_header_line("#include <metal_raytracing>");
		add_header_line("using namespace metal::raytracing;");
		add_header_line("#endif");
	}
}
// Move the Private and Workgroup global variables to the entry function.
// Non-constant variables cannot have global scope in Metal.
void CompilerMSL::localize_global_variables()
{
	auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
	auto iter = global_variables.begin();
	while (iter != global_variables.end())
	{
		uint32_t v_id = *iter;
		auto &var = get<SPIRVariable>(v_id);
		if (var.storage == StorageClassPrivate || var.storage == StorageClassWorkgroup)
		{
			if (!variable_is_lut(var))
				entry_func.add_local_variable(v_id);
			iter = global_variables.erase(iter);
		}
		else
			iter++;
	}
}
// For any global variable accessed directly by a function,
// extract that variable and add it as an argument to that function.
void CompilerMSL::extract_global_variables_from_functions()
{
	// Uniforms
	unordered_set<uint32_t> global_var_ids;
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
		if (var.storage == StorageClassInput || var.storage == StorageClassOutput ||
		    var.storage == StorageClassUniform || var.storage == StorageClassUniformConstant ||
		    var.storage == StorageClassPushConstant || var.storage == StorageClassStorageBuffer)
		{
			global_var_ids.insert(var.self);
		}
	});

	// Local vars that are declared in the main function and accessed directly by a function
	auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
	for (auto &var : entry_func.local_variables)
		if (get<SPIRVariable>(var).storage != StorageClassFunction)
			global_var_ids.insert(var);

	std::set<uint32_t> added_arg_ids;
	unordered_set<uint32_t> processed_func_ids;
	extract_global_variables_from_function(ir.default_entry_point, added_arg_ids, global_var_ids, processed_func_ids);
}
// MSL does not support the use of global variables for shader input content.
// For any global variable accessed directly by the specified function, extract that variable,
// add it as an argument to that function, and add the arg to the added_arg_ids collection.
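// For example (illustrative), a helper that reads a UBO member directly:
//   float fetch() { return ubo.val; }
// is rewritten so the buffer is threaded through explicitly:
//   float fetch(constant UBO& ubo) { return ubo.val; }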
void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::set<uint32_t> &added_arg_ids,
                                                         unordered_set<uint32_t> &global_var_ids,
                                                         unordered_set<uint32_t> &processed_func_ids)
{
	// Avoid processing a function more than once
	if (processed_func_ids.find(func_id) != processed_func_ids.end())
	{
		// Return function global variables
		added_arg_ids = function_global_vars[func_id];
		return;
	}

	processed_func_ids.insert(func_id);

	auto &func = get<SPIRFunction>(func_id);

	// Recursively establish global args added to functions on which we depend.
	for (auto block : func.blocks)
	{
		auto &b = get<SPIRBlock>(block);

		for (auto &i : b.ops)
		{
			auto ops = stream(i);
			auto op = static_cast<Op>(i.op);

			switch (op)
			{
			case OpLoad:
			case OpInBoundsAccessChain:
			case OpAccessChain:
			case OpPtrAccessChain:
			case OpArrayLength:
			{
				uint32_t base_id = ops[2];
				if (global_var_ids.find(base_id) != global_var_ids.end())
					added_arg_ids.insert(base_id);

				// Use Metal's native frame-buffer fetch API for subpass inputs.
				auto &type = get<SPIRType>(ops[0]);
				if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData &&
				    (!msl_options.use_framebuffer_fetch_subpasses))
				{
					// Implicitly reads gl_FragCoord.
					assert(builtin_frag_coord_id != 0);
					added_arg_ids.insert(builtin_frag_coord_id);
					if (msl_options.multiview)
					{
						// Implicitly reads gl_ViewIndex.
						assert(builtin_view_idx_id != 0);
						added_arg_ids.insert(builtin_view_idx_id);
					}
					else if (msl_options.arrayed_subpass_input)
					{
						// Implicitly reads gl_Layer.
						assert(builtin_layer_id != 0);
						added_arg_ids.insert(builtin_layer_id);
					}
				}

				break;
			}

			case OpFunctionCall:
			{
				// First see if any of the function call args are globals
				for (uint32_t arg_idx = 3; arg_idx < i.length; arg_idx++)
				{
					uint32_t arg_id = ops[arg_idx];
					if (global_var_ids.find(arg_id) != global_var_ids.end())
						added_arg_ids.insert(arg_id);
				}

				// Then recurse into the function itself to extract globals used internally in the function
				uint32_t inner_func_id = ops[2];
				std::set<uint32_t> inner_func_args;
				extract_global_variables_from_function(inner_func_id, inner_func_args, global_var_ids,
				                                       processed_func_ids);
				added_arg_ids.insert(inner_func_args.begin(), inner_func_args.end());
				break;
			}

			case OpStore:
			{
				uint32_t base_id = ops[0];
				if (global_var_ids.find(base_id) != global_var_ids.end())
					added_arg_ids.insert(base_id);

				uint32_t rvalue_id = ops[1];
				if (global_var_ids.find(rvalue_id) != global_var_ids.end())
					added_arg_ids.insert(rvalue_id);

				break;
			}

			case OpSelect:
			{
				uint32_t base_id = ops[3];
				if (global_var_ids.find(base_id) != global_var_ids.end())
					added_arg_ids.insert(base_id);
				base_id = ops[4];
				if (global_var_ids.find(base_id) != global_var_ids.end())
					added_arg_ids.insert(base_id);
				break;
			}

			// Emulate texture2D atomic operations
			case OpImageTexelPointer:
			{
				// When using the pointer, we need to know which variable it is actually loaded from.
				uint32_t base_id = ops[2];
				auto *var = maybe_get_backing_variable(base_id);
				if (var && atomic_image_vars.count(var->self))
				{
					if (global_var_ids.find(base_id) != global_var_ids.end())
						added_arg_ids.insert(base_id);
				}
				break;
			}

			case OpExtInst:
			{
				uint32_t extension_set = ops[2];
				if (get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL)
				{
					auto op_450 = static_cast<GLSLstd450>(ops[3]);
					switch (op_450)
					{
					case GLSLstd450InterpolateAtCentroid:
					case GLSLstd450InterpolateAtSample:
					case GLSLstd450InterpolateAtOffset:
					{
						// For these, we really need the stage-in block. It is theoretically possible to pass the
						// interpolant object, but a) doing so would require us to create an entirely new variable
						// with Interpolant type, and b) if we have a struct or array, handling all the members and
						// elements could get unwieldy fast.
						added_arg_ids.insert(stage_in_var_id);
						break;
					}

					case GLSLstd450Modf:
					case GLSLstd450Frexp:
					{
						uint32_t base_id = ops[5];
						if (global_var_ids.find(base_id) != global_var_ids.end())
							added_arg_ids.insert(base_id);
						break;
					}

					default:
						break;
					}
				}
				break;
			}

			case OpGroupNonUniformInverseBallot:
			{
				added_arg_ids.insert(builtin_subgroup_invocation_id_id);
				break;
			}

			case OpGroupNonUniformBallotFindLSB:
			case OpGroupNonUniformBallotFindMSB:
			{
				added_arg_ids.insert(builtin_subgroup_size_id);
				break;
			}

			case OpGroupNonUniformBallotBitCount:
			{
				auto operation = static_cast<GroupOperation>(ops[3]);
				switch (operation)
				{
				case GroupOperationReduce:
					added_arg_ids.insert(builtin_subgroup_size_id);
					break;
				case GroupOperationInclusiveScan:
				case GroupOperationExclusiveScan:
					added_arg_ids.insert(builtin_subgroup_invocation_id_id);
					break;
				default:
					break;
				}
				break;
			}

			default:
				break;
			}

			// TODO: Add all other operations which can affect memory.
			// We should consider a more unified system here to reduce boiler-plate.
			// This kind of analysis is done in several places ...
		}
	}

	function_global_vars[func_id] = added_arg_ids;

	// Add the global variables as arguments to the function
	if (func_id != ir.default_entry_point)
	{
		bool control_point_added_in = false;
		bool control_point_added_out = false;
		bool patch_added_in = false;
		bool patch_added_out = false;

		for (uint32_t arg_id : added_arg_ids)
		{
			auto &var = get<SPIRVariable>(arg_id);
			uint32_t type_id = var.basetype;
			auto *p_type = &get<SPIRType>(type_id);
			BuiltIn bi_type = BuiltIn(get_decoration(arg_id, DecorationBuiltIn));

			bool is_patch = has_decoration(arg_id, DecorationPatch) || is_patch_block(*p_type);
			bool is_block = has_decoration(p_type->self, DecorationBlock);
			bool is_control_point_storage =
			    !is_patch &&
			    ((is_tessellation_shader() && var.storage == StorageClassInput) ||
			     (get_execution_model() == ExecutionModelTessellationControl && var.storage == StorageClassOutput));
			bool is_patch_block_storage = is_patch && is_block && var.storage == StorageClassOutput;
			bool is_builtin = is_builtin_variable(var);
			bool variable_is_stage_io =
			    !is_builtin || bi_type == BuiltInPosition || bi_type == BuiltInPointSize ||
			    bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance ||
			    p_type->basetype == SPIRType::Struct;
			bool is_redirected_to_global_stage_io = (is_control_point_storage || is_patch_block_storage) &&
			                                        variable_is_stage_io;

			// If output is masked it is not considered part of the global stage IO interface.
			if (is_redirected_to_global_stage_io && var.storage == StorageClassOutput)
				is_redirected_to_global_stage_io = !is_stage_output_variable_masked(var);

			if (is_redirected_to_global_stage_io)
			{
				// Tessellation control shaders see inputs and per-vertex outputs as arrays.
				// Similarly, tessellation evaluation shaders see per-vertex inputs as arrays.
				// We collected them into a structure; we must pass the array of this
				// structure to the function.
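				// e.g. (illustrative): gl_out[gl_InvocationID].gl_Position ends up addressing
				// one shared array-of-struct parameter rather than a per-variable argument.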
				std::string name;
				if (is_patch)
					name = var.storage == StorageClassInput ? patch_stage_in_var_name : patch_stage_out_var_name;
				else
					name = var.storage == StorageClassInput ? "gl_in" : "gl_out";

				if (var.storage == StorageClassOutput && has_decoration(p_type->self, DecorationBlock))
				{
					// If we're redirecting a block, we might still need to access the original block
					// variable if we're masking some members.
					for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(p_type->member_types.size()); mbr_idx++)
					{
						if (is_stage_output_block_member_masked(var, mbr_idx, true))
						{
							func.add_parameter(var.basetype, var.self, true);
							break;
						}
					}
				}

				// Tessellation control shaders see inputs and per-vertex outputs as arrays.
				// Similarly, tessellation evaluation shaders see per-vertex inputs as arrays.
				// We collected them into a structure; we must pass the array of this
				// structure to the function.
				if (var.storage == StorageClassInput)
				{
					auto &added_in = is_patch ? patch_added_in : control_point_added_in;
					if (added_in)
						continue;
					arg_id = is_patch ? patch_stage_in_var_id : stage_in_ptr_var_id;
					added_in = true;
				}
				else if (var.storage == StorageClassOutput)
				{
					auto &added_out = is_patch ? patch_added_out : control_point_added_out;
					if (added_out)
						continue;
					arg_id = is_patch ? patch_stage_out_var_id : stage_out_ptr_var_id;
					added_out = true;
				}

				type_id = get<SPIRVariable>(arg_id).basetype;
				uint32_t next_id = ir.increase_bound_by(1);
				func.add_parameter(type_id, next_id, true);
				set<SPIRVariable>(next_id, type_id, StorageClassFunction, 0, arg_id);

				set_name(next_id, name);
			}
			else if (is_builtin && has_decoration(p_type->self, DecorationBlock))
			{
				// Get the pointee type
				type_id = get_pointee_type_id(type_id);
				p_type = &get<SPIRType>(type_id);

				uint32_t mbr_idx = 0;
				for (auto &mbr_type_id : p_type->member_types)
				{
					BuiltIn builtin = BuiltInMax;
					is_builtin = is_member_builtin(*p_type, mbr_idx, &builtin);
					if (is_builtin && has_active_builtin(builtin, var.storage))
					{
						// Add an arg variable with the same type and decorations as the member
						uint32_t next_ids = ir.increase_bound_by(2);
						uint32_t ptr_type_id = next_ids + 0;
						uint32_t var_id = next_ids + 1;

						// Make sure we have an actual pointer type,
						// so that we will get the appropriate address space when declaring these builtins.
						auto &ptr = set<SPIRType>(ptr_type_id, get<SPIRType>(mbr_type_id));
						ptr.self = mbr_type_id;
						ptr.storage = var.storage;
						ptr.pointer = true;
						ptr.pointer_depth++;
						ptr.parent_type = mbr_type_id;

						func.add_parameter(mbr_type_id, var_id, true);
						set<SPIRVariable>(var_id, ptr_type_id, StorageClassFunction);
						ir.meta[var_id].decoration = ir.meta[type_id].members[mbr_idx];
					}
					mbr_idx++;
				}
			}
			else
			{
				uint32_t next_id = ir.increase_bound_by(1);
				func.add_parameter(type_id, next_id, true);
				set<SPIRVariable>(next_id, type_id, StorageClassFunction, 0, arg_id);

				// Ensure the existing variable has a valid name and the new variable has all the same meta info
				set_name(arg_id, ensure_valid_name(to_name(arg_id), "v"));
				ir.meta[next_id] = ir.meta[arg_id];
			}
		}
	}
}
// For all variables that are some form of non-input-output interface block, mark that all the structs
// that are recursively contained within the type referenced by that variable should be packed tightly.
void CompilerMSL::mark_packable_structs()
{
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
		if (var.storage != StorageClassFunction && !is_hidden_variable(var))
		{
			auto &type = this->get<SPIRType>(var.basetype);
			if (type.pointer &&
			    (type.storage == StorageClassUniform || type.storage == StorageClassUniformConstant ||
			     type.storage == StorageClassPushConstant || type.storage == StorageClassStorageBuffer) &&
			    (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock)))
				mark_as_packable(type);
		}
	});
}
// If the specified type is a struct, it and any nested structs
// are marked as packable with the SPIRVCrossDecorationBufferBlockRepacked decoration.
void CompilerMSL::mark_as_packable(SPIRType &type)
{
	// If this is not the base type (e.g. it's a pointer or array), tunnel down
	if (type.parent_type)
	{
		mark_as_packable(get<SPIRType>(type.parent_type));
		return;
	}

	if (type.basetype == SPIRType::Struct)
	{
		set_extended_decoration(type.self, SPIRVCrossDecorationBufferBlockRepacked);

		// Recurse
		uint32_t mbr_cnt = uint32_t(type.member_types.size());
		for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
		{
			uint32_t mbr_type_id = type.member_types[mbr_idx];
			auto &mbr_type = get<SPIRType>(mbr_type_id);
			mark_as_packable(mbr_type);
			if (mbr_type.type_alias)
			{
				auto &mbr_type_alias = get<SPIRType>(mbr_type.type_alias);
				mark_as_packable(mbr_type_alias);
			}
		}
	}
}
// If a shader input exists at the location, it is marked as being used by this shader
void CompilerMSL::mark_location_as_used_by_shader(uint32_t location, const SPIRType &type,
                                                  StorageClass storage, bool fallback)
{
	if (storage != StorageClassInput)
		return;

	uint32_t count = type_to_location_count(type);
	for (uint32_t i = 0; i < count; i++)
	{
		location_inputs_in_use.insert(location + i);
		if (fallback)
			location_inputs_in_use_fallback.insert(location + i);
	}
}
uint32_t CompilerMSL::get_target_components_for_fragment_location(uint32_t location) const
{
	auto itr = fragment_output_components.find(location);
	if (itr == end(fragment_output_components))
		return 4;
	else
		return itr->second;
}
uint32_t CompilerMSL::build_extended_vector_type(uint32_t type_id, uint32_t components, SPIRType::BaseType basetype)
{
	uint32_t new_type_id = ir.increase_bound_by(1);
	auto &old_type = get<SPIRType>(type_id);
	auto *type = &set<SPIRType>(new_type_id, old_type);
	type->vecsize = components;
	if (basetype != SPIRType::Unknown)
		type->basetype = basetype;
	type->self = new_type_id;
	type->parent_type = type_id;
	type->array.clear();
	type->array_size_literal.clear();
	type->pointer = false;

	if (is_array(old_type))
	{
		uint32_t array_type_id = ir.increase_bound_by(1);
		type = &set<SPIRType>(array_type_id, *type);
		type->parent_type = new_type_id;
		type->array = old_type.array;
		type->array_size_literal = old_type.array_size_literal;
		new_type_id = array_type_id;
	}

	if (old_type.pointer)
	{
		uint32_t ptr_type_id = ir.increase_bound_by(1);
		type = &set<SPIRType>(ptr_type_id, *type);
		type->self = new_type_id;
		type->parent_type = new_type_id;
		type->storage = old_type.storage;
		type->pointer = true;
		type->pointer_depth++;
		new_type_id = ptr_type_id;
	}

	return new_type_id;
}
uint32_t CompilerMSL::build_msl_interpolant_type(uint32_t type_id, bool is_noperspective)
{
	uint32_t new_type_id = ir.increase_bound_by(1);
	SPIRType &type = set<SPIRType>(new_type_id, get<SPIRType>(type_id));
	type.basetype = SPIRType::Interpolant;
	type.parent_type = type_id;
	// In Metal, the pull-model interpolant type encodes perspective-vs-no-perspective in the type itself.
	// Add this decoration so we know which argument to pass to the template.
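	// e.g. (illustrative): interpolant<float4, interpolation::perspective> vs.
	//      interpolant<float4, interpolation::no_perspective>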
	if (is_noperspective)
		set_decoration(new_type_id, DecorationNoPerspective);
	return new_type_id;
}
bool CompilerMSL::add_component_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref,
                                                            SPIRVariable &var,
                                                            const SPIRType &type,
                                                            InterfaceBlockMeta &meta)
{
	// Deal with Component decorations.
	const InterfaceBlockMeta::LocationMeta *location_meta = nullptr;
	uint32_t location = ~0u;
	if (has_decoration(var.self, DecorationLocation))
	{
		location = get_decoration(var.self, DecorationLocation);
		auto location_meta_itr = meta.location_meta.find(location);
		if (location_meta_itr != end(meta.location_meta))
			location_meta = &location_meta_itr->second;
	}

	// Check if we need to pad fragment output to match a certain number of components.
	if (location_meta)
	{
		bool pad_fragment_output = has_decoration(var.self, DecorationLocation) &&
		                           msl_options.pad_fragment_output_components &&
		                           get_entry_point().model == ExecutionModelFragment && storage == StorageClassOutput;

		auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
		uint32_t start_component = get_decoration(var.self, DecorationComponent);
		uint32_t type_components = type.vecsize;
		uint32_t num_components = location_meta->num_components;

		if (pad_fragment_output)
		{
			uint32_t locn = get_decoration(var.self, DecorationLocation);
			num_components = std::max(num_components, get_target_components_for_fragment_location(locn));
		}

		// We have already declared an IO block member as m_location_N.
		// Just emit an early-declared variable and fixup as needed.
		// Arrays need to be unrolled here since each location might need a different number of components.
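		// e.g. (illustrative): a two-component input at location 3, component 1 would read back as
		//   foo = in.m_location_3.yz;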
		entry_func.add_local_variable(var.self);
		vars_needing_early_declaration.push_back(var.self);

		if (var.storage == StorageClassInput)
		{
			entry_func.fixup_hooks_in.push_back([=, &type, &var]() {
				if (!type.array.empty())
				{
					uint32_t array_size = to_array_size_literal(type);
					for (uint32_t loc_off = 0; loc_off < array_size; loc_off++)
					{
						statement(to_name(var.self), "[", loc_off, "]", " = ", ib_var_ref,
						          ".m_location_", location + loc_off,
						          vector_swizzle(type_components, start_component), ";");
					}
				}
				else
				{
					statement(to_name(var.self), " = ", ib_var_ref, ".m_location_", location,
					          vector_swizzle(type_components, start_component), ";");
				}
			});
		}
		else
		{
			entry_func.fixup_hooks_out.push_back([=, &type, &var]() {
				if (!type.array.empty())
				{
					uint32_t array_size = to_array_size_literal(type);
					for (uint32_t loc_off = 0; loc_off < array_size; loc_off++)
					{
						statement(ib_var_ref, ".m_location_", location + loc_off,
						          vector_swizzle(type_components, start_component), " = ",
						          to_name(var.self), "[", loc_off, "];");
					}
				}
				else
				{
					statement(ib_var_ref, ".m_location_", location,
					          vector_swizzle(type_components, start_component), " = ", to_name(var.self), ";");
				}
			});
		}
		return true;
	}
	else
		return false;
}
void CompilerMSL::add_plain_variable_to_interface_block(StorageClass storage, const string &ib_var_ref,
                                                        SPIRType &ib_type, SPIRVariable &var, InterfaceBlockMeta &meta)
{
	bool is_builtin = is_builtin_variable(var);
	BuiltIn builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn));
	bool is_flat = has_decoration(var.self, DecorationFlat);
	bool is_noperspective = has_decoration(var.self, DecorationNoPerspective);
	bool is_centroid = has_decoration(var.self, DecorationCentroid);
	bool is_sample = has_decoration(var.self, DecorationSample);

	// Add a reference to the variable type to the interface struct.
	uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size());
	uint32_t type_id = ensure_correct_builtin_type(var.basetype, builtin);
	var.basetype = type_id;

	type_id = get_pointee_type_id(var.basetype);
	if (meta.strip_array && is_array(get<SPIRType>(type_id)))
		type_id = get<SPIRType>(type_id).parent_type;
	auto &type = get<SPIRType>(type_id);
	uint32_t target_components = 0;
	uint32_t type_components = type.vecsize;

	bool padded_output = false;
	bool padded_input = false;
	uint32_t start_component = 0;

	auto &entry_func = get<SPIRFunction>(ir.default_entry_point);

	if (add_component_variable_to_interface_block(storage, ib_var_ref, var, type, meta))
		return;

	bool pad_fragment_output = has_decoration(var.self, DecorationLocation) &&
	                           msl_options.pad_fragment_output_components &&
	                           get_entry_point().model == ExecutionModelFragment && storage == StorageClassOutput;

	if (pad_fragment_output)
	{
		uint32_t locn = get_decoration(var.self, DecorationLocation);
		target_components = get_target_components_for_fragment_location(locn);
		if (type_components < target_components)
		{
			// Make a new type here.
			type_id = build_extended_vector_type(type_id, target_components);
			padded_output = true;
		}
	}

	if (storage == StorageClassInput && pull_model_inputs.count(var.self))
		ib_type.member_types.push_back(build_msl_interpolant_type(type_id, is_noperspective));
	else
		ib_type.member_types.push_back(type_id);

	// Give the member a name
	string mbr_name = ensure_valid_name(to_expression(var.self), "m");
	set_member_name(ib_type.self, ib_mbr_idx, mbr_name);

	// Update the original variable reference to include the structure reference
	string qual_var_name = ib_var_ref + "." + mbr_name;
	// If using pull-model interpolation, need to add a call to the correct interpolation method.
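	// e.g. (illustrative): "in.m_foo" becomes "in.m_foo.interpolate_at_sample(gl_SampleID)".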
	if (storage == StorageClassInput && pull_model_inputs.count(var.self))
	{
		if (is_centroid)
			qual_var_name += ".interpolate_at_centroid()";
		else if (is_sample)
			qual_var_name += join(".interpolate_at_sample(", to_expression(builtin_sample_id_id), ")");
		else
			qual_var_name += ".interpolate_at_center()";
	}

	if (padded_output || padded_input)
	{
		entry_func.add_local_variable(var.self);
		vars_needing_early_declaration.push_back(var.self);

		if (padded_output)
		{
			entry_func.fixup_hooks_out.push_back([=, &var]() {
				statement(qual_var_name, vector_swizzle(type_components, start_component), " = ", to_name(var.self),
				          ";");
			});
		}
		else
		{
			entry_func.fixup_hooks_in.push_back([=, &var]() {
				statement(to_name(var.self), " = ", qual_var_name, vector_swizzle(type_components, start_component),
				          ";");
			});
		}
	}
	else if (!meta.strip_array)
		ir.meta[var.self].decoration.qualified_alias = qual_var_name;

	if (var.storage == StorageClassOutput && var.initializer != ID(0))
	{
		if (padded_output || padded_input)
		{
			entry_func.fixup_hooks_in.push_back(
			    [=, &var]() { statement(to_name(var.self), " = ", to_expression(var.initializer), ";"); });
		}
		else
		{
			if (meta.strip_array)
			{
				entry_func.fixup_hooks_in.push_back([=, &var]() {
					uint32_t index = get_extended_decoration(var.self, SPIRVCrossDecorationInterfaceMemberIndex);
					auto invocation = to_tesc_invocation_id();
					statement(to_expression(stage_out_ptr_var_id), "[",
					          invocation, "].",
					          to_member_name(ib_type, index), " = ", to_expression(var.initializer), "[",
					          invocation, "];");
				});
			}
			else
			{
				entry_func.fixup_hooks_in.push_back([=, &var]() {
					statement(qual_var_name, " = ", to_expression(var.initializer), ";");
				});
			}
		}
	}

	// Copy the variable location from the original variable to the member
	if (get_decoration_bitset(var.self).get(DecorationLocation))
	{
		uint32_t locn = get_decoration(var.self, DecorationLocation);
		uint32_t comp = get_decoration(var.self, DecorationComponent);
		if (storage == StorageClassInput)
		{
			type_id = ensure_correct_input_type(var.basetype, locn, comp, 0, meta.strip_array);
			var.basetype = type_id;

			type_id = get_pointee_type_id(type_id);
			if (meta.strip_array && is_array(get<SPIRType>(type_id)))
				type_id = get<SPIRType>(type_id).parent_type;
			if (pull_model_inputs.count(var.self))
				ib_type.member_types[ib_mbr_idx] = build_msl_interpolant_type(type_id, is_noperspective);
			else
				ib_type.member_types[ib_mbr_idx] = type_id;
		}
		set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn);
		if (comp)
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationComponent, comp);
		mark_location_as_used_by_shader(locn, get<SPIRType>(type_id), storage);
	}
	else if (is_builtin && is_tessellation_shader() && storage == StorageClassInput && inputs_by_builtin.count(builtin))
	{
		uint32_t locn = inputs_by_builtin[builtin].location;
		set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn);
		mark_location_as_used_by_shader(locn, type, storage);
	}

	if (get_decoration_bitset(var.self).get(DecorationComponent))
	{
		uint32_t component = get_decoration(var.self, DecorationComponent);
		set_member_decoration(ib_type.self, ib_mbr_idx, DecorationComponent, component);
	}

	if (get_decoration_bitset(var.self).get(DecorationIndex))
	{
		uint32_t index = get_decoration(var.self, DecorationIndex);
		set_member_decoration(ib_type.self, ib_mbr_idx, DecorationIndex, index);
	}

	// Mark the member as builtin if needed
	if (is_builtin)
	{
		set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, builtin);
		if (builtin == BuiltInPosition && storage == StorageClassOutput)
			qual_pos_var_name = qual_var_name;
	}

	// Copy interpolation decorations if needed
	if (storage != StorageClassInput || !pull_model_inputs.count(var.self))
	{
		if (is_flat)
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat);
		if (is_noperspective)
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective);
		if (is_centroid)
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid);
		if (is_sample)
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample);
	}

	set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceOrigID, var.self);
}
void CompilerMSL::add_composite_variable_to_interface_block(StorageClass storage, const string &ib_var_ref,
                                                            SPIRType &ib_type, SPIRVariable &var,
                                                            InterfaceBlockMeta &meta)
{
	auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
	auto &var_type = meta.strip_array ? get_variable_element_type(var) : get_variable_data_type(var);
	uint32_t elem_cnt = 0;

	if (add_component_variable_to_interface_block(storage, ib_var_ref, var, var_type, meta))
		return;

	if (is_matrix(var_type))
	{
		if (is_array(var_type))
			SPIRV_CROSS_THROW("MSL cannot emit arrays-of-matrices in input and output variables.");

		elem_cnt = var_type.columns;
	}
	else if (is_array(var_type))
	{
		if (var_type.array.size() != 1)
			SPIRV_CROSS_THROW("MSL cannot emit arrays-of-arrays in input and output variables.");

		elem_cnt = to_array_size_literal(var_type);
	}

	bool is_builtin = is_builtin_variable(var);
	BuiltIn builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn));
	bool is_flat = has_decoration(var.self, DecorationFlat);
	bool is_noperspective = has_decoration(var.self, DecorationNoPerspective);
	bool is_centroid = has_decoration(var.self, DecorationCentroid);
	bool is_sample = has_decoration(var.self, DecorationSample);

	auto *usable_type = &var_type;
	if (usable_type->pointer)
		usable_type = &get<SPIRType>(usable_type->parent_type);
	while (is_array(*usable_type) || is_matrix(*usable_type))
		usable_type = &get<SPIRType>(usable_type->parent_type);

	// If a builtin, force it to have the proper name.
	if (is_builtin)
		set_name(var.self, builtin_to_glsl(builtin, StorageClassFunction));

	bool flatten_from_ib_var = false;
	string flatten_from_ib_mbr_name;

	if (storage == StorageClassOutput && is_builtin && builtin == BuiltInClipDistance)
	{
		// Also declare [[clip_distance]] attribute here.
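		// e.g. (illustrative): float gl_ClipDistance [[clip_distance]] [2];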
		uint32_t clip_array_mbr_idx = uint32_t(ib_type.member_types.size());
		ib_type.member_types.push_back(get_variable_data_type_id(var));
		set_member_decoration(ib_type.self, clip_array_mbr_idx, DecorationBuiltIn, BuiltInClipDistance);
		flatten_from_ib_mbr_name = builtin_to_glsl(BuiltInClipDistance, StorageClassOutput);
		set_member_name(ib_type.self, clip_array_mbr_idx, flatten_from_ib_mbr_name);

		// When we flatten, we flatten directly from the "out" struct,
		// not from a function variable.
		flatten_from_ib_var = true;

		if (!msl_options.enable_clip_distance_user_varying)
			return;
	}
	else if (!meta.strip_array)
	{
		// Only flatten/unflatten IO composites for non-tessellation cases where arrays are not stripped.
		entry_func.add_local_variable(var.self);
		// We need to declare the variable early and at entry-point scope.
		vars_needing_early_declaration.push_back(var.self);
	}

	for (uint32_t i = 0; i < elem_cnt; i++)
	{
		// Add a reference to the variable type to the interface struct.
		uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size());

		uint32_t target_components = 0;
		bool padded_output = false;
		uint32_t type_id = usable_type->self;

		// Check if we need to pad fragment output to match a certain number of components.
		if (get_decoration_bitset(var.self).get(DecorationLocation) && msl_options.pad_fragment_output_components &&
		    get_entry_point().model == ExecutionModelFragment && storage == StorageClassOutput)
		{
			uint32_t locn = get_decoration(var.self, DecorationLocation) + i;
			target_components = get_target_components_for_fragment_location(locn);
			if (usable_type->vecsize < target_components)
			{
				// Make a new type here.
				type_id = build_extended_vector_type(usable_type->self, target_components);
				padded_output = true;
			}
		}

		if (storage == StorageClassInput && pull_model_inputs.count(var.self))
			ib_type.member_types.push_back(build_msl_interpolant_type(get_pointee_type_id(type_id), is_noperspective));
		else
			ib_type.member_types.push_back(get_pointee_type_id(type_id));

		// Give the member a name
		string mbr_name = ensure_valid_name(join(to_expression(var.self), "_", i), "m");
		set_member_name(ib_type.self, ib_mbr_idx, mbr_name);

		// There is no qualified alias since we need to flatten the internal array on return.
		if (get_decoration_bitset(var.self).get(DecorationLocation))
		{
			uint32_t locn = get_decoration(var.self, DecorationLocation) + i;
			uint32_t comp = get_decoration(var.self, DecorationComponent);
			if (storage == StorageClassInput)
			{
				var.basetype = ensure_correct_input_type(var.basetype, locn, comp, 0, meta.strip_array);
				uint32_t mbr_type_id = ensure_correct_input_type(usable_type->self, locn, comp, 0, meta.strip_array);
				if (storage == StorageClassInput && pull_model_inputs.count(var.self))
					ib_type.member_types[ib_mbr_idx] = build_msl_interpolant_type(mbr_type_id, is_noperspective);
				else
					ib_type.member_types[ib_mbr_idx] = mbr_type_id;
			}
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn);
			if (comp)
				set_member_decoration(ib_type.self, ib_mbr_idx, DecorationComponent, comp);
			mark_location_as_used_by_shader(locn, *usable_type, storage);
		}
		else if (is_builtin && is_tessellation_shader() && storage == StorageClassInput && inputs_by_builtin.count(builtin))
		{
			uint32_t locn = inputs_by_builtin[builtin].location + i;
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn);
			mark_location_as_used_by_shader(locn, *usable_type, storage);
		}
		else if (is_builtin && (builtin == BuiltInClipDistance || builtin == BuiltInCullDistance))
		{
			// Declare the Clip/CullDistance as [[user(clip/cullN)]].
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, builtin);
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationIndex, i);
		}

		if (get_decoration_bitset(var.self).get(DecorationIndex))
		{
			uint32_t index = get_decoration(var.self, DecorationIndex);
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationIndex, index);
		}

		if (storage != StorageClassInput || !pull_model_inputs.count(var.self))
		{
			// Copy interpolation decorations if needed
			if (is_flat)
				set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat);
			if (is_noperspective)
				set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective);
			if (is_centroid)
				set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid);
			if (is_sample)
				set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample);
		}

		set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceOrigID, var.self);

		// Only flatten/unflatten IO composites for non-tessellation cases where arrays are not stripped.
		if (!meta.strip_array)
		{
			switch (storage)
			{
			case StorageClassInput:
				entry_func.fixup_hooks_in.push_back([=, &var]() {
					if (pull_model_inputs.count(var.self))
					{
						string lerp_call;
						if (is_centroid)
							lerp_call = ".interpolate_at_centroid()";
						else if (is_sample)
							lerp_call = join(".interpolate_at_sample(", to_expression(builtin_sample_id_id), ")");
						else
							lerp_call = ".interpolate_at_center()";
						statement(to_name(var.self), "[", i, "] = ", ib_var_ref, ".", mbr_name, lerp_call, ";");
					}
					else
					{
						statement(to_name(var.self), "[", i, "] = ", ib_var_ref, ".", mbr_name, ";");
					}
				});
				break;

			case StorageClassOutput:
				entry_func.fixup_hooks_out.push_back([=, &var]() {
					if (padded_output)
					{
						auto &padded_type = this->get<SPIRType>(type_id);
						statement(
						    ib_var_ref, ".", mbr_name, " = ",
						    remap_swizzle(padded_type, usable_type->vecsize, join(to_name(var.self), "[", i, "]")),
						    ";");
					}
					else if (flatten_from_ib_var)
						statement(ib_var_ref, ".", mbr_name, " = ", ib_var_ref, ".", flatten_from_ib_mbr_name, "[", i,
						          "];");
					else
						statement(ib_var_ref, ".", mbr_name, " = ", to_name(var.self), "[", i, "];");
				});
				break;

			default:
				break;
			}
		}
	}
}
void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass storage, const string &ib_var_ref,
                                                                   SPIRType &ib_type, SPIRVariable &var,
                                                                   uint32_t mbr_idx, InterfaceBlockMeta &meta)
{
	auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
	auto &var_type = meta.strip_array ? get_variable_element_type(var) : get_variable_data_type(var);

	BuiltIn builtin = BuiltInMax;
	bool is_builtin = is_member_builtin(var_type, mbr_idx, &builtin);
	bool is_flat =
	    has_member_decoration(var_type.self, mbr_idx, DecorationFlat) || has_decoration(var.self, DecorationFlat);
	bool is_noperspective = has_member_decoration(var_type.self, mbr_idx, DecorationNoPerspective) ||
	                        has_decoration(var.self, DecorationNoPerspective);
	bool is_centroid = has_member_decoration(var_type.self, mbr_idx, DecorationCentroid) ||
	                   has_decoration(var.self, DecorationCentroid);
	bool is_sample =
	    has_member_decoration(var_type.self, mbr_idx, DecorationSample) || has_decoration(var.self, DecorationSample);

	uint32_t mbr_type_id = var_type.member_types[mbr_idx];
	auto &mbr_type = get<SPIRType>(mbr_type_id);

	uint32_t elem_cnt = 0;
	if (is_matrix(mbr_type))
	{
		if (is_array(mbr_type))
			SPIRV_CROSS_THROW("MSL cannot emit arrays-of-matrices in input and output variables.");
		elem_cnt = mbr_type.columns;
	}
	else if (is_array(mbr_type))
	{
		if (mbr_type.array.size() != 1)
			SPIRV_CROSS_THROW("MSL cannot emit arrays-of-arrays in input and output variables.");
		elem_cnt = to_array_size_literal(mbr_type);
	}

	auto *usable_type = &mbr_type;
	if (usable_type->pointer)
		usable_type = &get<SPIRType>(usable_type->parent_type);
	while (is_array(*usable_type) || is_matrix(*usable_type))
		usable_type = &get<SPIRType>(usable_type->parent_type);

	bool flatten_from_ib_var = false;
	string flatten_from_ib_mbr_name;

	if (storage == StorageClassOutput && is_builtin && builtin == BuiltInClipDistance)
	{
		// Also declare [[clip_distance]] attribute here.
		uint32_t clip_array_mbr_idx = uint32_t(ib_type.member_types.size());
		ib_type.member_types.push_back(mbr_type_id);
		set_member_decoration(ib_type.self, clip_array_mbr_idx, DecorationBuiltIn, BuiltInClipDistance);

		flatten_from_ib_mbr_name = builtin_to_glsl(BuiltInClipDistance, StorageClassOutput);
		set_member_name(ib_type.self, clip_array_mbr_idx, flatten_from_ib_mbr_name);

		// When we flatten, we flatten directly from the "out" struct,
		// not from a function variable.
		flatten_from_ib_var = true;

		if (!msl_options.enable_clip_distance_user_varying)
			return;
	}

	for (uint32_t i = 0; i < elem_cnt; i++)
	{
		// Add a reference to the variable type to the interface struct.
		uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size());
		if (storage == StorageClassInput && pull_model_inputs.count(var.self))
			ib_type.member_types.push_back(build_msl_interpolant_type(usable_type->self, is_noperspective));
		else
			ib_type.member_types.push_back(usable_type->self);

		// Give the member a name
		string mbr_name = ensure_valid_name(join(to_qualified_member_name(var_type, mbr_idx), "_", i), "m");
		set_member_name(ib_type.self, ib_mbr_idx, mbr_name);

		if (has_member_decoration(var_type.self, mbr_idx, DecorationLocation))
		{
			uint32_t locn = get_member_decoration(var_type.self, mbr_idx, DecorationLocation) + i;
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn);
			mark_location_as_used_by_shader(locn, *usable_type, storage);
		}
		else if (has_decoration(var.self, DecorationLocation))
		{
			uint32_t locn = get_accumulated_member_location(var, mbr_idx, meta.strip_array) + i;
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn);
			mark_location_as_used_by_shader(locn, *usable_type, storage);
		}
		else if (is_builtin && is_tessellation_shader() && storage == StorageClassInput && inputs_by_builtin.count(builtin))
		{
			uint32_t locn = inputs_by_builtin[builtin].location + i;
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn);
			mark_location_as_used_by_shader(locn, *usable_type, storage);
		}
		else if (is_builtin && (builtin == BuiltInClipDistance || builtin == BuiltInCullDistance))
		{
			// Declare the Clip/CullDistance as [[user(clip/cullN)]].
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, builtin);
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationIndex, i);
		}

		if (has_member_decoration(var_type.self, mbr_idx, DecorationComponent))
			SPIRV_CROSS_THROW("DecorationComponent on matrices and arrays makes little sense.");

		if (storage != StorageClassInput || !pull_model_inputs.count(var.self))
		{
			// Copy interpolation decorations if needed
			if (is_flat)
				set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat);
			if (is_noperspective)
				set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective);
			if (is_centroid)
				set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid);
			if (is_sample)
				set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample);
		}

		set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceOrigID, var.self);
		set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, mbr_idx);

		// Unflatten or flatten from [[stage_in]] or [[stage_out]] as appropriate.
		if (!meta.strip_array && meta.allow_local_declaration)
		{
			switch (storage)
			{
			case StorageClassInput:
				entry_func.fixup_hooks_in.push_back([=, &var, &var_type]() {
					if (pull_model_inputs.count(var.self))
					{
						string lerp_call;
						if (is_centroid)
							lerp_call = ".interpolate_at_centroid()";
						else if (is_sample)
							lerp_call = join(".interpolate_at_sample(", to_expression(builtin_sample_id_id), ")");
						else
							lerp_call = ".interpolate_at_center()";
						statement(to_name(var.self), ".", to_member_name(var_type, mbr_idx), "[", i, "] = ", ib_var_ref,
						          ".", mbr_name, lerp_call, ";");
					}
					else
					{
						statement(to_name(var.self), ".", to_member_name(var_type, mbr_idx), "[", i, "] = ", ib_var_ref,
						          ".", mbr_name, ";");
					}
				});
				break;

			case StorageClassOutput:
				entry_func.fixup_hooks_out.push_back([=, &var, &var_type]() {
					if (flatten_from_ib_var)
					{
						statement(ib_var_ref, ".", mbr_name, " = ", ib_var_ref, ".", flatten_from_ib_mbr_name, "[", i,
						          "];");
					}
					else
					{
						statement(ib_var_ref, ".", mbr_name, " = ", to_name(var.self), ".",
						          to_member_name(var_type, mbr_idx), "[", i, "];");
					}
				});
				break;

			default:
				break;
			}
		}
	}
}
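
// Add a plain (non-composite) member of an I/O struct variable to the interface
// block, copying its location, component and interpolation decorations across.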
void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass storage, const string &ib_var_ref,
                                                               SPIRType &ib_type, SPIRVariable &var, uint32_t mbr_idx,
                                                               InterfaceBlockMeta &meta)
{
	auto &var_type = meta.strip_array ? get_variable_element_type(var) : get_variable_data_type(var);
	auto &entry_func = get<SPIRFunction>(ir.default_entry_point);

	BuiltIn builtin = BuiltInMax;
	bool is_builtin = is_member_builtin(var_type, mbr_idx, &builtin);
	bool is_flat =
	    has_member_decoration(var_type.self, mbr_idx, DecorationFlat) || has_decoration(var.self, DecorationFlat);
	bool is_noperspective = has_member_decoration(var_type.self, mbr_idx, DecorationNoPerspective) ||
	                        has_decoration(var.self, DecorationNoPerspective);
	bool is_centroid = has_member_decoration(var_type.self, mbr_idx, DecorationCentroid) ||
	                   has_decoration(var.self, DecorationCentroid);
	bool is_sample =
	    has_member_decoration(var_type.self, mbr_idx, DecorationSample) || has_decoration(var.self, DecorationSample);

	// Add a reference to the member to the interface struct.
	uint32_t mbr_type_id = var_type.member_types[mbr_idx];
	uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size());
	mbr_type_id = ensure_correct_builtin_type(mbr_type_id, builtin);
	var_type.member_types[mbr_idx] = mbr_type_id;
	if (storage == StorageClassInput && pull_model_inputs.count(var.self))
		ib_type.member_types.push_back(build_msl_interpolant_type(mbr_type_id, is_noperspective));
	else
		ib_type.member_types.push_back(mbr_type_id);

	// Give the member a name
	string mbr_name = ensure_valid_name(to_qualified_member_name(var_type, mbr_idx), "m");
	set_member_name(ib_type.self, ib_mbr_idx, mbr_name);

	// Update the original variable reference to include the structure reference
	string qual_var_name = ib_var_ref + "." + mbr_name;
	// If using pull-model interpolation, need to add a call to the correct interpolation method.
	if (storage == StorageClassInput && pull_model_inputs.count(var.self))
	{
		if (is_centroid)
			qual_var_name += ".interpolate_at_centroid()";
		else if (is_sample)
			qual_var_name += join(".interpolate_at_sample(", to_expression(builtin_sample_id_id), ")");
		else
			qual_var_name += ".interpolate_at_center()";
	}

	bool flatten_stage_out = false;

	if (is_builtin && !meta.strip_array)
	{
		// For the builtin gl_PerVertex, we cannot treat it as a block anyways,
		// so redirect to qualified name.
		set_member_qualified_name(var_type.self, mbr_idx, qual_var_name);
	}
	else if (!meta.strip_array && meta.allow_local_declaration)
	{
		// Unflatten or flatten from [[stage_in]] or [[stage_out]] as appropriate.
		switch (storage)
		{
		case StorageClassInput:
			entry_func.fixup_hooks_in.push_back([=, &var, &var_type]() {
				statement(to_name(var.self), ".", to_member_name(var_type, mbr_idx), " = ", qual_var_name, ";");
			});
			break;

		case StorageClassOutput:
			flatten_stage_out = true;
			entry_func.fixup_hooks_out.push_back([=, &var, &var_type]() {
				statement(qual_var_name, " = ", to_name(var.self), ".", to_member_name(var_type, mbr_idx), ";");
			});
			break;

		default:
			break;
		}
	}

	// Copy the variable location from the original variable to the member
	if (has_member_decoration(var_type.self, mbr_idx, DecorationLocation))
	{
		uint32_t locn = get_member_decoration(var_type.self, mbr_idx, DecorationLocation);
		uint32_t comp = get_member_decoration(var_type.self, mbr_idx, DecorationComponent);
		if (storage == StorageClassInput)
		{
			mbr_type_id = ensure_correct_input_type(mbr_type_id, locn, comp, 0, meta.strip_array);
			var_type.member_types[mbr_idx] = mbr_type_id;
			if (storage == StorageClassInput && pull_model_inputs.count(var.self))
				ib_type.member_types[ib_mbr_idx] = build_msl_interpolant_type(mbr_type_id, is_noperspective);
			else
				ib_type.member_types[ib_mbr_idx] = mbr_type_id;
		}
		set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn);
		mark_location_as_used_by_shader(locn, get<SPIRType>(mbr_type_id), storage);
	}
	else if (has_decoration(var.self, DecorationLocation))
	{
		// The block itself might have a location and in this case, all members of the block
		// receive incrementing locations.
		uint32_t locn = get_accumulated_member_location(var, mbr_idx, meta.strip_array);
		if (storage == StorageClassInput)
		{
			mbr_type_id = ensure_correct_input_type(mbr_type_id, locn, 0, 0, meta.strip_array);
			var_type.member_types[mbr_idx] = mbr_type_id;
			if (storage == StorageClassInput && pull_model_inputs.count(var.self))
				ib_type.member_types[ib_mbr_idx] = build_msl_interpolant_type(mbr_type_id, is_noperspective);
			else
				ib_type.member_types[ib_mbr_idx] = mbr_type_id;
		}
		set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn);
		mark_location_as_used_by_shader(locn, get<SPIRType>(mbr_type_id), storage);
	}
	else if (is_builtin && is_tessellation_shader() && storage == StorageClassInput && inputs_by_builtin.count(builtin))
	{
		uint32_t locn = 0;
		auto builtin_itr = inputs_by_builtin.find(builtin);
		if (builtin_itr != end(inputs_by_builtin))
			locn = builtin_itr->second.location;
		set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn);
		mark_location_as_used_by_shader(locn, get<SPIRType>(mbr_type_id), storage);
	}

	// Copy the component location, if present.
	if (has_member_decoration(var_type.self, mbr_idx, DecorationComponent))
	{
		uint32_t comp = get_member_decoration(var_type.self, mbr_idx, DecorationComponent);
		set_member_decoration(ib_type.self, ib_mbr_idx, DecorationComponent, comp);
	}

	// Mark the member as builtin if needed
	if (is_builtin)
	{
		set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, builtin);
		if (builtin == BuiltInPosition && storage == StorageClassOutput)
			qual_pos_var_name = qual_var_name;
	}
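
	// If the output variable carries a constant initializer, emit entry-point
	// hooks that write the initializer value before user code runs.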
	const SPIRConstant *c = nullptr;
	if (!flatten_stage_out && var.storage == StorageClassOutput &&
	    var.initializer != ID(0) && (c = maybe_get<SPIRConstant>(var.initializer)))
	{
		if (meta.strip_array)
		{
			entry_func.fixup_hooks_in.push_back([=, &var]() {
				auto &type = this->get<SPIRType>(var.basetype);
				uint32_t index = get_extended_member_decoration(var.self, mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex);

				auto invocation = to_tesc_invocation_id();
				auto constant_chain = join(to_expression(var.initializer), "[", invocation, "]");
				statement(to_expression(stage_out_ptr_var_id), "[",
				          invocation, "].",
				          to_member_name(ib_type, index), " = ",
				          constant_chain, ".", to_member_name(type, mbr_idx), ";");
			});
		}
		else
		{
			entry_func.fixup_hooks_in.push_back([=]() {
				statement(qual_var_name, " = ", constant_expression(
				          this->get<SPIRConstant>(c->subconstants[mbr_idx])), ";");
			});
		}
	}

	if (storage != StorageClassInput || !pull_model_inputs.count(var.self))
	{
		// Copy interpolation decorations if needed
		if (is_flat)
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat);
		if (is_noperspective)
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective);
		if (is_centroid)
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid);
		if (is_sample)
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample);
	}

	set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceOrigID, var.self);
	set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, mbr_idx);
}

// In Metal, the tessellation levels are stored as tightly packed half-precision floating point values.
// But, stage-in attribute offsets and strides must be multiples of four, so we can't pass the levels
// individually. Therefore, we must pass them as vectors. Triangles get a single float4, with the outer
// levels in 'xyz' and the inner level in 'w'. Quads get a float4 containing the outer levels and a
// float2 containing the inner levels.
void CompilerMSL::add_tess_level_input_to_interface_block(const std::string &ib_var_ref, SPIRType &ib_type,
                                                          SPIRVariable &var)
{
	auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
	auto &var_type = get_variable_element_type(var);

	BuiltIn builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn));

	// Force the variable to have the proper name.
	string var_name = builtin_to_glsl(builtin, StorageClassFunction);
	set_name(var.self, var_name);

	// We need to declare the variable early and at entry-point scope.
	entry_func.add_local_variable(var.self);
	vars_needing_early_declaration.push_back(var.self);

	bool triangles = get_execution_mode_bitset().get(ExecutionModeTriangles);
	string mbr_name;

	// Add a reference to the variable type to the interface struct.
	uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size());

	const auto mark_locations = [&](const SPIRType &new_var_type) {
		if (get_decoration_bitset(var.self).get(DecorationLocation))
		{
			uint32_t locn = get_decoration(var.self, DecorationLocation);
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn);
			mark_location_as_used_by_shader(locn, new_var_type, StorageClassInput);
		}
		else if (inputs_by_builtin.count(builtin))
		{
			uint32_t locn = inputs_by_builtin[builtin].location;
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn);
			mark_location_as_used_by_shader(locn, new_var_type, StorageClassInput);
		}
	};

	if (triangles)
	{
		// Triangles are tricky, because we want only one member in the struct.
		mbr_name = "gl_TessLevel";

		// If we already added the other one, we can skip this step.
		if (!added_builtin_tess_level)
		{
			uint32_t type_id = build_extended_vector_type(var_type.self, 4);

			ib_type.member_types.push_back(type_id);

			// Give the member a name
			set_member_name(ib_type.self, ib_mbr_idx, mbr_name);

			// We cannot decorate both, but the important part is that
			// it's marked as builtin so we can get automatic attribute assignment if needed.
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, builtin);

			mark_locations(var_type);
			added_builtin_tess_level = true;
		}
	}
	else
	{
		mbr_name = var_name;

		uint32_t type_id = build_extended_vector_type(var_type.self, builtin == BuiltInTessLevelOuter ? 4 : 2);

		uint32_t ptr_type_id = ir.increase_bound_by(1);
		auto &new_var_type = set<SPIRType>(ptr_type_id, get<SPIRType>(type_id));
		new_var_type.pointer = true;
		new_var_type.pointer_depth++;
		new_var_type.storage = StorageClassInput;
		new_var_type.parent_type = type_id;

		ib_type.member_types.push_back(type_id);

		// Give the member a name
		set_member_name(ib_type.self, ib_mbr_idx, mbr_name);
		set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, builtin);

		mark_locations(new_var_type);
	}

	if (builtin == BuiltInTessLevelOuter)
	{
		entry_func.fixup_hooks_in.push_back([=]() {
			statement(var_name, "[0] = ", ib_var_ref, ".", mbr_name, ".x;");
			statement(var_name, "[1] = ", ib_var_ref, ".", mbr_name, ".y;");
			statement(var_name, "[2] = ", ib_var_ref, ".", mbr_name, ".z;");
			if (!triangles)
				statement(var_name, "[3] = ", ib_var_ref, ".", mbr_name, ".w;");
		});
	}
	else
	{
		entry_func.fixup_hooks_in.push_back([=]() {
			if (triangles)
			{
				statement(var_name, "[0] = ", ib_var_ref, ".", mbr_name, ".w;");
			}
			else
			{
				statement(var_name, "[0] = ", ib_var_ref, ".", mbr_name, ".x;");
				statement(var_name, "[1] = ", ib_var_ref, ".", mbr_name, ".y;");
			}
		});
	}
}
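
// Determine whether I/O in the given storage class goes through a stage_in/stage_out
// struct. Outputs captured to a buffer and multi-patch TESC inputs bypass stage I/O.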
bool CompilerMSL::variable_storage_requires_stage_io(spv::StorageClass storage) const
{
	if (storage == StorageClassOutput)
		return !capture_output_to_buffer;
	else if (storage == StorageClassInput)
		return !(get_execution_model() == ExecutionModelTessellationControl && msl_options.multi_patch_workgroup);
	else
		return false;
}
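
// Returns an expression for the tessellation control invocation ID,
// i.e. the index of the control point currently being processed.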
string CompilerMSL::to_tesc_invocation_id()
{
	if (msl_options.multi_patch_workgroup)
	{
		// n.b. builtin_invocation_id_id here is the dispatch global invocation ID,
		// not the TC invocation ID.
		return join(to_expression(builtin_invocation_id_id), ".x % ", get_entry_point().output_vertices);
	}
	else
		return builtin_to_glsl(BuiltInInvocationId, StorageClassInput);
}
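
// Emit a masked stage-output variable as an ordinary local (or threadgroup)
// variable instead of placing it in the stage-out interface struct.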
void CompilerMSL::emit_local_masked_variable(const SPIRVariable &masked_var, bool strip_array)
{
	auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
	bool threadgroup_storage = variable_decl_is_remapped_storage(masked_var, StorageClassWorkgroup);

	if (threadgroup_storage && msl_options.multi_patch_workgroup)
	{
		// We need one threadgroup block per patch, so fake this.
		entry_func.fixup_hooks_in.push_back([this, &masked_var]() {
			auto &type = get_variable_data_type(masked_var);
			add_local_variable_name(masked_var.self);

			bool old_is_builtin = is_using_builtin_array;
			is_using_builtin_array = true;

			const uint32_t max_control_points_per_patch = 32u;
			uint32_t max_num_instances =
			    (max_control_points_per_patch + get_entry_point().output_vertices - 1u) /
			    get_entry_point().output_vertices;
			statement("threadgroup ", type_to_glsl(type), " ",
			          "spvStorage", to_name(masked_var.self), "[", max_num_instances, "]",
			          type_to_array_glsl(type), ";");

			// Assign a threadgroup slice to each PrimitiveID.
			// We assume here that workgroup size is rounded to 32,
			// since that's the maximum number of control points per patch.
			// We cannot size the array based on fixed dispatch parameters,
			// since Metal does not allow that. :(
			// FIXME: We will likely need an option to support passing down target workgroup size,
			// so we can emit appropriate size here.
			statement("threadgroup ", type_to_glsl(type), " ",
			          "(&", to_name(masked_var.self), ")",
			          type_to_array_glsl(type), " = spvStorage", to_name(masked_var.self), "[",
			          "(", to_expression(builtin_invocation_id_id), ".x / ",
			          get_entry_point().output_vertices, ") % ",
			          max_num_instances, "];");

			is_using_builtin_array = old_is_builtin;
		});
	}
	else
	{
		entry_func.add_local_variable(masked_var.self);
	}

	if (!threadgroup_storage)
	{
		vars_needing_early_declaration.push_back(masked_var.self);
	}
	else if (masked_var.initializer)
	{
		// Cannot directly initialize threadgroup variables. Need fixup hooks.
		ID initializer = masked_var.initializer;
		if (strip_array)
		{
			entry_func.fixup_hooks_in.push_back([this, &masked_var, initializer]() {
				auto invocation = to_tesc_invocation_id();
				statement(to_expression(masked_var.self), "[",
				          invocation, "] = ",
				          to_expression(initializer), "[",
				          invocation, "];");
			});
		}
		else
		{
			entry_func.fixup_hooks_in.push_back([this, &masked_var, initializer]() {
				statement(to_expression(masked_var.self), " = ", to_expression(initializer), ";");
			});
		}
	}
}
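
// Add a single stage I/O variable to the interface block, dispatching to the
// plain, composite or tessellation-level helpers as appropriate.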
void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const string &ib_var_ref, SPIRType &ib_type,
                                                  SPIRVariable &var, InterfaceBlockMeta &meta)
{
	auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
	// Tessellation control I/O variables and tessellation evaluation per-point inputs are
	// usually declared as arrays. In these cases, we want to add the element type to the
	// interface block, since in Metal it's the interface block itself which is arrayed.
	auto &var_type = meta.strip_array ? get_variable_element_type(var) : get_variable_data_type(var);
	bool is_builtin = is_builtin_variable(var);
	auto builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn));
	bool is_block = has_decoration(var_type.self, DecorationBlock);

	// If stage variables are masked out, emit them as plain variables instead.
	// For builtins, we query them one by one later.
	// IO blocks are not masked here, we need to mask them per-member instead.
	if (storage == StorageClassOutput && is_stage_output_variable_masked(var))
	{
		// If we ignore an output, we must still emit it, since it might be used by the app.
		// Instead, just emit it as an early declaration.
		emit_local_masked_variable(var, meta.strip_array);
		return;
	}

	if (var_type.basetype == SPIRType::Struct)
	{
		bool block_requires_flattening = variable_storage_requires_stage_io(storage) || is_block;
		bool needs_local_declaration = !is_builtin && block_requires_flattening && meta.allow_local_declaration;

		if (needs_local_declaration)
		{
			// For I/O blocks or structs, we will need to pass the block itself around
			// to functions if they are used globally in leaf functions.
			// Rather than passing down member by member,
			// we unflatten I/O blocks while running the shader,
			// and pass the actual struct type down to leaf functions.
			// We then unflatten inputs, and flatten outputs in the "fixup" stages.
			emit_local_masked_variable(var, meta.strip_array);
		}

		if (!block_requires_flattening)
		{
			// In Metal tessellation shaders, the interface block itself is arrayed. This makes things
			// very complicated, since stage-in structures in MSL don't support nested structures.
			// Luckily, for stage-out when capturing output, we can avoid this and just add
			// composite members directly, because the stage-out structure is stored to a buffer,
			// not returned.
			add_plain_variable_to_interface_block(storage, ib_var_ref, ib_type, var, meta);
		}
		else
		{
			bool masked_block = false;

			// Flatten the struct members into the interface struct
			for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(var_type.member_types.size()); mbr_idx++)
			{
				builtin = BuiltInMax;
				is_builtin = is_member_builtin(var_type, mbr_idx, &builtin);
				auto &mbr_type = get<SPIRType>(var_type.member_types[mbr_idx]);

				if (storage == StorageClassOutput && is_stage_output_block_member_masked(var, mbr_idx, meta.strip_array))
				{
					if (is_block)
						masked_block = true;

					// Non-builtin block output variables are just ignored, since they will still access
					// the block variable as-is. They're just not flattened.
					if (is_builtin && !meta.strip_array)
					{
						// Emit a fake variable instead.
						uint32_t ids = ir.increase_bound_by(2);
						uint32_t ptr_type_id = ids + 0;
						uint32_t var_id = ids + 1;

						auto ptr_type = mbr_type;
						ptr_type.pointer = true;
						ptr_type.pointer_depth++;
						ptr_type.parent_type = var_type.member_types[mbr_idx];
						ptr_type.storage = StorageClassOutput;

						uint32_t initializer = 0;
						if (var.initializer)
							if (auto *c = maybe_get<SPIRConstant>(var.initializer))
								initializer = c->subconstants[mbr_idx];

						set<SPIRType>(ptr_type_id, ptr_type);
						set<SPIRVariable>(var_id, ptr_type_id, StorageClassOutput, initializer);
						entry_func.add_local_variable(var_id);
						vars_needing_early_declaration.push_back(var_id);
						set_name(var_id, builtin_to_glsl(builtin, StorageClassOutput));
						set_decoration(var_id, DecorationBuiltIn, builtin);
					}
				}
				else if (!is_builtin || has_active_builtin(builtin, storage))
				{
					bool is_composite_type = is_matrix(mbr_type) || is_array(mbr_type);
					bool attribute_load_store =
					    storage == StorageClassInput && get_execution_model() != ExecutionModelFragment;
					bool storage_is_stage_io = variable_storage_requires_stage_io(storage);

					// Clip/CullDistance always need to be declared as user attributes.
					if (builtin == BuiltInClipDistance || builtin == BuiltInCullDistance)
						is_builtin = false;

					if ((!is_builtin || attribute_load_store) && storage_is_stage_io && is_composite_type)
					{
						add_composite_member_variable_to_interface_block(storage, ib_var_ref, ib_type, var, mbr_idx,
						                                                 meta);
					}
					else
					{
						add_plain_member_variable_to_interface_block(storage, ib_var_ref, ib_type, var, mbr_idx, meta);
					}
				}
			}

			// If we're redirecting a block, we might still need to access the original block
			// variable if we're masking some members.
			if (masked_block && !needs_local_declaration &&
			    (!is_builtin_variable(var) || get_execution_model() == ExecutionModelTessellationControl))
			{
				if (is_builtin_variable(var))
				{
					// Ensure correct names for the block members if we're actually going to
					// declare gl_PerVertex.
					for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(var_type.member_types.size()); mbr_idx++)
					{
						set_member_name(var_type.self, mbr_idx, builtin_to_glsl(
						    BuiltIn(get_member_decoration(var_type.self, mbr_idx, DecorationBuiltIn)),
						    StorageClassOutput));
					}

					set_name(var_type.self, "gl_PerVertex");
					set_name(var.self, "gl_out_masked");
					stage_out_masked_builtin_type_id = var_type.self;
				}
				emit_local_masked_variable(var, meta.strip_array);
			}
		}
	}
	else if (get_execution_model() == ExecutionModelTessellationEvaluation && storage == StorageClassInput &&
	         !meta.strip_array && is_builtin && (builtin == BuiltInTessLevelOuter || builtin == BuiltInTessLevelInner))
	{
		add_tess_level_input_to_interface_block(ib_var_ref, ib_type, var);
	}
	else if (var_type.basetype == SPIRType::Boolean || var_type.basetype == SPIRType::Char ||
	         type_is_integral(var_type) || type_is_floating_point(var_type))
	{
		if (!is_builtin || has_active_builtin(builtin, storage))
		{
			bool is_composite_type = is_matrix(var_type) || is_array(var_type);
			bool storage_is_stage_io = variable_storage_requires_stage_io(storage);
			bool attribute_load_store = storage == StorageClassInput && get_execution_model() != ExecutionModelFragment;

			// Clip/CullDistance always need to be declared as user attributes.
			if (builtin == BuiltInClipDistance || builtin == BuiltInCullDistance)
				is_builtin = false;

			// MSL does not allow matrices or arrays in input or output variables, so need to handle it specially.
			if ((!is_builtin || attribute_load_store) && storage_is_stage_io && is_composite_type)
			{
				add_composite_variable_to_interface_block(storage, ib_var_ref, ib_type, var, meta);
			}
			else
			{
				add_plain_variable_to_interface_block(storage, ib_var_ref, ib_type, var, meta);
			}
		}
	}
}

// Fix up the mapping of variables to interface member indices, which is used to compile access chains
// for per-vertex variables in a tessellation control shader.
void CompilerMSL::fix_up_interface_member_indices(StorageClass storage, uint32_t ib_type_id)
{
	// Only needed for tessellation shaders and pull-model interpolants.
	// Need to redirect interface indices back to variables themselves.
	// For structs, each member of the struct needs a separate instance.
	if (get_execution_model() != ExecutionModelTessellationControl &&
	    !(get_execution_model() == ExecutionModelTessellationEvaluation && storage == StorageClassInput) &&
	    !(get_execution_model() == ExecutionModelFragment && storage == StorageClassInput &&
	      !pull_model_inputs.empty()))
		return;

	auto mbr_cnt = uint32_t(ir.meta[ib_type_id].members.size());
	for (uint32_t i = 0; i < mbr_cnt; i++)
	{
		uint32_t var_id = get_extended_member_decoration(ib_type_id, i, SPIRVCrossDecorationInterfaceOrigID);
		if (!var_id)
			continue;
		auto &var = get<SPIRVariable>(var_id);

		auto &type = get_variable_element_type(var);

		bool flatten_composites = variable_storage_requires_stage_io(var.storage);
		bool is_block = has_decoration(type.self, DecorationBlock);

		uint32_t mbr_idx = uint32_t(-1);
		if (type.basetype == SPIRType::Struct && (flatten_composites || is_block))
			mbr_idx = get_extended_member_decoration(ib_type_id, i, SPIRVCrossDecorationInterfaceMemberIndex);

		if (mbr_idx != uint32_t(-1))
		{
			// Only set the lowest InterfaceMemberIndex for each variable member.
			// IB struct members will be emitted in-order w.r.t. interface member index.
			if (!has_extended_member_decoration(var_id, mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex))
				set_extended_member_decoration(var_id, mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, i);
		}
		else
		{
			// Only set the lowest InterfaceMemberIndex for each variable.
			// IB struct members will be emitted in-order w.r.t. interface member index.
			if (!has_extended_decoration(var_id, SPIRVCrossDecorationInterfaceMemberIndex))
				set_extended_decoration(var_id, SPIRVCrossDecorationInterfaceMemberIndex, i);
		}
	}
}

// Add an interface structure for the type of storage, which is either StorageClassInput or StorageClassOutput.
// Returns the ID of the newly added variable, or zero if no variable was added.
uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch)
{
	// Accumulate the variables that should appear in the interface struct.
	SmallVector<SPIRVariable *> vars;
	bool incl_builtins = storage == StorageClassOutput || is_tessellation_shader();
	bool has_seen_barycentric = false;

	InterfaceBlockMeta meta;

	// Varying interfaces between stages which use "user()" attribute can be dealt with
	// without explicit packing and unpacking of components. For any variables which link against the runtime
	// in some way (vertex attributes, fragment output, etc), we'll need to deal with it somehow.
	bool pack_components =
	    (storage == StorageClassInput && get_execution_model() == ExecutionModelVertex) ||
	    (storage == StorageClassOutput && get_execution_model() == ExecutionModelFragment) ||
	    (storage == StorageClassOutput && get_execution_model() == ExecutionModelVertex && capture_output_to_buffer);

	ir.for_each_typed_id<SPIRVariable>([&](uint32_t var_id, SPIRVariable &var) {
		if (var.storage != storage)
			return;

		auto &type = this->get<SPIRType>(var.basetype);

		bool is_builtin = is_builtin_variable(var);
		bool is_block = has_decoration(type.self, DecorationBlock);

		auto bi_type = BuiltInMax;
		bool builtin_is_gl_in_out = false;
		if (is_builtin && !is_block)
		{
			bi_type = BuiltIn(get_decoration(var_id, DecorationBuiltIn));
			builtin_is_gl_in_out = bi_type == BuiltInPosition || bi_type == BuiltInPointSize ||
			                       bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance;
		}

		if (is_builtin && is_block)
			builtin_is_gl_in_out = true;

		uint32_t location = get_decoration(var_id, DecorationLocation);

		bool builtin_is_stage_in_out = builtin_is_gl_in_out ||
		                               bi_type == BuiltInLayer || bi_type == BuiltInViewportIndex ||
		                               bi_type == BuiltInBaryCoordNV || bi_type == BuiltInBaryCoordNoPerspNV ||
		                               bi_type == BuiltInFragDepth ||
		                               bi_type == BuiltInFragStencilRefEXT || bi_type == BuiltInSampleMask;

		// These builtins are part of the stage in/out structs.
		bool is_interface_block_builtin =
		    builtin_is_stage_in_out ||
		    (get_execution_model() == ExecutionModelTessellationEvaluation &&
		     (bi_type == BuiltInTessLevelOuter || bi_type == BuiltInTessLevelInner));

		bool is_active = interface_variable_exists_in_entry_point(var.self);
		if (is_builtin && is_active)
		{
			// Only emit the builtin if it's active in this entry point. Interface variable list might lie.
			if (is_block)
			{
				// If any builtin is active, the block is active.
				uint32_t mbr_cnt = uint32_t(type.member_types.size());
				for (uint32_t i = 0; !is_active && i < mbr_cnt; i++)
					is_active = has_active_builtin(BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn)), storage);
			}
			else
			{
				is_active = has_active_builtin(bi_type, storage);
			}
		}

		bool filter_patch_decoration = (has_decoration(var_id, DecorationPatch) || is_patch_block(type)) == patch;

		bool hidden = is_hidden_variable(var, incl_builtins);

		// ClipDistance is never hidden, we need to emulate it when used as an input.
		if (bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance)
			hidden = false;

		// It's not enough to simply avoid marking fragment outputs if the pipeline won't
		// accept them. We can't put them in the struct at all, or otherwise the compiler
		// complains that the outputs weren't explicitly marked.
		if (get_execution_model() == ExecutionModelFragment && storage == StorageClassOutput && !patch &&
		    ((is_builtin && ((bi_type == BuiltInFragDepth && !msl_options.enable_frag_depth_builtin) ||
		                     (bi_type == BuiltInFragStencilRefEXT && !msl_options.enable_frag_stencil_ref_builtin))) ||
		     (!is_builtin && !(msl_options.enable_frag_output_mask & (1 << location)))))
		{
			hidden = true;
			disabled_frag_outputs.push_back(var_id);
			// If a builtin, force it to have the proper name.
			if (is_builtin)
				set_name(var_id, builtin_to_glsl(bi_type, StorageClassFunction));
		}

		// Barycentric inputs must be emitted in stage-in, because they can have interpolation arguments.
		if (is_active && (bi_type == BuiltInBaryCoordNV || bi_type == BuiltInBaryCoordNoPerspNV))
		{
			if (has_seen_barycentric)
				SPIRV_CROSS_THROW("Cannot declare both BaryCoordNV and BaryCoordNoPerspNV in same shader in MSL.");
			has_seen_barycentric = true;
			hidden = false;
		}

		if (is_active && !hidden && type.pointer && filter_patch_decoration &&
		    (!is_builtin || is_interface_block_builtin))
		{
			vars.push_back(&var);

			if (!is_builtin)
			{
				// Need to deal specially with DecorationComponent.
				// Multiple variables can alias the same Location; try to make sure each location is declared only once.
				// We will swizzle data in and out to make this work.
				// This is only relevant for vertex inputs and fragment outputs.
				// Technically tessellation as well, but it is too complicated to support.
				uint32_t component = get_decoration(var_id, DecorationComponent);
				if (component != 0)
				{
					if (is_tessellation_shader())
						SPIRV_CROSS_THROW("Component decoration is not supported in tessellation shaders.");
					else if (pack_components)
					{
						uint32_t array_size = 1;
						if (!type.array.empty())
							array_size = to_array_size_literal(type);

						for (uint32_t location_offset = 0; location_offset < array_size; location_offset++)
						{
							auto &location_meta = meta.location_meta[location + location_offset];
							location_meta.num_components = std::max(location_meta.num_components, component + type.vecsize);

							// For variables sharing location, decorations and base type must match.
							location_meta.base_type_id = type.self;
							location_meta.flat = has_decoration(var.self, DecorationFlat);
							location_meta.noperspective = has_decoration(var.self, DecorationNoPerspective);
							location_meta.centroid = has_decoration(var.self, DecorationCentroid);
							location_meta.sample = has_decoration(var.self, DecorationSample);
						}
					}
				}
			}
		}
	});

	// If no variables qualify, leave.
	// For patch input in a tessellation evaluation shader, the per-vertex stage inputs
	// are included in a special patch control point array.
	if (vars.empty() && !(storage == StorageClassInput && patch && stage_in_var_id))
		return 0;

	// Add a new typed variable for this interface structure.
	// The initializer expression is allocated here, but populated when the function
	// declaration is emitted, because it is cleared after each compilation pass.
	uint32_t next_id = ir.increase_bound_by(3);
	uint32_t ib_type_id = next_id++;
	auto &ib_type = set<SPIRType>(ib_type_id);
	ib_type.basetype = SPIRType::Struct;
	ib_type.storage = storage;
	set_decoration(ib_type_id, DecorationBlock);

	uint32_t ib_var_id = next_id++;
	auto &var = set<SPIRVariable>(ib_var_id, ib_type_id, storage, 0);
	var.initializer = next_id++;

	string ib_var_ref;
	auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
	switch (storage)
	{
	case StorageClassInput:
		ib_var_ref = patch ? patch_stage_in_var_name : stage_in_var_name;
		if (get_execution_model() == ExecutionModelTessellationControl)
		{
			// Add a hook to populate the shared workgroup memory containing the gl_in array.
			entry_func.fixup_hooks_in.push_back([=]() {
				// Can't use PatchVertices, PrimitiveId, or InvocationId yet; the hooks for those may not have run yet.
				if (msl_options.multi_patch_workgroup)
				{
					// n.b. builtin_invocation_id_id here is the dispatch global invocation ID,
					// not the TC invocation ID.
					statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "* gl_in = &",
					          input_buffer_var_name, "[min(", to_expression(builtin_invocation_id_id), ".x / ",
					          get_entry_point().output_vertices,
					          ", spvIndirectParams[1] - 1) * spvIndirectParams[0]];");
				}
				else
				{
					// It's safe to use InvocationId here because it's directly mapped to a
					// Metal builtin, and therefore doesn't need a hook.
					statement("if (", to_expression(builtin_invocation_id_id), " < spvIndirectParams[0])");
					statement("    ", input_wg_var_name, "[", to_expression(builtin_invocation_id_id),
					          "] = ", ib_var_ref, ";");
					statement("threadgroup_barrier(mem_flags::mem_threadgroup);");
					statement("if (", to_expression(builtin_invocation_id_id),
					          " >= ", get_entry_point().output_vertices, ")");
					statement("    return;");
				}
			});
		}
		break;

	case StorageClassOutput:
	{
		ib_var_ref = patch ? patch_stage_out_var_name : stage_out_var_name;

		// Add the output interface struct as a local variable to the entry function.
		// If the entry point should return the output struct, set the entry function
		// to return the output interface struct, otherwise to return nothing.
		// Watch out for the rare case where the terminator of the last entry point block is a
		// Kill, instead of a Return. Based on SPIR-V's block-domination rules, we assume that
		// any block that has a Kill will also have a terminating Return, except the last block.
		// Indicate the output var requires early initialization.
		bool ep_should_return_output = !get_is_rasterization_disabled();
		uint32_t rtn_id = ep_should_return_output ? ib_var_id : 0;
		if (!capture_output_to_buffer)
		{
			entry_func.add_local_variable(ib_var_id);
			for (auto &blk_id : entry_func.blocks)
			{
				auto &blk = get<SPIRBlock>(blk_id);
				if (blk.terminator == SPIRBlock::Return || (blk.terminator == SPIRBlock::Kill && blk_id == entry_func.blocks.back()))
					blk.return_value = rtn_id;
			}
			vars_needing_early_declaration.push_back(ib_var_id);
		}
		else
		{
			switch (get_execution_model())
			{
			case ExecutionModelVertex:
			case ExecutionModelTessellationEvaluation:
				// Instead of declaring a struct variable to hold the output and then
				// copying that to the output buffer, we'll declare the output variable
				// as a reference to the final output element in the buffer. Then we can
				// avoid the extra copy.
				entry_func.fixup_hooks_in.push_back([=]() {
					if (stage_out_var_id)
					{
						// The first member of the indirect buffer is always the number of vertices
						// to draw.
						// We zero-base the InstanceID & VertexID variables for HLSL emulation elsewhere, so don't do it twice.
						if (get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation)
						{
							statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref,
							          " = ", output_buffer_var_name, "[", to_expression(builtin_invocation_id_id),
							          ".y * ", to_expression(builtin_stage_input_size_id), ".x + ",
							          to_expression(builtin_invocation_id_id), ".x];");
						}
						else if (msl_options.enable_base_index_zero)
						{
							statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref,
							          " = ", output_buffer_var_name, "[", to_expression(builtin_instance_idx_id),
							          " * spvIndirectParams[0] + ", to_expression(builtin_vertex_idx_id), "];");
						}
						else
						{
							statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref,
							          " = ", output_buffer_var_name, "[(", to_expression(builtin_instance_idx_id),
							          " - ", to_expression(builtin_base_instance_id), ") * spvIndirectParams[0] + ",
							          to_expression(builtin_vertex_idx_id), " - ",
							          to_expression(builtin_base_vertex_id), "];");
						}
					}
				});
				break;
			case ExecutionModelTessellationControl:
				if (msl_options.multi_patch_workgroup)
				{
					// We cannot use PrimitiveId here, because the hook may not have run yet.
					if (patch)
					{
						entry_func.fixup_hooks_in.push_back([=]() {
							statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref,
							          " = ", patch_output_buffer_var_name, "[", to_expression(builtin_invocation_id_id),
							          ".x / ", get_entry_point().output_vertices, "];");
						});
					}
					else
					{
						entry_func.fixup_hooks_in.push_back([=]() {
							statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "* gl_out = &",
							          output_buffer_var_name, "[", to_expression(builtin_invocation_id_id), ".x - ",
							          to_expression(builtin_invocation_id_id), ".x % ",
							          get_entry_point().output_vertices, "];");
						});
					}
				}
				else
				{
					if (patch)
					{
						entry_func.fixup_hooks_in.push_back([=]() {
							statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref,
							          " = ", patch_output_buffer_var_name, "[", to_expression(builtin_primitive_id_id),
							          "];");
						});
					}
					else
					{
						entry_func.fixup_hooks_in.push_back([=]() {
							statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "* gl_out = &",
							          output_buffer_var_name, "[", to_expression(builtin_primitive_id_id), " * ",
							          get_entry_point().output_vertices, "];");
						});
					}
				}
				break;
			default:
				break;
			}
		}
		break;
	}

	default:
		break;
	}

	set_name(ib_type_id, to_name(ir.default_entry_point) + "_" + ib_var_ref);
	set_name(ib_var_id, ib_var_ref);

	for (auto *p_var : vars)
	{
		bool strip_array =
		    (get_execution_model() == ExecutionModelTessellationControl ||
		     (get_execution_model() == ExecutionModelTessellationEvaluation && storage == StorageClassInput)) &&
		    !patch;

		// Fixing up flattened stores in TESC is impossible since the memory is group shared either via
		// device (not masked) or threadgroup (masked) storage classes and it's race condition city.
		meta.strip_array = strip_array;
		meta.allow_local_declaration = !strip_array && !(get_execution_model() == ExecutionModelTessellationControl &&
		                                                 storage == StorageClassOutput);
		add_variable_to_interface_block(storage, ib_var_ref, ib_type, *p_var, meta);
	}

	if (get_execution_model() == ExecutionModelTessellationControl && msl_options.multi_patch_workgroup &&
	    storage == StorageClassInput)
	{
		// For tessellation control inputs, add all outputs from the vertex shader to ensure
		// the struct containing them is the correct size and layout.
		for (auto &input : inputs_by_location)
		{
			if (location_inputs_in_use.count(input.first.location) != 0)
				continue;

			// Create a fake variable to put at the location.
			uint32_t offset = ir.increase_bound_by(4);
			uint32_t type_id = offset;
			uint32_t array_type_id = offset + 1;
			uint32_t ptr_type_id = offset + 2;
			uint32_t var_id = offset + 3;

			SPIRType type;
			switch (input.second.format)
			{
			case MSL_SHADER_INPUT_FORMAT_UINT16:
			case MSL_SHADER_INPUT_FORMAT_ANY16:
				type.basetype = SPIRType::UShort;
				type.width = 16;
				break;
			case MSL_SHADER_INPUT_FORMAT_ANY32:
			default:
				type.basetype = SPIRType::UInt;
				type.width = 32;
				break;
			}
			type.vecsize = input.second.vecsize;
			set<SPIRType>(type_id, type);

			type.array.push_back(0);
			type.array_size_literal.push_back(true);
			type.parent_type = type_id;
			set<SPIRType>(array_type_id, type);

			type.pointer = true;
			type.pointer_depth++;
			type.parent_type = array_type_id;
			type.storage = storage;
			auto &ptr_type = set<SPIRType>(ptr_type_id, type);
			ptr_type.self = array_type_id;

			auto &fake_var = set<SPIRVariable>(var_id, ptr_type_id, storage);
			set_decoration(var_id, DecorationLocation, input.first.location);
			if (input.first.component)
				set_decoration(var_id, DecorationComponent, input.first.component);

			meta.strip_array = true;
			meta.allow_local_declaration = false;
			add_variable_to_interface_block(storage, ib_var_ref, ib_type, fake_var, meta);
		}
	}

	// When multiple variables need to access same location,
	// unroll locations one by one and we will flatten output or input as necessary.
	for (auto &loc : meta.location_meta)
	{
		uint32_t location = loc.first;
		auto &location_meta = loc.second;

		uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size());
		uint32_t type_id = build_extended_vector_type(location_meta.base_type_id, location_meta.num_components);
		ib_type.member_types.push_back(type_id);

		set_member_name(ib_type.self, ib_mbr_idx, join("m_location_", location));
		set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location);
		mark_location_as_used_by_shader(location, get<SPIRType>(type_id), storage);

		if (location_meta.flat)
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat);
		if (location_meta.noperspective)
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective);
		if (location_meta.centroid)
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid);
		if (location_meta.sample)
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample);
	}

	// Sort the members of the structure by their locations.
	MemberSorter member_sorter(ib_type, ir.meta[ib_type_id], MemberSorter::LocationThenBuiltInType);
	member_sorter.sort();

	// The member indices were saved to the original variables, but after the members
	// were sorted, those indices are now likely incorrect. Fix those up now.
	fix_up_interface_member_indices(storage, ib_type_id);

	// For patch inputs, add one more member, holding the array of control point data.
	if (get_execution_model() == ExecutionModelTessellationEvaluation && storage == StorageClassInput && patch &&
	    stage_in_var_id)
	{
		uint32_t pcp_type_id = ir.increase_bound_by(1);
		auto &pcp_type = set<SPIRType>(pcp_type_id, ib_type);
		pcp_type.basetype = SPIRType::ControlPointArray;
		pcp_type.parent_type = pcp_type.type_alias = get_stage_in_struct_type().self;
		pcp_type.storage = storage;
		ir.meta[pcp_type_id] = ir.meta[ib_type.self];

		uint32_t mbr_idx = uint32_t(ib_type.member_types.size());
		ib_type.member_types.push_back(pcp_type_id);
		set_member_name(ib_type.self, mbr_idx, "gl_in");
	}

	return ib_var_id;
}
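
// Wrap the interface block in the arrayed accessor used by tessellation stages:
// gl_in/gl_out pointers for tessellation control, and the patch_control_point
// container for tessellation evaluation per-vertex inputs.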
uint32_t CompilerMSL::add_interface_block_pointer(uint32_t ib_var_id, StorageClass storage)
{
	if (!ib_var_id)
		return 0;

	uint32_t ib_ptr_var_id;
	uint32_t next_id = ir.increase_bound_by(3);
	auto &ib_type = expression_type(ib_var_id);
	if (get_execution_model() == ExecutionModelTessellationControl)
	{
		// Tessellation control per-vertex I/O is presented as an array, so we must
		// do the same with our struct here.
		uint32_t ib_ptr_type_id = next_id++;
		auto &ib_ptr_type = set<SPIRType>(ib_ptr_type_id, ib_type);
		ib_ptr_type.parent_type = ib_ptr_type.type_alias = ib_type.self;
		ib_ptr_type.pointer = true;
		ib_ptr_type.pointer_depth++;
		ib_ptr_type.storage =
		    storage == StorageClassInput ?
		        (msl_options.multi_patch_workgroup ? StorageClassStorageBuffer : StorageClassWorkgroup) :
		        StorageClassStorageBuffer;
		ir.meta[ib_ptr_type_id] = ir.meta[ib_type.self];

		// To ensure that get_variable_data_type() doesn't strip off the pointer,
		// which we need, use another pointer.
		uint32_t ib_ptr_ptr_type_id = next_id++;
		auto &ib_ptr_ptr_type = set<SPIRType>(ib_ptr_ptr_type_id, ib_ptr_type);
		ib_ptr_ptr_type.parent_type = ib_ptr_type_id;
		ib_ptr_ptr_type.type_alias = ib_type.self;
		ib_ptr_ptr_type.storage = StorageClassFunction;
		ir.meta[ib_ptr_ptr_type_id] = ir.meta[ib_type.self];

		ib_ptr_var_id = next_id;
		set<SPIRVariable>(ib_ptr_var_id, ib_ptr_ptr_type_id, StorageClassFunction, 0);
		set_name(ib_ptr_var_id, storage == StorageClassInput ? "gl_in" : "gl_out");
	}
	else
	{
		// Tessellation evaluation per-vertex inputs are also presented as arrays.
		// But, in Metal, this array uses a very special type, 'patch_control_point<T>',
		// which is a container that can be used to access the control point data.
		// To represent this, a special 'ControlPointArray' type has been added to the
		// SPIRV-Cross type system. It should only be generated by and seen in the MSL
		// backend (i.e. this one).
		uint32_t pcp_type_id = next_id++;
		auto &pcp_type = set<SPIRType>(pcp_type_id, ib_type);
		pcp_type.basetype = SPIRType::ControlPointArray;
		pcp_type.parent_type = pcp_type.type_alias = ib_type.self;
		pcp_type.storage = storage;
		ir.meta[pcp_type_id] = ir.meta[ib_type.self];

		ib_ptr_var_id = next_id;
		set<SPIRVariable>(ib_ptr_var_id, pcp_type_id, storage, 0);
		set_name(ib_ptr_var_id, "gl_in");
		ir.meta[ib_ptr_var_id].decoration.qualified_alias = join(patch_stage_in_var_name, ".gl_in");
	}
	return ib_ptr_var_id;
}

// Ensure that the type is compatible with the builtin.
// If it is, simply return the given type ID.
// Otherwise, create a new type, and return its ID.
uint32_t CompilerMSL::ensure_correct_builtin_type(uint32_t type_id, BuiltIn builtin)
{
	auto &type = get<SPIRType>(type_id);
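	// In MSL, SampleMask is a scalar uint rather than an array, and Layer,
	// ViewportIndex and FragStencilRef must be declared as uint.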
	if ((builtin == BuiltInSampleMask && is_array(type)) ||
	    ((builtin == BuiltInLayer || builtin == BuiltInViewportIndex || builtin == BuiltInFragStencilRefEXT) &&
	     type.basetype != SPIRType::UInt))
	{
		uint32_t next_id = ir.increase_bound_by(type.pointer ? 2 : 1);
		uint32_t base_type_id = next_id++;
		auto &base_type = set<SPIRType>(base_type_id);
		base_type.basetype = SPIRType::UInt;
		base_type.width = 32;

		if (!type.pointer)
			return base_type_id;

		uint32_t ptr_type_id = next_id++;
		auto &ptr_type = set<SPIRType>(ptr_type_id);
		ptr_type = base_type;
		ptr_type.pointer = true;
		ptr_type.pointer_depth++;
		ptr_type.storage = type.storage;
		ptr_type.parent_type = base_type_id;
		return ptr_type_id;
	}

	return type_id;
}

// Ensure that the type is compatible with the shader input.
// If it is, simply return the given type ID.
// Otherwise, create a new type, and return its ID.
uint32_t CompilerMSL::ensure_correct_input_type(uint32_t type_id, uint32_t location, uint32_t component, uint32_t num_components, bool strip_array)
{
	auto &type = get<SPIRType>(type_id);

	uint32_t max_array_dimensions = strip_array ? 1 : 0;

	// Struct and array types must match exactly.
	if (type.basetype == SPIRType::Struct || type.array.size() > max_array_dimensions)
		return type_id;

	auto p_va = inputs_by_location.find({ location, component });
	if (p_va == end(inputs_by_location))
	{
		if (num_components > type.vecsize)
			return build_extended_vector_type(type_id, num_components);
		else
			return type_id;
	}

	if (num_components == 0)
		num_components = p_va->second.vecsize;

	switch (p_va->second.format)
	{
	case MSL_SHADER_INPUT_FORMAT_UINT8:
	{
		switch (type.basetype)
		{
		case SPIRType::UByte:
		case SPIRType::UShort:
		case SPIRType::UInt:
			if (num_components > type.vecsize)
				return build_extended_vector_type(type_id, num_components);
			else
				return type_id;

		case SPIRType::Short:
			return build_extended_vector_type(type_id, num_components > type.vecsize ? num_components : type.vecsize,
			                                  SPIRType::UShort);
		case SPIRType::Int:
			return build_extended_vector_type(type_id, num_components > type.vecsize ? num_components : type.vecsize,
			                                  SPIRType::UInt);

		default:
			SPIRV_CROSS_THROW("Vertex attribute type mismatch between host and shader");
		}
	}

	case MSL_SHADER_INPUT_FORMAT_UINT16:
	{
		switch (type.basetype)
		{
		case SPIRType::UShort:
		case SPIRType::UInt:
			if (num_components > type.vecsize)
				return build_extended_vector_type(type_id, num_components);
			else
				return type_id;

		case SPIRType::Int:
			return build_extended_vector_type(type_id, num_components > type.vecsize ? num_components : type.vecsize,
			                                  SPIRType::UInt);

		default:
			SPIRV_CROSS_THROW("Vertex attribute type mismatch between host and shader");
		}
	}

	default:
		if (num_components > type.vecsize)
			type_id = build_extended_vector_type(type_id, num_components);
		break;
	}

	return type_id;
}
void CompilerMSL::mark_struct_members_packed(const SPIRType &type)
{
	set_extended_decoration(type.self, SPIRVCrossDecorationPhysicalTypePacked);

	// Problem case! Struct needs to be placed at an awkward alignment.
	// Mark every member of the child struct as packed.
	uint32_t mbr_cnt = uint32_t(type.member_types.size());
	for (uint32_t i = 0; i < mbr_cnt; i++)
	{
		auto &mbr_type = get<SPIRType>(type.member_types[i]);
		if (mbr_type.basetype == SPIRType::Struct)
		{
			// Recursively mark structs as packed.
			auto *struct_type = &mbr_type;
			while (!struct_type->array.empty())
				struct_type = &get<SPIRType>(struct_type->parent_type);
			mark_struct_members_packed(*struct_type);
		}
		else if (!is_scalar(mbr_type))
			set_extended_member_decoration(type.self, i, SPIRVCrossDecorationPhysicalTypePacked);
	}
}
void CompilerMSL::mark_scalar_layout_structs(const SPIRType &type)
{
	uint32_t mbr_cnt = uint32_t(type.member_types.size());
	for (uint32_t i = 0; i < mbr_cnt; i++)
	{
		auto &mbr_type = get<SPIRType>(type.member_types[i]);
		if (mbr_type.basetype == SPIRType::Struct)
		{
			auto *struct_type = &mbr_type;
			while (!struct_type->array.empty())
				struct_type = &get<SPIRType>(struct_type->parent_type);

			if (has_extended_decoration(struct_type->self, SPIRVCrossDecorationPhysicalTypePacked))
				continue;

			uint32_t msl_alignment = get_declared_struct_member_alignment_msl(type, i);
			uint32_t msl_size = get_declared_struct_member_size_msl(type, i);
			uint32_t spirv_offset = type_struct_member_offset(type, i);
			uint32_t spirv_offset_next;
			if (i + 1 < mbr_cnt)
				spirv_offset_next = type_struct_member_offset(type, i + 1);
			else
				spirv_offset_next = spirv_offset + msl_size;

			// Both are complicated cases. In scalar layout, a struct of float3 might just consume 12 bytes,
			// and the next member will be placed at offset 12.
			bool struct_is_misaligned = (spirv_offset % msl_alignment) != 0;
			bool struct_is_too_large = spirv_offset + msl_size > spirv_offset_next;
			uint32_t array_stride = 0;
			bool struct_needs_explicit_padding = false;

			// Verify that if a struct is used as an array that ArrayStride matches the effective size of the struct.
			if (!mbr_type.array.empty())
			{
				array_stride = type_struct_member_array_stride(type, i);
				uint32_t dimensions = uint32_t(mbr_type.array.size() - 1);
				for (uint32_t dim = 0; dim < dimensions; dim++)
				{
					uint32_t array_size = to_array_size_literal(mbr_type, dim);
					array_stride /= max(array_size, 1u);
				}

				// Set expected struct size based on ArrayStride.
				struct_needs_explicit_padding = true;

				// If struct size is larger than array stride, we might be able to fit, if we tightly pack.
				if (get_declared_struct_size_msl(*struct_type) > array_stride)
					struct_is_too_large = true;
			}

			if (struct_is_misaligned || struct_is_too_large)
				mark_struct_members_packed(*struct_type);
			mark_scalar_layout_structs(*struct_type);

			if (struct_needs_explicit_padding)
			{
				msl_size = get_declared_struct_size_msl(*struct_type, true, true);
				if (array_stride < msl_size)
				{
					SPIRV_CROSS_THROW("Cannot express an array stride smaller than size of struct type.");
				}
				else
				{
					if (has_extended_decoration(struct_type->self, SPIRVCrossDecorationPaddingTarget))
					{
						if (array_stride !=
						    get_extended_decoration(struct_type->self, SPIRVCrossDecorationPaddingTarget))
							SPIRV_CROSS_THROW(
							    "A struct is used with different array strides. Cannot express this in MSL.");
					}
					else
						set_extended_decoration(struct_type->self, SPIRVCrossDecorationPaddingTarget, array_stride);
				}
			}
		}
	}
}
// Sort the members of the struct type by offset, and pack and then pad members where needed
// to align MSL members with SPIR-V offsets. The struct members are iterated twice. Packing
// occurs first, followed by padding, because packing a member reduces both its size and its
// natural alignment, possibly requiring a padding member to be added ahead of it.
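// As a sketch (member names assumed): a SPIR-V block { float a; /* Offset 0 */ float b; /* Offset 16 */ }
// has no natural MSL equivalent, so an inert member along the lines of "char _m1_pad[12];"
// is emitted between a and b to push b out to offset 16.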
void CompilerMSL::align_struct(SPIRType &ib_type, unordered_set<uint32_t> &aligned_structs)
{
	// We align structs recursively, so stop any redundant work.
	ID &ib_type_id = ib_type.self;
	if (aligned_structs.count(ib_type_id))
		return;
	aligned_structs.insert(ib_type_id);

	// Sort the members of the interface structure by their offset.
	// They should already be sorted per SPIR-V spec anyway.
	MemberSorter member_sorter(ib_type, ir.meta[ib_type_id], MemberSorter::Offset);
	member_sorter.sort();

	auto mbr_cnt = uint32_t(ib_type.member_types.size());

	for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
	{
		// Pack any dependent struct types before we pack a parent struct.
		auto &mbr_type = get<SPIRType>(ib_type.member_types[mbr_idx]);
		if (mbr_type.basetype == SPIRType::Struct)
			align_struct(mbr_type, aligned_structs);
	}

	// Test the alignment of each member, and if a member should be closer to the previous
	// member than the default spacing expects, it is likely that the previous member is in
	// a packed format. If so, and the previous member is packable, pack it.
	// For example ... this applies to any 3-element vector that is followed by a scalar.
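	// Concretely: { float3 v; float f; } with f at SPIR-V offset 12 only works in MSL if v
	// becomes packed_float3, since a plain float3 occupies 16 bytes with 16-byte alignment
	// and would push f out to offset 16.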
	uint32_t msl_offset = 0;
	for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
	{
		// This checks the member in isolation, if the member needs some kind of type remapping to conform to SPIR-V
		// offsets, array strides and matrix strides.
		ensure_member_packing_rules_msl(ib_type, mbr_idx);

		// Align current offset to the current member's default alignment. If the member was packed, it will observe
		// the updated alignment here.
		uint32_t msl_align_mask = get_declared_struct_member_alignment_msl(ib_type, mbr_idx) - 1;
		uint32_t aligned_msl_offset = (msl_offset + msl_align_mask) & ~msl_align_mask;

		// Fetch the member offset as declared in the SPIRV.
		uint32_t spirv_mbr_offset = get_member_decoration(ib_type_id, mbr_idx, DecorationOffset);
		if (spirv_mbr_offset > aligned_msl_offset)
		{
			// Since MSL and SPIR-V have slightly different struct member alignment and
			// size rules, we'll pad to standard C-packing rules with a char[] array. If the member is farther
			// away than C-packing expects, add an inert padding member before the member.
			uint32_t padding_bytes = spirv_mbr_offset - aligned_msl_offset;
			set_extended_member_decoration(ib_type_id, mbr_idx, SPIRVCrossDecorationPaddingTarget, padding_bytes);

			// Re-align as a sanity check that aligning post-padding matches up.
			msl_offset += padding_bytes;
			aligned_msl_offset = (msl_offset + msl_align_mask) & ~msl_align_mask;
		}
		else if (spirv_mbr_offset < aligned_msl_offset)
		{
			// This should not happen, but deal with unexpected scenarios.
			// It *might* happen if a sub-struct has a larger alignment requirement in MSL than SPIR-V.
			SPIRV_CROSS_THROW("Cannot represent buffer block correctly in MSL.");
		}

		assert(aligned_msl_offset == spirv_mbr_offset);

		// Increment the current offset to be positioned immediately after the current member.
		// Don't do this for the last member since it can be unsized, and it is not relevant for padding purposes here.
		if (mbr_idx + 1 < mbr_cnt)
			msl_offset = aligned_msl_offset + get_declared_struct_member_size_msl(ib_type, mbr_idx);
	}
}
bool CompilerMSL::validate_member_packing_rules_msl(const SPIRType &type, uint32_t index) const
{
	auto &mbr_type = get<SPIRType>(type.member_types[index]);
	uint32_t spirv_offset = get_member_decoration(type.self, index, DecorationOffset);

	if (index + 1 < type.member_types.size())
	{
		// First, we will check offsets. If SPIR-V offset + MSL size > SPIR-V offset of next member,
		// we *must* perform some kind of remapping, no way of getting around it.
		// We can always pad after this member if necessary, so that case is fine.
		uint32_t spirv_offset_next = get_member_decoration(type.self, index + 1, DecorationOffset);
		assert(spirv_offset_next >= spirv_offset);
		uint32_t maximum_size = spirv_offset_next - spirv_offset;
		uint32_t msl_mbr_size = get_declared_struct_member_size_msl(type, index);
		if (msl_mbr_size > maximum_size)
			return false;
	}

	if (!mbr_type.array.empty())
	{
		// If we have an array type, array stride must match exactly with SPIR-V.

		// An exception to this requirement is if we have one array element.
		// This comes from the DX scalar layout workaround.
		// If the app tries to be cheeky and access the member out of bounds, this will not work, but this is the best we can do.
		// In OpAccessChain with logical memory models, access chains must be in-bounds per the SPIR-V specification.
		bool relax_array_stride = mbr_type.array.back() == 1 && mbr_type.array_size_literal.back();

		if (!relax_array_stride)
		{
			uint32_t spirv_array_stride = type_struct_member_array_stride(type, index);
			uint32_t msl_array_stride = get_declared_struct_member_array_stride_msl(type, index);
			if (spirv_array_stride != msl_array_stride)
				return false;
		}
	}

	if (is_matrix(mbr_type))
	{
		// Need to check MatrixStride as well.
		uint32_t spirv_matrix_stride = type_struct_member_matrix_stride(type, index);
		uint32_t msl_matrix_stride = get_declared_struct_member_matrix_stride_msl(type, index);
		if (spirv_matrix_stride != msl_matrix_stride)
			return false;
	}

	// Now, we check alignment.
	uint32_t msl_alignment = get_declared_struct_member_alignment_msl(type, index);
	if ((spirv_offset % msl_alignment) != 0)
		return false;

	// We're in the clear.
	return true;
}
// Here we need to verify that the member type we declare conforms to Offset, ArrayStride or MatrixStride restrictions.
// If there is a mismatch, we need to emit remapped types, either normal types, or "packed_X" types.
// In odd cases we need to emit packed and remapped types, for e.g. weird matrices or arrays with weird array strides.
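// For instance (a sketch): a column-major float3x3 member with MatrixStride 16 is declared
// through a remapped physical type whose column vectors are widened to 4 elements, with an
// unpack on load, while a float3 member packed tightly against the next member only needs
// the cheaper "packed_float3" rename.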
void CompilerMSL::ensure_member_packing_rules_msl(SPIRType &ib_type, uint32_t index)
{
	if (validate_member_packing_rules_msl(ib_type, index))
		return;

	// We failed validation.
	// This case will be nightmare-ish to deal with. This could possibly happen if struct alignment does not quite
	// match up with what we want. Scalar block layout comes to mind here where we might have to work around the rule
	// that struct alignment == max alignment of all members and struct size depends on this alignment.
	auto &mbr_type = get<SPIRType>(ib_type.member_types[index]);
	if (mbr_type.basetype == SPIRType::Struct)
		SPIRV_CROSS_THROW("Cannot perform any repacking for structs when it is used as a member of another struct.");

	// Perform remapping here.
	// There is nothing to be gained by using packed scalars, so don't attempt it.
	if (!is_scalar(ib_type))
		set_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypePacked);

	// Try validating again, now with packed.
	if (validate_member_packing_rules_msl(ib_type, index))
		return;

	// We're in deep trouble, and we need to create a new PhysicalType which matches up with what we expect.
	// A lot of work goes here ...
	// We will need remapping on Load and Store to translate the types between Logical and Physical.

	// First, we check if we have a small-vector std140 array.
	// We detect this if we have an array of vectors, and the array stride is greater than the number of elements.
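	// E.g. (a sketch): a std140 "float arr[4]" carries ArrayStride 16, i.e. four 32-bit elements
	// per stride for a 1-component type, so it is remapped to a float4[4] physical type and
	// each load swizzles out ".x".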
	if (!mbr_type.array.empty() && !is_matrix(mbr_type))
	{
		uint32_t array_stride = type_struct_member_array_stride(ib_type, index);

		// Hack off array-of-arrays until we find the array stride per element we must have to make it work.
		uint32_t dimensions = uint32_t(mbr_type.array.size() - 1);
		for (uint32_t dim = 0; dim < dimensions; dim++)
			array_stride /= max(to_array_size_literal(mbr_type, dim), 1u);

		uint32_t elems_per_stride = array_stride / (mbr_type.width / 8);

		if (elems_per_stride == 3)
			SPIRV_CROSS_THROW("Cannot use ArrayStride of 3 elements in remapping scenarios.");
		else if (elems_per_stride > 4)
			SPIRV_CROSS_THROW("Cannot represent vectors with more than 4 elements in MSL.");

		auto physical_type = mbr_type;
		physical_type.vecsize = elems_per_stride;
		physical_type.parent_type = 0;
		uint32_t type_id = ir.increase_bound_by(1);
		set<SPIRType>(type_id, physical_type);
		set_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypeID, type_id);
		set_decoration(type_id, DecorationArrayStride, array_stride);

		// Remove packed_ for vectors of size 1, 2 and 4.
		unset_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypePacked);
	}
	else if (is_matrix(mbr_type))
	{
		// MatrixStride might be std140-esque.
		uint32_t matrix_stride = type_struct_member_matrix_stride(ib_type, index);
		uint32_t elems_per_stride = matrix_stride / (mbr_type.width / 8);

		if (elems_per_stride == 3)
			SPIRV_CROSS_THROW("Cannot use ArrayStride of 3 elements in remapping scenarios.");
		else if (elems_per_stride > 4)
			SPIRV_CROSS_THROW("Cannot represent vectors with more than 4 elements in MSL.");

		bool row_major = has_member_decoration(ib_type.self, index, DecorationRowMajor);

		auto physical_type = mbr_type;
		physical_type.parent_type = 0;
		if (row_major)
			physical_type.columns = elems_per_stride;
		else
			physical_type.vecsize = elems_per_stride;
		uint32_t type_id = ir.increase_bound_by(1);
		set<SPIRType>(type_id, physical_type);
		set_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypeID, type_id);

		// Remove packed_ for vectors of size 1, 2 and 4.
		unset_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypePacked);
	}
	else
		SPIRV_CROSS_THROW("Found a buffer packing case which we cannot represent in MSL.");
	// Try validating again, now with physical type remapping.
	if (validate_member_packing_rules_msl(ib_type, index))
		return;

	// We might have a particular odd scalar layout case where the last element of an array
	// does not take up as much space as the ArrayStride or MatrixStride. This can happen with DX cbuffers.
	// The "proper" workaround for this is extremely painful and essentially impossible in the edge case of float3[],
	// so we hack around it by declaring the offending array or matrix with one less array size/col/row,
	// and rely on padding to get the correct value. We will technically access arrays out of bounds into the padding region,
	// but it should spill over gracefully without too much trouble. We rely on behavior like this for unsized arrays anyways.

	// E.g. we might observe a physical layout of:
	// { float2 a[2]; float b; } in cbuffer layout where ArrayStride of a is 16, but offset of b is 24, packed right after a[1] ...
	uint32_t type_id = get_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypeID);
	auto &type = get<SPIRType>(type_id);

	// Modify the physical type in-place. This is safe since each physical type workaround is a copy.
	if (is_array(type))
	{
		if (type.array.back() > 1)
		{
			if (!type.array_size_literal.back())
				SPIRV_CROSS_THROW("Cannot apply scalar layout workaround with spec constant array size.");
			type.array.back() -= 1;
		}
		else
		{
			// We have an array of size 1, so we cannot decrement that. Our only option now is to
			// force a packed layout instead, and drop the physical type remap since ArrayStride is meaningless now.
			unset_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypeID);
			set_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypePacked);
		}
	}
	else if (is_matrix(type))
	{
		bool row_major = has_member_decoration(ib_type.self, index, DecorationRowMajor);
		if (!row_major)
		{
			// Slice off one column. If we only have 2 columns, this might turn the matrix into a vector with one array element instead.
			if (type.columns > 2)
			{
				type.columns--;
			}
			else if (type.columns == 2)
			{
				type.columns = 1;
				assert(type.array.empty());
				type.array.push_back(1);
				type.array_size_literal.push_back(true);
			}
		}
		else
		{
			// Slice off one row. If we only have 2 rows, this might turn the matrix into a vector with one array element instead.
			if (type.vecsize > 2)
			{
				type.vecsize--;
			}
			else if (type.vecsize == 2)
			{
				type.vecsize = type.columns;
				type.columns = 1;
				assert(type.array.empty());
				type.array.push_back(1);
				type.array_size_literal.push_back(true);
			}
		}
	}

	// This better validate now, or we must fail gracefully.
	if (!validate_member_packing_rules_msl(ib_type, index))
		SPIRV_CROSS_THROW("Found a buffer packing case which we cannot represent in MSL.");
}
void CompilerMSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression)
{
	auto &type = expression_type(rhs_expression);

	bool lhs_remapped_type = has_extended_decoration(lhs_expression, SPIRVCrossDecorationPhysicalTypeID);
	bool lhs_packed_type = has_extended_decoration(lhs_expression, SPIRVCrossDecorationPhysicalTypePacked);
	auto *lhs_e = maybe_get<SPIRExpression>(lhs_expression);
	auto *rhs_e = maybe_get<SPIRExpression>(rhs_expression);

	bool transpose = lhs_e && lhs_e->need_transpose;

	// No physical type remapping, and no packed type, so can just emit a store directly.
	if (!lhs_remapped_type && !lhs_packed_type)
	{
		// We might not be dealing with remapped physical types or packed types,
		// but we might be doing a clean store to a row-major matrix.
		// In this case, we just flip transpose states and emit the store; any transpose belongs in the RHS expression, if there is one.
		if (is_matrix(type) && lhs_e && lhs_e->need_transpose)
		{
			lhs_e->need_transpose = false;

			if (rhs_e && rhs_e->need_transpose)
			{
				// Direct copy, but might need to unpack RHS.
				// Skip the transpose, as we will transpose when writing to LHS and transpose(transpose(T)) == T.
				rhs_e->need_transpose = false;
				statement(to_expression(lhs_expression), " = ", to_unpacked_row_major_matrix_expression(rhs_expression),
				          ";");
				rhs_e->need_transpose = true;
			}
			else
				statement(to_expression(lhs_expression), " = transpose(", to_unpacked_expression(rhs_expression), ");");

			lhs_e->need_transpose = true;
			register_write(lhs_expression);
		}
		else if (lhs_e && lhs_e->need_transpose)
		{
			lhs_e->need_transpose = false;

			// Storing a column to a row-major matrix. Unroll the write.
			for (uint32_t c = 0; c < type.vecsize; c++)
			{
				auto lhs_expr = to_dereferenced_expression(lhs_expression);
				auto column_index = lhs_expr.find_last_of('[');
				if (column_index != string::npos)
				{
					statement(lhs_expr.insert(column_index, join('[', c, ']')), " = ",
					          to_extract_component_expression(rhs_expression, c), ";");
				}
			}
			lhs_e->need_transpose = true;
			register_write(lhs_expression);
		}
		else
			CompilerGLSL::emit_store_statement(lhs_expression, rhs_expression);
	}
	else if (!lhs_remapped_type && !is_matrix(type) && !transpose)
	{
		// Even if the target type is packed, we can directly store to it. We cannot store to packed matrices directly,
		// since they are declared as array of vectors instead, and we need the fallback path below.
		CompilerGLSL::emit_store_statement(lhs_expression, rhs_expression);
	}
	else
	{
		// Special handling when storing to a remapped physical type.
		// This is mostly to deal with std140 padded matrices or vectors.

		TypeID physical_type_id = lhs_remapped_type ?
		                          ID(get_extended_decoration(lhs_expression, SPIRVCrossDecorationPhysicalTypeID)) :
		                          type.self;

		auto &physical_type = get<SPIRType>(physical_type_id);

		if (is_matrix(type))
		{
			const char *packed_pfx = lhs_packed_type ? "packed_" : "";

			// Packed matrices are stored as arrays of packed vectors, so we need
			// to assign the vectors one at a time.
			// For row-major matrices, we need to transpose the *right-hand* side,
			// not the left-hand side.

			// Lots of cases to cover here ...
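			// As a rough sketch (names assumed): storing a float3x3 "m" to a packed, column-major
			// member unrolls into per-column assignments along the lines of
			//   obj.member[0] = m[0]; obj.member[1] = m[1]; obj.member[2] = m[2];
			// with casts and per-component extraction layered on top for transposed or padded cases.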
			bool rhs_transpose = rhs_e && rhs_e->need_transpose;
			SPIRType write_type = type;
			string cast_expr;

			// We're dealing with transpose manually.
			if (rhs_transpose)
				rhs_e->need_transpose = false;

			if (transpose)
			{
				// We're dealing with transpose manually.
				lhs_e->need_transpose = false;

				write_type.vecsize = type.columns;
				write_type.columns = 1;

				if (physical_type.columns != type.columns)
					cast_expr = join("(device ", packed_pfx, type_to_glsl(write_type), "&)");

				if (rhs_transpose)
				{
					// If RHS is also transposed, we can just copy row by row.
					for (uint32_t i = 0; i < type.vecsize; i++)
					{
						statement(cast_expr, to_enclosed_expression(lhs_expression), "[", i, "]", " = ",
						          to_unpacked_row_major_matrix_expression(rhs_expression), "[", i, "];");
					}
				}
				else
				{
					auto vector_type = expression_type(rhs_expression);
					vector_type.vecsize = vector_type.columns;
					vector_type.columns = 1;

					// Transpose on the fly. Emitting a lot of full transpose() ops and extracting lanes seems very bad,
					// so pick out individual components instead.
					for (uint32_t i = 0; i < type.vecsize; i++)
					{
						string rhs_row = type_to_glsl_constructor(vector_type) + "(";
						for (uint32_t j = 0; j < vector_type.vecsize; j++)
						{
							rhs_row += join(to_enclosed_unpacked_expression(rhs_expression), "[", j, "][", i, "]");
							if (j + 1 < vector_type.vecsize)
								rhs_row += ", ";
						}
						rhs_row += ")";

						statement(cast_expr, to_enclosed_expression(lhs_expression), "[", i, "]", " = ", rhs_row, ";");
					}
				}

				// We're dealing with transpose manually.
				lhs_e->need_transpose = true;
			}
			else
			{
				write_type.columns = 1;

				if (physical_type.vecsize != type.vecsize)
					cast_expr = join("(device ", packed_pfx, type_to_glsl(write_type), "&)");

				if (rhs_transpose)
				{
					auto vector_type = expression_type(rhs_expression);
					vector_type.columns = 1;

					// Transpose on the fly. Emitting a lot of full transpose() ops and extracting lanes seems very bad,
					// so pick out individual components instead.
					for (uint32_t i = 0; i < type.columns; i++)
					{
						string rhs_row = type_to_glsl_constructor(vector_type) + "(";
						for (uint32_t j = 0; j < vector_type.vecsize; j++)
						{
							// Need to explicitly unpack expression since we've mucked with transpose state.
							auto unpacked_expr = to_unpacked_row_major_matrix_expression(rhs_expression);
							rhs_row += join(unpacked_expr, "[", j, "][", i, "]");
							if (j + 1 < vector_type.vecsize)
								rhs_row += ", ";
						}
						rhs_row += ")";

						statement(cast_expr, to_enclosed_expression(lhs_expression), "[", i, "]", " = ", rhs_row, ";");
					}
				}
				else
				{
					// Copy column-by-column.
					for (uint32_t i = 0; i < type.columns; i++)
					{
						statement(cast_expr, to_enclosed_expression(lhs_expression), "[", i, "]", " = ",
						          to_enclosed_unpacked_expression(rhs_expression), "[", i, "];");
					}
				}
			}

			// We're dealing with transpose manually.
			if (rhs_transpose)
				rhs_e->need_transpose = true;
		}
		else if (transpose)
		{
			lhs_e->need_transpose = false;

			SPIRType write_type = type;
			write_type.vecsize = 1;
			write_type.columns = 1;

			// Storing a column to a row-major matrix. Unroll the write.
			for (uint32_t c = 0; c < type.vecsize; c++)
			{
				auto lhs_expr = to_enclosed_expression(lhs_expression);
				auto column_index = lhs_expr.find_last_of('[');
				if (column_index != string::npos)
				{
					statement("((device ", type_to_glsl(write_type), "*)&",
					          lhs_expr.insert(column_index, join('[', c, ']', ")")), " = ",
					          to_extract_component_expression(rhs_expression, c), ";");
				}
			}

			lhs_e->need_transpose = true;
		}
		else if ((is_matrix(physical_type) || is_array(physical_type)) && physical_type.vecsize > type.vecsize)
		{
			assert(type.vecsize >= 1 && type.vecsize <= 3);

			// If we have packed types, we cannot use swizzled stores.
			// We could technically unroll the store for each element if needed.
			// When remapping to a std140 physical type, we always get float4,
			// and the packed decoration should always be removed.
			assert(!lhs_packed_type);

			string lhs = to_dereferenced_expression(lhs_expression);
			string rhs = to_pointer_expression(rhs_expression);

			// Unpack the expression so we can store to it with a float or float2.
			// It's still an l-value, so it's fine. Most other unpacking of expressions turn them into r-values instead.
			lhs = join("(device ", type_to_glsl(type), "&)", enclose_expression(lhs));
			if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs))
				statement(lhs, " = ", rhs, ";");
		}
		else if (!is_matrix(type))
		{
			string lhs = to_dereferenced_expression(lhs_expression);
			string rhs = to_pointer_expression(rhs_expression);
			if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs))
				statement(lhs, " = ", rhs, ";");
		}

		register_write(lhs_expression);
	}
}
static bool expression_ends_with(const string &expr_str, const std::string &ending)
{
	if (expr_str.length() >= ending.length())
		return (expr_str.compare(expr_str.length() - ending.length(), ending.length(), ending) == 0);
	else
		return false;
}

// Converts the format of the current expression from packed to unpacked,
// by wrapping the expression in a constructor of the appropriate type.
// Also, handle special physical ID remapping scenarios, similar to emit_store_statement().
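// E.g. (a sketch): a packed_float3 member "obj.v" unpacks as "float3(obj.v)", while a float
// member remapped to a std140 float4-array physical type unpacks by swizzling, e.g. "obj.v[i].x".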
string CompilerMSL::unpack_expression_type(string expr_str, const SPIRType &type, uint32_t physical_type_id,
                                           bool packed, bool row_major)
{
	// Trivial case, nothing to do.
	if (physical_type_id == 0 && !packed)
		return expr_str;

	const SPIRType *physical_type = nullptr;
	if (physical_type_id)
		physical_type = &get<SPIRType>(physical_type_id);

	static const char *swizzle_lut[] = {
		".x",
		".xy",
		".xyz",
	};

	if (physical_type && is_vector(*physical_type) && is_array(*physical_type) &&
	    physical_type->vecsize > type.vecsize && !expression_ends_with(expr_str, swizzle_lut[type.vecsize - 1]))
	{
		// std140 array cases for vectors.
		assert(type.vecsize >= 1 && type.vecsize <= 3);
		return enclose_expression(expr_str) + swizzle_lut[type.vecsize - 1];
	}
	else if (physical_type && is_matrix(*physical_type) && is_vector(type) && physical_type->vecsize > type.vecsize)
	{
		// Extract column from padded matrix.
		assert(type.vecsize >= 1 && type.vecsize <= 3);
		return enclose_expression(expr_str) + swizzle_lut[type.vecsize - 1];
	}
	else if (is_matrix(type))
	{
		// Packed matrices are stored as arrays of packed vectors. Unfortunately,
		// we can't just pass the array straight to the matrix constructor. We have to
		// pass each vector individually, so that they can be unpacked to normal vectors.
		if (!physical_type)
			physical_type = &type;

		uint32_t vecsize = type.vecsize;
		uint32_t columns = type.columns;
		if (row_major)
			swap(vecsize, columns);

		uint32_t physical_vecsize = row_major ? physical_type->columns : physical_type->vecsize;

		const char *base_type = type.width == 16 ? "half" : "float";
		string unpack_expr = join(base_type, columns, "x", vecsize, "(");

		const char *load_swiz = "";

		if (physical_vecsize != vecsize)
			load_swiz = swizzle_lut[vecsize - 1];

		for (uint32_t i = 0; i < columns; i++)
		{
			if (i > 0)
				unpack_expr += ", ";

			if (packed)
				unpack_expr += join(base_type, physical_vecsize, "(", expr_str, "[", i, "]", ")", load_swiz);
			else
				unpack_expr += join(expr_str, "[", i, "]", load_swiz);
		}

		unpack_expr += ")";
		return unpack_expr;
	}
	else
	{
		return join(type_to_glsl(type), "(", expr_str, ")");
	}
}
// Emits the file header info
void CompilerMSL::emit_header()
{
	// This particular line can be overridden during compilation, so make it a flag and not a pragma line.
	if (suppress_missing_prototypes)
		statement("#pragma clang diagnostic ignored \"-Wmissing-prototypes\"");

	// Disable warning about missing braces for array<T> template to make arrays a value type
	if (spv_function_implementations.count(SPVFuncImplUnsafeArray) != 0)
		statement("#pragma clang diagnostic ignored \"-Wmissing-braces\"");

	for (auto &pragma : pragma_lines)
		statement(pragma);

	if (!pragma_lines.empty() || suppress_missing_prototypes)
		statement("");

	statement("#include <metal_stdlib>");
	statement("#include <simd/simd.h>");

	for (auto &header : header_lines)
		statement(header);

	statement("");
	statement("using namespace metal;");
	statement("");

	for (auto &td : typedef_lines)
		statement(td);

	if (!typedef_lines.empty())
		statement("");
}

void CompilerMSL::add_pragma_line(const string &line)
{
	auto rslt = pragma_lines.insert(line);
	if (rslt.second)
		force_recompile();
}

void CompilerMSL::add_typedef_line(const string &line)
{
	auto rslt = typedef_lines.insert(line);
	if (rslt.second)
		force_recompile();
}
// Template structs like spvUnsafeArray<> need to be declared *before* any resources are declared
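// E.g. (a sketch): a function-local "float foo[4]" is declared as "spvUnsafeArray<float, 4> foo;",
// giving the array value semantics (whole-array assignment and return) which plain C arrays lack.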
void CompilerMSL::emit_custom_templates()
{
	for (const auto &spv_func : spv_function_implementations)
	{
		switch (spv_func)
		{
		case SPVFuncImplUnsafeArray:
			statement("template<typename T, size_t Num>");
			statement("struct spvUnsafeArray");
			begin_scope();
			statement("T elements[Num ? Num : 1];");
			statement("");
			statement("thread T& operator [] (size_t pos) thread");
			begin_scope();
			statement("return elements[pos];");
			end_scope();
			statement("constexpr const thread T& operator [] (size_t pos) const thread");
			begin_scope();
			statement("return elements[pos];");
			end_scope();
			statement("");
			statement("device T& operator [] (size_t pos) device");
			begin_scope();
			statement("return elements[pos];");
			end_scope();
			statement("constexpr const device T& operator [] (size_t pos) const device");
			begin_scope();
			statement("return elements[pos];");
			end_scope();
			statement("");
			statement("constexpr const constant T& operator [] (size_t pos) const constant");
			begin_scope();
			statement("return elements[pos];");
			end_scope();
			statement("");
			statement("threadgroup T& operator [] (size_t pos) threadgroup");
			begin_scope();
			statement("return elements[pos];");
			end_scope();
			statement("constexpr const threadgroup T& operator [] (size_t pos) const threadgroup");
			begin_scope();
			statement("return elements[pos];");
			end_scope();
			end_scope_decl();
			statement("");
			break;

		default:
			break;
		}
	}
}
// Emits any needed custom function bodies.
// Metal helper functions must be static force-inline, i.e. static inline __attribute__((always_inline))
// otherwise they will cause problems when linked together in a single Metallib.
void CompilerMSL::emit_custom_functions()
{
	for (uint32_t i = kArrayCopyMultidimMax; i >= 2; i--)
		if (spv_function_implementations.count(static_cast<SPVFuncImpl>(SPVFuncImplArrayCopyMultidimBase + i)))
			spv_function_implementations.insert(static_cast<SPVFuncImpl>(SPVFuncImplArrayCopyMultidimBase + i - 1));

	if (spv_function_implementations.count(SPVFuncImplDynamicImageSampler))
	{
		// Unfortunately, this one needs a lot of the other functions to compile OK.
		if (!msl_options.supports_msl_version(2))
			SPIRV_CROSS_THROW(
			    "spvDynamicImageSampler requires default-constructible texture objects, which require MSL 2.0.");
		spv_function_implementations.insert(SPVFuncImplForwardArgs);
		spv_function_implementations.insert(SPVFuncImplTextureSwizzle);
		if (msl_options.swizzle_texture_samples)
			spv_function_implementations.insert(SPVFuncImplGatherSwizzle);
		for (uint32_t i = SPVFuncImplChromaReconstructNearest2Plane;
		     i <= SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane; i++)
			spv_function_implementations.insert(static_cast<SPVFuncImpl>(i));
		spv_function_implementations.insert(SPVFuncImplExpandITUFullRange);
		spv_function_implementations.insert(SPVFuncImplExpandITUNarrowRange);
		spv_function_implementations.insert(SPVFuncImplConvertYCbCrBT709);
		spv_function_implementations.insert(SPVFuncImplConvertYCbCrBT601);
		spv_function_implementations.insert(SPVFuncImplConvertYCbCrBT2020);
	}

	for (uint32_t i = SPVFuncImplChromaReconstructNearest2Plane;
	     i <= SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane; i++)
		if (spv_function_implementations.count(static_cast<SPVFuncImpl>(i)))
			spv_function_implementations.insert(SPVFuncImplForwardArgs);

	if (spv_function_implementations.count(SPVFuncImplTextureSwizzle) ||
	    spv_function_implementations.count(SPVFuncImplGatherSwizzle) ||
	    spv_function_implementations.count(SPVFuncImplGatherCompareSwizzle))
	{
		spv_function_implementations.insert(SPVFuncImplForwardArgs);
		spv_function_implementations.insert(SPVFuncImplGetSwizzle);
	}

	for (const auto &spv_func : spv_function_implementations)
	{
		switch (spv_func)
		{
		case SPVFuncImplMod:
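			// A quick sketch of the difference: GLSL mod() takes the sign of y, so mod(-0.5, 1.0) == 0.5,
			// whereas Metal's fmod() takes the sign of x, so fmod(-0.5, 1.0) == -0.5.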
			statement("// Implementation of the GLSL mod() function, which is slightly different than Metal fmod()");
			statement("template<typename Tx, typename Ty>");
			statement("inline Tx mod(Tx x, Ty y)");
			begin_scope();
			statement("return x - y * floor(x / y);");
			end_scope();
			statement("");
			break;

		case SPVFuncImplRadians:
			statement("// Implementation of the GLSL radians() function");
			statement("template<typename T>");
			statement("inline T radians(T d)");
			begin_scope();
			statement("return d * T(0.01745329251);");
			end_scope();
			statement("");
			break;

		case SPVFuncImplDegrees:
			statement("// Implementation of the GLSL degrees() function");
			statement("template<typename T>");
			statement("inline T degrees(T r)");
			begin_scope();
			statement("return r * T(57.2957795131);");
			end_scope();
			statement("");
			break;

		case SPVFuncImplFindILsb:
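			// E.g. spvFindLSB(0x8u) == 3 via ctz(); the x == 0 case is mapped to -1 to match GLSL findLSB().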
			statement("// Implementation of the GLSL findLSB() function");
			statement("template<typename T>");
			statement("inline T spvFindLSB(T x)");
			begin_scope();
			statement("return select(ctz(x), T(-1), x == T(0));");
			end_scope();
			statement("");
			break;

		case SPVFuncImplFindUMsb:
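			// clz(T(0)) conveniently evaluates to the bit width of T, so for 32-bit x == 8 this
			// returns 32 - (28 + 1) == 3; x == 0 again maps to -1 per GLSL findMSB() rules.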
			statement("// Implementation of the unsigned GLSL findMSB() function");
			statement("template<typename T>");
			statement("inline T spvFindUMSB(T x)");
			begin_scope();
			statement("return select(clz(T(0)) - (clz(x) + T(1)), T(-1), x == T(0));");
			end_scope();
			statement("");
			break;

		case SPVFuncImplFindSMsb:
			statement("// Implementation of the signed GLSL findMSB() function");
			statement("template<typename T>");
			statement("inline T spvFindSMSB(T x)");
			begin_scope();
			statement("T v = select(x, T(-1) - x, x < T(0));");
			statement("return select(clz(T(0)) - (clz(v) + T(1)), T(-1), v == T(0));");
			end_scope();
			statement("");
			break;

		case SPVFuncImplSSign:
			statement("// Implementation of the GLSL sign() function for integer types");
			statement("template<typename T, typename E = typename enable_if<is_integral<T>::value>::type>");
			statement("inline T sign(T x)");
			begin_scope();
			statement("return select(select(select(x, T(0), x == T(0)), T(1), x > T(0)), T(-1), x < T(0));");
			end_scope();
			statement("");
			break;

		case SPVFuncImplArrayCopy:
		case SPVFuncImplArrayOfArrayCopy2Dim:
		case SPVFuncImplArrayOfArrayCopy3Dim:
		case SPVFuncImplArrayOfArrayCopy4Dim:
		case SPVFuncImplArrayOfArrayCopy5Dim:
		case SPVFuncImplArrayOfArrayCopy6Dim:
		{
			// Unfortunately we cannot template on the address space, so combinatorial explosion it is.
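			// For instance, the 1-dimensional constant-to-stack variant generated below comes out as:
			//   template<typename T, uint A>
			//   inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) { ... }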
			static const char *function_name_tags[] = {
				"FromConstantToStack",    "FromConstantToThreadGroup", "FromStackToStack",
				"FromStackToThreadGroup", "FromThreadGroupToStack",    "FromThreadGroupToThreadGroup",
				"FromDeviceToDevice",     "FromConstantToDevice",      "FromStackToDevice",
				"FromThreadGroupToDevice", "FromDeviceToStack",        "FromDeviceToThreadGroup",
			};

			static const char *src_address_space[] = {
				"constant", "constant", "thread const", "thread const",
				"threadgroup const", "threadgroup const", "device const", "constant",
				"thread const", "threadgroup const", "device const", "device const",
			};

			static const char *dst_address_space[] = {
				"thread", "threadgroup", "thread", "threadgroup", "thread", "threadgroup",
				"device", "device", "device", "device", "thread", "threadgroup",
			};

			for (uint32_t variant = 0; variant < 12; variant++)
			{
				uint32_t dimensions = spv_func - SPVFuncImplArrayCopyMultidimBase;
				string tmp = "template<typename T";
				for (uint8_t i = 0; i < dimensions; i++)
				{
					tmp += ", uint ";
					tmp += 'A' + i;
				}
				tmp += ">";
				statement(tmp);

				string array_arg;
				for (uint8_t i = 0; i < dimensions; i++)
				{
					array_arg += "[";
					array_arg += 'A' + i;
					array_arg += "]";
				}

				statement("inline void spvArrayCopy", function_name_tags[variant], dimensions, "(",
				          dst_address_space[variant], " T (&dst)", array_arg, ", ", src_address_space[variant],
				          " T (&src)", array_arg, ")");
				begin_scope();
				statement("for (uint i = 0; i < A; i++)");
				begin_scope();
				if (dimensions == 1)
					statement("dst[i] = src[i];");
				else
					statement("spvArrayCopy", function_name_tags[variant], dimensions - 1, "(dst[i], src[i]);");
				end_scope();
				end_scope();
				statement("");
			}
			break;
		}
		// Support for Metal 2.1's new texture_buffer type.
		case SPVFuncImplTexelBufferCoords:
		{
			if (msl_options.texel_buffer_texture_width > 0)
			{
				string tex_width_str = convert_to_string(msl_options.texel_buffer_texture_width);
				statement("// Returns 2D texture coords corresponding to 1D texel buffer coords");
				statement(force_inline);
				statement("uint2 spvTexelBufferCoord(uint tc)");
				begin_scope();
				statement(join("return uint2(tc % ", tex_width_str, ", tc / ", tex_width_str, ");"));
				end_scope();
				statement("");
			}
			else
			{
				statement("// Returns 2D texture coords corresponding to 1D texel buffer coords");
				statement(
				    "#define spvTexelBufferCoord(tc, tex) uint2((tc) % (tex).get_width(), (tc) / (tex).get_width())");
				statement("");
			}
			break;
		}

		// Emulate texture2D atomic operations
		case SPVFuncImplImage2DAtomicCoords:
		{
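			// The emulation backs the texture with a linear buffer whose rows are padded out to
			// spvLinearTextureAlignment bytes. E.g. (assumed numbers): with 16-byte alignment
			// (4 uints) and a 30-texel-wide texture, coord (5, 2) maps to buffer index 2 * 32 + 5 == 69.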
			if (msl_options.supports_msl_version(1, 2))
			{
				statement("// The required alignment of a linear texture of R32Uint format.");
				statement("constant uint spvLinearTextureAlignmentOverride [[function_constant(",
				          msl_options.r32ui_alignment_constant_id, ")]];");
				statement("constant uint spvLinearTextureAlignment = ",
				          "is_function_constant_defined(spvLinearTextureAlignmentOverride) ? ",
				          "spvLinearTextureAlignmentOverride : ", msl_options.r32ui_linear_texture_alignment, ";");
			}
			else
			{
				statement("// The required alignment of a linear texture of R32Uint format.");
				statement("constant uint spvLinearTextureAlignment = ", msl_options.r32ui_linear_texture_alignment,
				          ";");
			}
			statement("// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics");
			statement("#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + ",
			          " spvLinearTextureAlignment / 4 - 1) & ~(",
			          " spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x)");
			statement("");
			break;
		}

		// "fadd" intrinsic support
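		// Note: routing the add through fma() under [[clang::optnone]] is what keeps the operation
		// exact; it prevents fast-math from reassociating or contracting the expression, which is
		// presumably why these wrappers exist for NoContraction-style semantics.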
		case SPVFuncImplFAdd:
			statement("template<typename T>");
			statement("[[clang::optnone]] T spvFAdd(T l, T r)");
			begin_scope();
			statement("return fma(T(1), l, r);");
			end_scope();
			statement("");
			break;

		// "fsub" intrinsic support
		case SPVFuncImplFSub:
			statement("template<typename T>");
			statement("[[clang::optnone]] T spvFSub(T l, T r)");
			begin_scope();
			statement("return fma(T(-1), r, l);");
			end_scope();
			statement("");
			break;
		// "fmul" intrinsic support
		case SPVFuncImplFMul:
			statement("template<typename T>");
			statement("[[clang::optnone]] T spvFMul(T l, T r)");
			begin_scope();
			statement("return fma(l, r, T(0));");
			end_scope();
			statement("");

			statement("template<typename T, int Cols, int Rows>");
			statement("[[clang::optnone]] vec<T, Cols> spvFMulVectorMatrix(vec<T, Rows> v, matrix<T, Cols, Rows> m)");
			begin_scope();
			statement("vec<T, Cols> res = vec<T, Cols>(0);");
			statement("for (uint i = Rows; i > 0; --i)");
			begin_scope();
			statement("vec<T, Cols> tmp(0);");
			statement("for (uint j = 0; j < Cols; ++j)");
			begin_scope();
			statement("tmp[j] = m[j][i - 1];");
			end_scope();
			statement("res = fma(tmp, vec<T, Cols>(v[i - 1]), res);");
			end_scope();
			statement("return res;");
			end_scope();
			statement("");

			statement("template<typename T, int Cols, int Rows>");
			statement("[[clang::optnone]] vec<T, Rows> spvFMulMatrixVector(matrix<T, Cols, Rows> m, vec<T, Cols> v)");
			begin_scope();
			statement("vec<T, Rows> res = vec<T, Rows>(0);");
			statement("for (uint i = Cols; i > 0; --i)");
			begin_scope();
			statement("res = fma(m[i - 1], vec<T, Rows>(v[i - 1]), res);");
			end_scope();
			statement("return res;");
			end_scope();
			statement("");

			statement("template<typename T, int LCols, int LRows, int RCols, int RRows>");
			statement("[[clang::optnone]] matrix<T, RCols, LRows> spvFMulMatrixMatrix(matrix<T, LCols, LRows> l, matrix<T, RCols, RRows> r)");
			begin_scope();
			statement("matrix<T, RCols, LRows> res;");
			statement("for (uint i = 0; i < RCols; i++)");
			begin_scope();
			statement("vec<T, RCols> tmp(0);");
			statement("for (uint j = 0; j < LCols; j++)");
			begin_scope();
			statement("tmp = fma(vec<T, RCols>(r[i][j]), l[j], tmp);");
			end_scope();
			statement("res[i] = tmp;");
			end_scope();
			statement("return res;");
			end_scope();
			statement("");
			break;
		case SPVFuncImplQuantizeToF16:
			// Ensure fast-math is disabled to match Vulkan results.
			// SpvHalfTypeSelector is used to match the half* template type to the float* template type.
			// Depending on GPU, MSL does not always flush converted subnormal halfs to zero,
			// as required by OpQuantizeToF16, so check for subnormals and flush them to zero.
			statement("template <typename F> struct SpvHalfTypeSelector;");
			statement("template <> struct SpvHalfTypeSelector<float> { public: using H = half; };");
			statement("template<uint N> struct SpvHalfTypeSelector<vec<float, N>> { using H = vec<half, N>; };");
			statement("template<typename F, typename H = typename SpvHalfTypeSelector<F>::H>");
			statement("[[clang::optnone]] F spvQuantizeToF16(F fval)");
			begin_scope();
			statement("H hval = H(fval);");
			statement("hval = select(copysign(H(0), hval), hval, isnormal(hval) || isinf(hval) || isnan(hval));");
			statement("return F(hval);");
			end_scope();
			statement("");
			break;

		// Emulate texturecube_array with texture2d_array for iOS where this type is not available
		case SPVFuncImplCubemapTo2DArrayFace:
			statement(force_inline);
			statement("float3 spvCubemapTo2DArrayFace(float3 P)");
			begin_scope();
			statement("float3 Coords = abs(P.xyz);");
			statement("float CubeFace = 0;");
			statement("float ProjectionAxis = 0;");
			statement("float u = 0;");
			statement("float v = 0;");
			statement("if (Coords.x >= Coords.y && Coords.x >= Coords.z)");
			begin_scope();
			statement("CubeFace = P.x >= 0 ? 0 : 1;");
			statement("ProjectionAxis = Coords.x;");
			statement("u = P.x >= 0 ? -P.z : P.z;");
			statement("v = -P.y;");
			end_scope();
			statement("else if (Coords.y >= Coords.x && Coords.y >= Coords.z)");
			begin_scope();
			statement("CubeFace = P.y >= 0 ? 2 : 3;");
			statement("ProjectionAxis = Coords.y;");
			statement("u = P.x;");
			statement("v = P.y >= 0 ? P.z : -P.z;");
			end_scope();
			statement("else");
			begin_scope();
			statement("CubeFace = P.z >= 0 ? 4 : 5;");
			statement("ProjectionAxis = Coords.z;");
			statement("u = P.z >= 0 ? P.x : -P.x;");
			statement("v = -P.y;");
			end_scope();
			statement("u = 0.5 * (u/ProjectionAxis + 1);");
			statement("v = 0.5 * (v/ProjectionAxis + 1);");
			statement("return float3(u, v, CubeFace);");
			end_scope();
			statement("");
			break;
		case SPVFuncImplInverse4x4:
			statement("// Returns the determinant of a 2x2 matrix.");
			statement(force_inline);
			statement("float spvDet2x2(float a1, float a2, float b1, float b2)");
			begin_scope();
			statement("return a1 * b2 - b1 * a2;");
			end_scope();
			statement("");

			statement("// Returns the determinant of a 3x3 matrix.");
			statement(force_inline);
			statement("float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, "
			          "float c2, float c3)");
			begin_scope();
			statement("return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * spvDet2x2(a2, a3, "
			          "b2, b3);");
			end_scope();
			statement("");

			statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical");
			statement("// adjoint and dividing by the determinant. The contents of the matrix are changed.");
			statement(force_inline);
			statement("float4x4 spvInverse4x4(float4x4 m)");
			begin_scope();
			statement("float4x4 adj; // The adjoint matrix (inverse after dividing by determinant)");
			statement_no_indent("");
			statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix.");
			statement("adj[0][0] = spvDet3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], "
			          "m[3][3]);");
			statement("adj[0][1] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], "
			          "m[3][3]);");
			statement("adj[0][2] = spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], "
			          "m[3][3]);");
			statement("adj[0][3] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], "
			          "m[2][3]);");
			statement_no_indent("");
			statement("adj[1][0] = -spvDet3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], "
			          "m[3][3]);");
			statement("adj[1][1] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], "
			          "m[3][3]);");
			statement("adj[1][2] = -spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], "
			          "m[3][3]);");
			statement("adj[1][3] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], "
			          "m[2][3]);");
			statement_no_indent("");
			statement("adj[2][0] = spvDet3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], "
			          "m[3][3]);");
			statement("adj[2][1] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], "
			          "m[3][3]);");
			statement("adj[2][2] = spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], "
			          "m[3][3]);");
			statement("adj[2][3] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], "
			          "m[2][3]);");
			statement_no_indent("");
			statement("adj[3][0] = -spvDet3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], "
			          "m[3][2]);");
			statement("adj[3][1] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], "
			          "m[3][2]);");
			statement("adj[3][2] = -spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], "
			          "m[3][2]);");
			statement("adj[3][3] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], "
			          "m[2][2]);");
			statement_no_indent("");
			statement("// Calculate the determinant as a combination of the cofactors of the first row.");
			statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]) + (adj[0][3] "
			          "* m[3][0]);");
			statement_no_indent("");
			statement("// Divide the classical adjoint matrix by the determinant.");
			statement("// If determinant is zero, matrix is not invertible, so leave it unchanged.");
			statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;");
			end_scope();
			statement("");
			break;
		case SPVFuncImplInverse3x3:
			if (spv_function_implementations.count(SPVFuncImplInverse4x4) == 0)
			{
				statement("// Returns the determinant of a 2x2 matrix.");
				statement(force_inline);
				statement("float spvDet2x2(float a1, float a2, float b1, float b2)");
				begin_scope();
				statement("return a1 * b2 - b1 * a2;");
				end_scope();
				statement("");
			}

			statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical");
			statement("// adjoint and dividing by the determinant. The contents of the matrix are changed.");
			statement(force_inline);
			statement("float3x3 spvInverse3x3(float3x3 m)");
			begin_scope();
			statement("float3x3 adj; // The adjoint matrix (inverse after dividing by determinant)");
			statement_no_indent("");
			statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix.");
			statement("adj[0][0] = spvDet2x2(m[1][1], m[1][2], m[2][1], m[2][2]);");
			statement("adj[0][1] = -spvDet2x2(m[0][1], m[0][2], m[2][1], m[2][2]);");
			statement("adj[0][2] = spvDet2x2(m[0][1], m[0][2], m[1][1], m[1][2]);");
			statement_no_indent("");
			statement("adj[1][0] = -spvDet2x2(m[1][0], m[1][2], m[2][0], m[2][2]);");
			statement("adj[1][1] = spvDet2x2(m[0][0], m[0][2], m[2][0], m[2][2]);");
			statement("adj[1][2] = -spvDet2x2(m[0][0], m[0][2], m[1][0], m[1][2]);");
			statement_no_indent("");
			statement("adj[2][0] = spvDet2x2(m[1][0], m[1][1], m[2][0], m[2][1]);");
			statement("adj[2][1] = -spvDet2x2(m[0][0], m[0][1], m[2][0], m[2][1]);");
			statement("adj[2][2] = spvDet2x2(m[0][0], m[0][1], m[1][0], m[1][1]);");
			statement_no_indent("");
			statement("// Calculate the determinant as a combination of the cofactors of the first row.");
			statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]);");
			statement_no_indent("");
			statement("// Divide the classical adjoint matrix by the determinant.");
			statement("// If determinant is zero, matrix is not invertible, so leave it unchanged.");
			statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;");
			end_scope();
			statement("");
			break;
  4608. case SPVFuncImplInverse2x2:
  4609. statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical");
  4610. statement("// adjoint and dividing by the determinant. The contents of the matrix are changed.");
  4611. statement(force_inline);
  4612. statement("float2x2 spvInverse2x2(float2x2 m)");
  4613. begin_scope();
  4614. statement("float2x2 adj; // The adjoint matrix (inverse after dividing by determinant)");
  4615. statement_no_indent("");
  4616. statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix.");
  4617. statement("adj[0][0] = m[1][1];");
  4618. statement("adj[0][1] = -m[0][1];");
  4619. statement_no_indent("");
  4620. statement("adj[1][0] = -m[1][0];");
  4621. statement("adj[1][1] = m[0][0];");
  4622. statement_no_indent("");
  4623. statement("// Calculate the determinant as a combination of the cofactors of the first row.");
  4624. statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]);");
	statement_no_indent("");
	statement("// Divide the classical adjoint matrix by the determinant.");
	statement("// If determinant is zero, matrix is not invertible, so leave it unchanged.");
	statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;");
	end_scope();
	statement("");
	break;
case SPVFuncImplForwardArgs:
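	// Emits a stand-in for std::forward, spelled out for the 'thread' address
	// space, since the Metal standard library does not provide <utility>.
	// spvRemoveReference strips the reference so T&& deduces as intended.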
  4633. statement("template<typename T> struct spvRemoveReference { typedef T type; };");
  4634. statement("template<typename T> struct spvRemoveReference<thread T&> { typedef T type; };");
  4635. statement("template<typename T> struct spvRemoveReference<thread T&&> { typedef T type; };");
  4636. statement("template<typename T> inline constexpr thread T&& spvForward(thread typename "
  4637. "spvRemoveReference<T>::type& x)");
  4638. begin_scope();
  4639. statement("return static_cast<thread T&&>(x);");
  4640. end_scope();
  4641. statement("template<typename T> inline constexpr thread T&& spvForward(thread typename "
  4642. "spvRemoveReference<T>::type&& x)");
  4643. begin_scope();
  4644. statement("return static_cast<thread T&&>(x);");
  4645. end_scope();
  4646. statement("");
  4647. break;
  4648. case SPVFuncImplGetSwizzle:
  4649. statement("enum class spvSwizzle : uint");
  4650. begin_scope();
  4651. statement("none = 0,");
  4652. statement("zero,");
  4653. statement("one,");
  4654. statement("red,");
  4655. statement("green,");
  4656. statement("blue,");
  4657. statement("alpha");
  4658. end_scope_decl();
  4659. statement("");
  4660. statement("template<typename T>");
  4661. statement("inline T spvGetSwizzle(vec<T, 4> x, T c, spvSwizzle s)");
  4662. begin_scope();
  4663. statement("switch (s)");
  4664. begin_scope();
  4665. statement("case spvSwizzle::none:");
  4666. statement(" return c;");
  4667. statement("case spvSwizzle::zero:");
  4668. statement(" return 0;");
  4669. statement("case spvSwizzle::one:");
  4670. statement(" return 1;");
  4671. statement("case spvSwizzle::red:");
  4672. statement(" return x.r;");
  4673. statement("case spvSwizzle::green:");
  4674. statement(" return x.g;");
  4675. statement("case spvSwizzle::blue:");
  4676. statement(" return x.b;");
  4677. statement("case spvSwizzle::alpha:");
  4678. statement(" return x.a;");
  4679. end_scope();
  4680. end_scope();
  4681. statement("");
  4682. break;
  4683. case SPVFuncImplTextureSwizzle:
  4684. statement("// Wrapper function that swizzles texture samples and fetches.");
  4685. statement("template<typename T>");
  4686. statement("inline vec<T, 4> spvTextureSwizzle(vec<T, 4> x, uint s)");
  4687. begin_scope();
  4688. statement("if (!s)");
  4689. statement(" return x;");
  4690. statement("return vec<T, 4>(spvGetSwizzle(x, x.r, spvSwizzle((s >> 0) & 0xFF)), "
  4691. "spvGetSwizzle(x, x.g, spvSwizzle((s >> 8) & 0xFF)), spvGetSwizzle(x, x.b, spvSwizzle((s >> 16) "
  4692. "& 0xFF)), "
  4693. "spvGetSwizzle(x, x.a, spvSwizzle((s >> 24) & 0xFF)));");
  4694. end_scope();
  4695. statement("");
  4696. statement("template<typename T>");
  4697. statement("inline T spvTextureSwizzle(T x, uint s)");
  4698. begin_scope();
  4699. statement("return spvTextureSwizzle(vec<T, 4>(x, 0, 0, 1), s).x;");
  4700. end_scope();
  4701. statement("");
  4702. break;
  4703. case SPVFuncImplGatherSwizzle:
  4704. statement("// Wrapper function that swizzles texture gathers.");
  4705. statement("template<typename T, template<typename, access = access::sample, typename = void> class Tex, "
  4706. "typename... Ts>");
  4707. statement("inline vec<T, 4> spvGatherSwizzle(const thread Tex<T>& t, sampler s, "
  4708. "uint sw, component c, Ts... params) METAL_CONST_ARG(c)");
  4709. begin_scope();
  4710. statement("if (sw)");
  4711. begin_scope();
  4712. statement("switch (spvSwizzle((sw >> (uint(c) * 8)) & 0xFF))");
  4713. begin_scope();
  4714. statement("case spvSwizzle::none:");
  4715. statement(" break;");
  4716. statement("case spvSwizzle::zero:");
  4717. statement(" return vec<T, 4>(0, 0, 0, 0);");
  4718. statement("case spvSwizzle::one:");
  4719. statement(" return vec<T, 4>(1, 1, 1, 1);");
  4720. statement("case spvSwizzle::red:");
  4721. statement(" return t.gather(s, spvForward<Ts>(params)..., component::x);");
  4722. statement("case spvSwizzle::green:");
  4723. statement(" return t.gather(s, spvForward<Ts>(params)..., component::y);");
  4724. statement("case spvSwizzle::blue:");
  4725. statement(" return t.gather(s, spvForward<Ts>(params)..., component::z);");
  4726. statement("case spvSwizzle::alpha:");
  4727. statement(" return t.gather(s, spvForward<Ts>(params)..., component::w);");
  4728. end_scope();
  4729. end_scope();
  4730. // texture::gather insists on its component parameter being a constant
  4731. // expression, so we need this silly workaround just to compile the shader.
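	// For illustration: passing the runtime 'c' straight through, as in
	// t.gather(s, spvForward<Ts>(params)..., c), would not compile, hence
	// one call per component with the enclosing switch picking between them.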
  4732. statement("switch (c)");
  4733. begin_scope();
  4734. statement("case component::x:");
  4735. statement(" return t.gather(s, spvForward<Ts>(params)..., component::x);");
  4736. statement("case component::y:");
  4737. statement(" return t.gather(s, spvForward<Ts>(params)..., component::y);");
  4738. statement("case component::z:");
  4739. statement(" return t.gather(s, spvForward<Ts>(params)..., component::z);");
  4740. statement("case component::w:");
  4741. statement(" return t.gather(s, spvForward<Ts>(params)..., component::w);");
  4742. end_scope();
  4743. end_scope();
  4744. statement("");
  4745. break;
  4746. case SPVFuncImplGatherCompareSwizzle:
  4747. statement("// Wrapper function that swizzles depth texture gathers.");
  4748. statement("template<typename T, template<typename, access = access::sample, typename = void> class Tex, "
  4749. "typename... Ts>");
  4750. statement("inline vec<T, 4> spvGatherCompareSwizzle(const thread Tex<T>& t, sampler "
  4751. "s, uint sw, Ts... params) ");
  4752. begin_scope();
  4753. statement("if (sw)");
  4754. begin_scope();
  4755. statement("switch (spvSwizzle(sw & 0xFF))");
  4756. begin_scope();
  4757. statement("case spvSwizzle::none:");
  4758. statement("case spvSwizzle::red:");
  4759. statement(" break;");
  4760. statement("case spvSwizzle::zero:");
  4761. statement("case spvSwizzle::green:");
  4762. statement("case spvSwizzle::blue:");
  4763. statement("case spvSwizzle::alpha:");
  4764. statement(" return vec<T, 4>(0, 0, 0, 0);");
  4765. statement("case spvSwizzle::one:");
  4766. statement(" return vec<T, 4>(1, 1, 1, 1);");
  4767. end_scope();
  4768. end_scope();
  4769. statement("return t.gather_compare(s, spvForward<Ts>(params)...);");
  4770. end_scope();
  4771. statement("");
  4772. break;
  4773. case SPVFuncImplSubgroupBroadcast:
  4774. // Metal doesn't allow broadcasting boolean values directly, but we can work around that by broadcasting
  4775. // them as integers.
  4776. statement("template<typename T>");
  4777. statement("inline T spvSubgroupBroadcast(T value, ushort lane)");
  4778. begin_scope();
  4779. if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
  4780. statement("return quad_broadcast(value, lane);");
  4781. else
  4782. statement("return simd_broadcast(value, lane);");
  4783. end_scope();
  4784. statement("");
  4785. statement("template<>");
  4786. statement("inline bool spvSubgroupBroadcast(bool value, ushort lane)");
  4787. begin_scope();
  4788. if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
  4789. statement("return !!quad_broadcast((ushort)value, lane);");
  4790. else
  4791. statement("return !!simd_broadcast((ushort)value, lane);");
  4792. end_scope();
  4793. statement("");
  4794. statement("template<uint N>");
  4795. statement("inline vec<bool, N> spvSubgroupBroadcast(vec<bool, N> value, ushort lane)");
  4796. begin_scope();
  4797. if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
  4798. statement("return (vec<bool, N>)quad_broadcast((vec<ushort, N>)value, lane);");
  4799. else
  4800. statement("return (vec<bool, N>)simd_broadcast((vec<ushort, N>)value, lane);");
  4801. end_scope();
  4802. statement("");
  4803. break;
  4804. case SPVFuncImplSubgroupBroadcastFirst:
  4805. statement("template<typename T>");
  4806. statement("inline T spvSubgroupBroadcastFirst(T value)");
  4807. begin_scope();
  4808. if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
  4809. statement("return quad_broadcast_first(value);");
  4810. else
  4811. statement("return simd_broadcast_first(value);");
  4812. end_scope();
  4813. statement("");
  4814. statement("template<>");
  4815. statement("inline bool spvSubgroupBroadcastFirst(bool value)");
  4816. begin_scope();
  4817. if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
  4818. statement("return !!quad_broadcast_first((ushort)value);");
  4819. else
  4820. statement("return !!simd_broadcast_first((ushort)value);");
  4821. end_scope();
  4822. statement("");
  4823. statement("template<uint N>");
  4824. statement("inline vec<bool, N> spvSubgroupBroadcastFirst(vec<bool, N> value)");
  4825. begin_scope();
  4826. if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
  4827. statement("return (vec<bool, N>)quad_broadcast_first((vec<ushort, N>)value);");
  4828. else
  4829. statement("return (vec<bool, N>)simd_broadcast_first((vec<ushort, N>)value);");
  4830. end_scope();
  4831. statement("");
  4832. break;
  4833. case SPVFuncImplSubgroupBallot:
  4834. statement("inline uint4 spvSubgroupBallot(bool value)");
  4835. begin_scope();
  4836. if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
  4837. {
  4838. statement("return uint4((quad_vote::vote_t)quad_ballot(value), 0, 0, 0);");
  4839. }
  4840. else if (msl_options.is_ios())
  4841. {
  4842. // The current simd_vote on iOS uses a 32-bit integer-like object.
  4843. statement("return uint4((simd_vote::vote_t)simd_ballot(value), 0, 0, 0);");
  4844. }
  4845. else
  4846. {
  4847. statement("simd_vote vote = simd_ballot(value);");
  4848. statement("// simd_ballot() returns a 64-bit integer-like object, but");
  4849. statement("// SPIR-V callers expect a uint4. We must convert.");
  4850. statement("// FIXME: This won't include higher bits if Apple ever supports");
  4851. statement("// 128 lanes in an SIMD-group.");
  4852. statement("return uint4(as_type<uint2>((simd_vote::vote_t)vote), 0, 0);");
  4853. }
  4854. end_scope();
  4855. statement("");
  4856. break;
  4857. case SPVFuncImplSubgroupBallotBitExtract:
  4858. statement("inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit)");
  4859. begin_scope();
  4860. statement("return !!extract_bits(ballot[bit / 32], bit % 32, 1);");
  4861. end_scope();
  4862. statement("");
  4863. break;
  4864. case SPVFuncImplSubgroupBallotFindLSB:
  4865. statement("inline uint spvSubgroupBallotFindLSB(uint4 ballot, uint gl_SubgroupSize)");
  4866. begin_scope();
  4867. if (msl_options.is_ios())
  4868. {
  4869. statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0));");
  4870. }
  4871. else
  4872. {
  4873. statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), "
  4874. "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));");
	}
	statement("ballot &= mask;");
	statement("return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + "
	          "ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0);");
	end_scope();
	statement("");
	break;
case SPVFuncImplSubgroupBallotFindMSB:
	statement("inline uint spvSubgroupBallotFindMSB(uint4 ballot, uint gl_SubgroupSize)");
	begin_scope();
	if (msl_options.is_ios())
	{
		statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0));");
	}
	else
	{
		statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), "
		          "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));");
	}
	statement("ballot &= mask;");
	statement("return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - "
	          "(clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), "
	          "ballot.z == 0), ballot.w == 0);");
	end_scope();
	statement("");
	break;
case SPVFuncImplSubgroupBallotBitCount:
	statement("inline uint spvPopCount4(uint4 ballot)");
	begin_scope();
	statement("return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w);");
	end_scope();
	statement("");
	statement("inline uint spvSubgroupBallotBitCount(uint4 ballot, uint gl_SubgroupSize)");
	begin_scope();
	if (msl_options.is_ios())
	{
		statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0));");
	}
	else
	{
		statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), "
		          "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));");
	}
	statement("return spvPopCount4(ballot & mask);");
	end_scope();
	statement("");
	statement("inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)");
	begin_scope();
	if (msl_options.is_ios())
	{
		statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID + 1), uint3(0));");
	}
	else
	{
		statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), "
		          "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), "
		          "uint2(0));");
	}
	statement("return spvPopCount4(ballot & mask);");
	end_scope();
	statement("");
	statement("inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)");
	begin_scope();
	if (msl_options.is_ios())
	{
		// Note: uint4 needs four scalar components here; the original uint2(0) only supplied three.
		statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID), uint3(0));");
	}
	else
	{
		statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), "
		          "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));");
	}
	statement("return spvPopCount4(ballot & mask);");
	end_scope();
	statement("");
	break;
case SPVFuncImplSubgroupAllEqual:
	// Metal doesn't provide a function to evaluate this directly. But, we can
	// implement this by comparing every thread's value to one thread's value
	// (in this case, the value of the first active thread). Then, by the transitive
	// property of equality, if all comparisons return true, then they are all equal.
	statement("template<typename T>");
	statement("inline bool spvSubgroupAllEqual(T value)");
	begin_scope();
	if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
		statement("return quad_all(all(value == quad_broadcast_first(value)));");
	else
		statement("return simd_all(all(value == simd_broadcast_first(value)));");
	end_scope();
	statement("");
  4965. statement("template<>");
  4966. statement("inline bool spvSubgroupAllEqual(bool value)");
  4967. begin_scope();
  4968. if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
  4969. statement("return quad_all(value) || !quad_any(value);");
  4970. else
  4971. statement("return simd_all(value) || !simd_any(value);");
  4972. end_scope();
  4973. statement("");
  4974. statement("template<uint N>");
  4975. statement("inline bool spvSubgroupAllEqual(vec<bool, N> value)");
  4976. begin_scope();
  4977. if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
  4978. statement("return quad_all(all(value == (vec<bool, N>)quad_broadcast_first((vec<ushort, N>)value)));");
  4979. else
  4980. statement("return simd_all(all(value == (vec<bool, N>)simd_broadcast_first((vec<ushort, N>)value)));");
  4981. end_scope();
  4982. statement("");
  4983. break;
  4984. case SPVFuncImplSubgroupShuffle:
  4985. statement("template<typename T>");
  4986. statement("inline T spvSubgroupShuffle(T value, ushort lane)");
  4987. begin_scope();
  4988. if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
  4989. statement("return quad_shuffle(value, lane);");
  4990. else
  4991. statement("return simd_shuffle(value, lane);");
  4992. end_scope();
  4993. statement("");
  4994. statement("template<>");
  4995. statement("inline bool spvSubgroupShuffle(bool value, ushort lane)");
  4996. begin_scope();
  4997. if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
  4998. statement("return !!quad_shuffle((ushort)value, lane);");
  4999. else
  5000. statement("return !!simd_shuffle((ushort)value, lane);");
  5001. end_scope();
  5002. statement("");
  5003. statement("template<uint N>");
  5004. statement("inline vec<bool, N> spvSubgroupShuffle(vec<bool, N> value, ushort lane)");
  5005. begin_scope();
  5006. if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
  5007. statement("return (vec<bool, N>)quad_shuffle((vec<ushort, N>)value, lane);");
  5008. else
  5009. statement("return (vec<bool, N>)simd_shuffle((vec<ushort, N>)value, lane);");
  5010. end_scope();
  5011. statement("");
  5012. break;
  5013. case SPVFuncImplSubgroupShuffleXor:
  5014. statement("template<typename T>");
  5015. statement("inline T spvSubgroupShuffleXor(T value, ushort mask)");
  5016. begin_scope();
  5017. if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
  5018. statement("return quad_shuffle_xor(value, mask);");
  5019. else
  5020. statement("return simd_shuffle_xor(value, mask);");
  5021. end_scope();
  5022. statement("");
  5023. statement("template<>");
  5024. statement("inline bool spvSubgroupShuffleXor(bool value, ushort mask)");
  5025. begin_scope();
  5026. if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
  5027. statement("return !!quad_shuffle_xor((ushort)value, mask);");
  5028. else
  5029. statement("return !!simd_shuffle_xor((ushort)value, mask);");
  5030. end_scope();
  5031. statement("");
  5032. statement("template<uint N>");
  5033. statement("inline vec<bool, N> spvSubgroupShuffleXor(vec<bool, N> value, ushort mask)");
  5034. begin_scope();
  5035. if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
  5036. statement("return (vec<bool, N>)quad_shuffle_xor((vec<ushort, N>)value, mask);");
  5037. else
  5038. statement("return (vec<bool, N>)simd_shuffle_xor((vec<ushort, N>)value, mask);");
  5039. end_scope();
  5040. statement("");
  5041. break;
  5042. case SPVFuncImplSubgroupShuffleUp:
  5043. statement("template<typename T>");
  5044. statement("inline T spvSubgroupShuffleUp(T value, ushort delta)");
  5045. begin_scope();
  5046. if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
  5047. statement("return quad_shuffle_up(value, delta);");
  5048. else
  5049. statement("return simd_shuffle_up(value, delta);");
  5050. end_scope();
  5051. statement("");
  5052. statement("template<>");
  5053. statement("inline bool spvSubgroupShuffleUp(bool value, ushort delta)");
  5054. begin_scope();
  5055. if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
  5056. statement("return !!quad_shuffle_up((ushort)value, delta);");
  5057. else
  5058. statement("return !!simd_shuffle_up((ushort)value, delta);");
  5059. end_scope();
  5060. statement("");
  5061. statement("template<uint N>");
  5062. statement("inline vec<bool, N> spvSubgroupShuffleUp(vec<bool, N> value, ushort delta)");
  5063. begin_scope();
  5064. if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
  5065. statement("return (vec<bool, N>)quad_shuffle_up((vec<ushort, N>)value, delta);");
  5066. else
  5067. statement("return (vec<bool, N>)simd_shuffle_up((vec<ushort, N>)value, delta);");
  5068. end_scope();
  5069. statement("");
  5070. break;
  5071. case SPVFuncImplSubgroupShuffleDown:
  5072. statement("template<typename T>");
  5073. statement("inline T spvSubgroupShuffleDown(T value, ushort delta)");
  5074. begin_scope();
  5075. if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
  5076. statement("return quad_shuffle_down(value, delta);");
  5077. else
  5078. statement("return simd_shuffle_down(value, delta);");
  5079. end_scope();
  5080. statement("");
  5081. statement("template<>");
  5082. statement("inline bool spvSubgroupShuffleDown(bool value, ushort delta)");
  5083. begin_scope();
  5084. if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
  5085. statement("return !!quad_shuffle_down((ushort)value, delta);");
  5086. else
  5087. statement("return !!simd_shuffle_down((ushort)value, delta);");
  5088. end_scope();
  5089. statement("");
  5090. statement("template<uint N>");
  5091. statement("inline vec<bool, N> spvSubgroupShuffleDown(vec<bool, N> value, ushort delta)");
  5092. begin_scope();
  5093. if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
  5094. statement("return (vec<bool, N>)quad_shuffle_down((vec<ushort, N>)value, delta);");
  5095. else
  5096. statement("return (vec<bool, N>)simd_shuffle_down((vec<ushort, N>)value, delta);");
  5097. end_scope();
  5098. statement("");
  5099. break;
  5100. case SPVFuncImplQuadBroadcast:
  5101. statement("template<typename T>");
  5102. statement("inline T spvQuadBroadcast(T value, uint lane)");
  5103. begin_scope();
  5104. statement("return quad_broadcast(value, lane);");
  5105. end_scope();
  5106. statement("");
  5107. statement("template<>");
  5108. statement("inline bool spvQuadBroadcast(bool value, uint lane)");
  5109. begin_scope();
  5110. statement("return !!quad_broadcast((ushort)value, lane);");
  5111. end_scope();
  5112. statement("");
  5113. statement("template<uint N>");
  5114. statement("inline vec<bool, N> spvQuadBroadcast(vec<bool, N> value, uint lane)");
  5115. begin_scope();
  5116. statement("return (vec<bool, N>)quad_broadcast((vec<ushort, N>)value, lane);");
  5117. end_scope();
  5118. statement("");
  5119. break;
  5120. case SPVFuncImplQuadSwap:
  5121. // We can implement this easily based on the following table giving
  5122. // the target lane ID from the direction and current lane ID:
  5123. // Direction
  5124. // | 0 | 1 | 2 |
  5125. // ---+---+---+---+
  5126. // L 0 | 1 2 3
  5127. // a 1 | 0 3 2
  5128. // n 2 | 3 0 1
  5129. // e 3 | 2 1 0
  5130. // Notice that target = source ^ (direction + 1).
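	// Spot check: lane 2 with direction 1 gives 2 ^ (1 + 1) = 0, matching the table.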
  5131. statement("template<typename T>");
  5132. statement("inline T spvQuadSwap(T value, uint dir)");
  5133. begin_scope();
  5134. statement("return quad_shuffle_xor(value, dir + 1);");
  5135. end_scope();
  5136. statement("");
  5137. statement("template<>");
  5138. statement("inline bool spvQuadSwap(bool value, uint dir)");
  5139. begin_scope();
  5140. statement("return !!quad_shuffle_xor((ushort)value, dir + 1);");
  5141. end_scope();
  5142. statement("");
  5143. statement("template<uint N>");
  5144. statement("inline vec<bool, N> spvQuadSwap(vec<bool, N> value, uint dir)");
  5145. begin_scope();
  5146. statement("return (vec<bool, N>)quad_shuffle_xor((vec<ushort, N>)value, dir + 1);");
  5147. end_scope();
  5148. statement("");
  5149. break;
  5150. case SPVFuncImplReflectScalar:
  5151. // Metal does not support scalar versions of these functions.
  5152. // Ensure fast-math is disabled to match Vulkan results.
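	// Scalar specialization of GLSL reflect(I, N) = I - 2*dot(N, I)*N:
	// with scalars dot(n, i) is just n*i, giving i - 2*i*n*n (so -i when n*n == 1).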
  5153. statement("template<typename T>");
  5154. statement("[[clang::optnone]] T spvReflect(T i, T n)");
  5155. begin_scope();
  5156. statement("return i - T(2) * i * n * n;");
  5157. end_scope();
  5158. statement("");
  5159. break;
  5160. case SPVFuncImplRefractScalar:
  5161. // Metal does not support scalar versions of these functions.
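	// Follows the GLSL refract derivation: k = 1 - eta^2 * (1 - dot(N, I)^2),
	// where k < 0 signals total internal reflection and mandates a zero result.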
  5162. statement("template<typename T>");
  5163. statement("inline T spvRefract(T i, T n, T eta)");
  5164. begin_scope();
  5165. statement("T NoI = n * i;");
  5166. statement("T NoI2 = NoI * NoI;");
  5167. statement("T k = T(1) - eta * eta * (T(1) - NoI2);");
  5168. statement("if (k < T(0))");
  5169. begin_scope();
  5170. statement("return T(0);");
  5171. end_scope();
  5172. statement("else");
  5173. begin_scope();
  5174. statement("return eta * i - (eta * NoI + sqrt(k)) * n;");
  5175. end_scope();
  5176. end_scope();
  5177. statement("");
  5178. break;
  5179. case SPVFuncImplFaceForwardScalar:
  5180. // Metal does not support scalar versions of these functions.
  5181. statement("template<typename T>");
  5182. statement("inline T spvFaceForward(T n, T i, T nref)");
  5183. begin_scope();
  5184. statement("return i * nref < T(0) ? n : -n;");
  5185. end_scope();
  5186. statement("");
  5187. break;
  5188. case SPVFuncImplChromaReconstructNearest2Plane:
  5189. statement("template<typename T, typename... LodOptions>");
  5190. statement("inline vec<T, 4> spvChromaReconstructNearest(texture2d<T> plane0, texture2d<T> plane1, sampler "
  5191. "samp, float2 coord, LodOptions... options)");
  5192. begin_scope();
  5193. statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
  5194. statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
  5195. statement("ycbcr.br = plane1.sample(samp, coord, spvForward<LodOptions>(options)...).rg;");
  5196. statement("return ycbcr;");
  5197. end_scope();
  5198. statement("");
  5199. break;
  5200. case SPVFuncImplChromaReconstructNearest3Plane:
  5201. statement("template<typename T, typename... LodOptions>");
  5202. statement("inline vec<T, 4> spvChromaReconstructNearest(texture2d<T> plane0, texture2d<T> plane1, "
  5203. "texture2d<T> plane2, sampler samp, float2 coord, LodOptions... options)");
  5204. begin_scope();
  5205. statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
  5206. statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
  5207. statement("ycbcr.b = plane1.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
  5208. statement("ycbcr.r = plane2.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
  5209. statement("return ycbcr;");
  5210. end_scope();
  5211. statement("");
  5212. break;
  5213. case SPVFuncImplChromaReconstructLinear422CositedEven2Plane:
  5214. statement("template<typename T, typename... LodOptions>");
  5215. statement("inline vec<T, 4> spvChromaReconstructLinear422CositedEven(texture2d<T> plane0, texture2d<T> "
  5216. "plane1, sampler samp, float2 coord, LodOptions... options)");
  5217. begin_scope();
  5218. statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
  5219. statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
  5220. statement("if (fract(coord.x * plane1.get_width()) != 0.0)");
  5221. begin_scope();
  5222. statement("ycbcr.br = vec<T, 2>(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
  5223. "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), 0.5).rg);");
  5224. end_scope();
  5225. statement("else");
  5226. begin_scope();
  5227. statement("ycbcr.br = plane1.sample(samp, coord, spvForward<LodOptions>(options)...).rg;");
  5228. end_scope();
  5229. statement("return ycbcr;");
  5230. end_scope();
  5231. statement("");
  5232. break;
  5233. case SPVFuncImplChromaReconstructLinear422CositedEven3Plane:
  5234. statement("template<typename T, typename... LodOptions>");
  5235. statement("inline vec<T, 4> spvChromaReconstructLinear422CositedEven(texture2d<T> plane0, texture2d<T> "
  5236. "plane1, texture2d<T> plane2, sampler samp, float2 coord, LodOptions... options)");
  5237. begin_scope();
  5238. statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
  5239. statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
  5240. statement("if (fract(coord.x * plane1.get_width()) != 0.0)");
  5241. begin_scope();
  5242. statement("ycbcr.b = T(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
  5243. "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), 0.5).r);");
  5244. statement("ycbcr.r = T(mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)...), "
  5245. "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), 0.5).r);");
  5246. end_scope();
  5247. statement("else");
  5248. begin_scope();
  5249. statement("ycbcr.b = plane1.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
  5250. statement("ycbcr.r = plane2.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
  5251. end_scope();
  5252. statement("return ycbcr;");
  5253. end_scope();
  5254. statement("");
  5255. break;
  5256. case SPVFuncImplChromaReconstructLinear422Midpoint2Plane:
  5257. statement("template<typename T, typename... LodOptions>");
  5258. statement("inline vec<T, 4> spvChromaReconstructLinear422Midpoint(texture2d<T> plane0, texture2d<T> "
  5259. "plane1, sampler samp, float2 coord, LodOptions... options)");
  5260. begin_scope();
  5261. statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
  5262. statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
  5263. statement("int2 offs = int2(fract(coord.x * plane1.get_width()) != 0.0 ? 1 : -1, 0);");
  5264. statement("ycbcr.br = vec<T, 2>(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
  5265. "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., offs), 0.25).rg);");
  5266. statement("return ycbcr;");
  5267. end_scope();
  5268. statement("");
  5269. break;
  5270. case SPVFuncImplChromaReconstructLinear422Midpoint3Plane:
  5271. statement("template<typename T, typename... LodOptions>");
  5272. statement("inline vec<T, 4> spvChromaReconstructLinear422Midpoint(texture2d<T> plane0, texture2d<T> "
  5273. "plane1, texture2d<T> plane2, sampler samp, float2 coord, LodOptions... options)");
  5274. begin_scope();
  5275. statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
  5276. statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
  5277. statement("int2 offs = int2(fract(coord.x * plane1.get_width()) != 0.0 ? 1 : -1, 0);");
  5278. statement("ycbcr.b = T(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
  5279. "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., offs), 0.25).r);");
  5280. statement("ycbcr.r = T(mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)...), "
  5281. "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., offs), 0.25).r);");
  5282. statement("return ycbcr;");
  5283. end_scope();
  5284. statement("");
  5285. break;
  5286. case SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven2Plane:
  5287. statement("template<typename T, typename... LodOptions>");
  5288. statement("inline vec<T, 4> spvChromaReconstructLinear420XCositedEvenYCositedEven(texture2d<T> plane0, "
  5289. "texture2d<T> plane1, sampler samp, float2 coord, LodOptions... options)");
  5290. begin_scope();
  5291. statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
  5292. statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
  5293. statement("float2 ab = fract(round(coord * float2(plane0.get_width(), plane0.get_height())) * 0.5);");
  5294. statement("ycbcr.br = vec<T, 2>(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
  5295. "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
  5296. "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
  5297. "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).rg);");
  5298. statement("return ycbcr;");
  5299. end_scope();
  5300. statement("");
  5301. break;
  5302. case SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven3Plane:
  5303. statement("template<typename T, typename... LodOptions>");
  5304. statement("inline vec<T, 4> spvChromaReconstructLinear420XCositedEvenYCositedEven(texture2d<T> plane0, "
  5305. "texture2d<T> plane1, texture2d<T> plane2, sampler samp, float2 coord, LodOptions... options)");
  5306. begin_scope();
  5307. statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
  5308. statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
  5309. statement("float2 ab = fract(round(coord * float2(plane0.get_width(), plane0.get_height())) * 0.5);");
  5310. statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
  5311. "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
  5312. "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
  5313. "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);");
  5314. statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)...), "
  5315. "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
  5316. "mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
  5317. "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);");
  5318. statement("return ycbcr;");
  5319. end_scope();
  5320. statement("");
  5321. break;
  5322. case SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven2Plane:
  5323. statement("template<typename T, typename... LodOptions>");
  5324. statement("inline vec<T, 4> spvChromaReconstructLinear420XMidpointYCositedEven(texture2d<T> plane0, "
  5325. "texture2d<T> plane1, sampler samp, float2 coord, LodOptions... options)");
  5326. begin_scope();
  5327. statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
  5328. statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
  5329. statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, "
  5330. "0)) * 0.5);");
  5331. statement("ycbcr.br = vec<T, 2>(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
  5332. "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
  5333. "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
  5334. "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).rg);");
  5335. statement("return ycbcr;");
  5336. end_scope();
  5337. statement("");
  5338. break;
  5339. case SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven3Plane:
  5340. statement("template<typename T, typename... LodOptions>");
  5341. statement("inline vec<T, 4> spvChromaReconstructLinear420XMidpointYCositedEven(texture2d<T> plane0, "
  5342. "texture2d<T> plane1, texture2d<T> plane2, sampler samp, float2 coord, LodOptions... options)");
  5343. begin_scope();
  5344. statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
  5345. statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
  5346. statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, "
  5347. "0)) * 0.5);");
  5348. statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
  5349. "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
  5350. "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
  5351. "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);");
  5352. statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)...), "
  5353. "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
  5354. "mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
  5355. "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);");
  5356. statement("return ycbcr;");
  5357. end_scope();
  5358. statement("");
  5359. break;
  5360. case SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint2Plane:
  5361. statement("template<typename T, typename... LodOptions>");
  5362. statement("inline vec<T, 4> spvChromaReconstructLinear420XCositedEvenYMidpoint(texture2d<T> plane0, "
  5363. "texture2d<T> plane1, sampler samp, float2 coord, LodOptions... options)");
  5364. begin_scope();
  5365. statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
  5366. statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
  5367. statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0, "
  5368. "0.5)) * 0.5);");
  5369. statement("ycbcr.br = vec<T, 2>(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
  5370. "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
  5371. "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
  5372. "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).rg);");
  5373. statement("return ycbcr;");
  5374. end_scope();
  5375. statement("");
  5376. break;
  5377. case SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint3Plane:
  5378. statement("template<typename T, typename... LodOptions>");
  5379. statement("inline vec<T, 4> spvChromaReconstructLinear420XCositedEvenYMidpoint(texture2d<T> plane0, "
  5380. "texture2d<T> plane1, texture2d<T> plane2, sampler samp, float2 coord, LodOptions... options)");
  5381. begin_scope();
  5382. statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
  5383. statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
  5384. statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0, "
  5385. "0.5)) * 0.5);");
  5386. statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
  5387. "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
  5388. "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
  5389. "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);");
  5390. statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)...), "
  5391. "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
  5392. "mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
  5393. "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);");
  5394. statement("return ycbcr;");
  5395. end_scope();
  5396. statement("");
  5397. break;
  5398. case SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint2Plane:
  5399. statement("template<typename T, typename... LodOptions>");
  5400. statement("inline vec<T, 4> spvChromaReconstructLinear420XMidpointYMidpoint(texture2d<T> plane0, "
  5401. "texture2d<T> plane1, sampler samp, float2 coord, LodOptions... options)");
  5402. begin_scope();
  5403. statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
  5404. statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
  5405. statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, "
  5406. "0.5)) * 0.5);");
  5407. statement("ycbcr.br = vec<T, 2>(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
  5408. "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
  5409. "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
  5410. "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).rg);");
  5411. statement("return ycbcr;");
  5412. end_scope();
  5413. statement("");
  5414. break;
  5415. case SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane:
  5416. statement("template<typename T, typename... LodOptions>");
  5417. statement("inline vec<T, 4> spvChromaReconstructLinear420XMidpointYMidpoint(texture2d<T> plane0, "
  5418. "texture2d<T> plane1, texture2d<T> plane2, sampler samp, float2 coord, LodOptions... options)");
  5419. begin_scope();
  5420. statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
  5421. statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
  5422. statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, "
  5423. "0.5)) * 0.5);");
  5424. statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
  5425. "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
  5426. "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
  5427. "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);");
  5428. statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)...), "
  5429. "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
  5430. "mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
  5431. "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);");
  5432. statement("return ycbcr;");
  5433. end_scope();
  5434. statement("");
  5435. break;
  5436. case SPVFuncImplExpandITUFullRange:
  5437. statement("template<typename T>");
  5438. statement("inline vec<T, 4> spvExpandITUFullRange(vec<T, 4> ycbcr, int n)");
  5439. begin_scope();
  5440. statement("ycbcr.br -= exp2(T(n-1))/(exp2(T(n))-1);");
  5441. statement("return ycbcr;");
  5442. end_scope();
  5443. statement("");
  5444. break;
  5445. case SPVFuncImplExpandITUNarrowRange:
  5446. statement("template<typename T>");
  5447. statement("inline vec<T, 4> spvExpandITUNarrowRange(vec<T, 4> ycbcr, int n)");
  5448. begin_scope();
  5449. statement("ycbcr.g = (ycbcr.g * (exp2(T(n)) - 1) - ldexp(T(16), n - 8))/ldexp(T(219), n - 8);");
  5450. statement("ycbcr.br = (ycbcr.br * (exp2(T(n)) - 1) - ldexp(T(128), n - 8))/ldexp(T(224), n - 8);");
  5451. statement("return ycbcr;");
  5452. end_scope();
  5453. statement("");
  5454. break;
  5455. case SPVFuncImplConvertYCbCrBT709:
  5456. statement("// cf. Khronos Data Format Specification, section 15.1.1");
  5457. statement("constant float3x3 spvBT709Factors = {{1, 1, 1}, {0, -0.13397432/0.7152, 1.8556}, {1.5748, "
  5458. "-0.33480248/0.7152, 0}};");
  5459. statement("");
  5460. statement("template<typename T>");
  5461. statement("inline vec<T, 4> spvConvertYCbCrBT709(vec<T, 4> ycbcr)");
  5462. begin_scope();
  5463. statement("vec<T, 4> rgba;");
  5464. statement("rgba.rgb = vec<T, 3>(spvBT709Factors * ycbcr.gbr);");
  5465. statement("rgba.a = ycbcr.a;");
  5466. statement("return rgba;");
  5467. end_scope();
  5468. statement("");
  5469. break;
  5470. case SPVFuncImplConvertYCbCrBT601:
  5471. statement("// cf. Khronos Data Format Specification, section 15.1.2");
  5472. statement("constant float3x3 spvBT601Factors = {{1, 1, 1}, {0, -0.202008/0.587, 1.772}, {1.402, "
  5473. "-0.419198/0.587, 0}};");
  5474. statement("");
  5475. statement("template<typename T>");
  5476. statement("inline vec<T, 4> spvConvertYCbCrBT601(vec<T, 4> ycbcr)");
  5477. begin_scope();
  5478. statement("vec<T, 4> rgba;");
  5479. statement("rgba.rgb = vec<T, 3>(spvBT601Factors * ycbcr.gbr);");
  5480. statement("rgba.a = ycbcr.a;");
  5481. statement("return rgba;");
  5482. end_scope();
  5483. statement("");
  5484. break;
  5485. case SPVFuncImplConvertYCbCrBT2020:
  5486. statement("// cf. Khronos Data Format Specification, section 15.1.3");
  5487. statement("constant float3x3 spvBT2020Factors = {{1, 1, 1}, {0, -0.11156702/0.6780, 1.8814}, {1.4746, "
  5488. "-0.38737742/0.6780, 0}};");
  5489. statement("");
  5490. statement("template<typename T>");
  5491. statement("inline vec<T, 4> spvConvertYCbCrBT2020(vec<T, 4> ycbcr)");
  5492. begin_scope();
  5493. statement("vec<T, 4> rgba;");
  5494. statement("rgba.rgb = vec<T, 3>(spvBT2020Factors * ycbcr.gbr);");
  5495. statement("rgba.a = ycbcr.a;");
  5496. statement("return rgba;");
  5497. end_scope();
  5498. statement("");
  5499. break;
  5500. case SPVFuncImplDynamicImageSampler:
  5501. statement("enum class spvFormatResolution");
  5502. begin_scope();
  5503. statement("_444 = 0,");
  5504. statement("_422,");
  5505. statement("_420");
  5506. end_scope_decl();
  5507. statement("");
  5508. statement("enum class spvChromaFilter");
  5509. begin_scope();
  5510. statement("nearest = 0,");
  5511. statement("linear");
  5512. end_scope_decl();
  5513. statement("");
  5514. statement("enum class spvXChromaLocation");
  5515. begin_scope();
  5516. statement("cosited_even = 0,");
  5517. statement("midpoint");
  5518. end_scope_decl();
  5519. statement("");
  5520. statement("enum class spvYChromaLocation");
  5521. begin_scope();
  5522. statement("cosited_even = 0,");
  5523. statement("midpoint");
  5524. end_scope_decl();
  5525. statement("");
  5526. statement("enum class spvYCbCrModelConversion");
  5527. begin_scope();
  5528. statement("rgb_identity = 0,");
  5529. statement("ycbcr_identity,");
  5530. statement("ycbcr_bt_709,");
  5531. statement("ycbcr_bt_601,");
  5532. statement("ycbcr_bt_2020");
  5533. end_scope_decl();
  5534. statement("");
  5535. statement("enum class spvYCbCrRange");
  5536. begin_scope();
  5537. statement("itu_full = 0,");
  5538. statement("itu_narrow");
  5539. end_scope_decl();
  5540. statement("");
  5541. statement("struct spvComponentBits");
  5542. begin_scope();
  5543. statement("constexpr explicit spvComponentBits(int v) thread : value(v) {}");
  5544. statement("uchar value : 6;");
  5545. end_scope_decl();
  5546. statement("// A class corresponding to metal::sampler which holds sampler");
  5547. statement("// Y'CbCr conversion info.");
  5548. statement("struct spvYCbCrSampler");
  5549. begin_scope();
  5550. statement("constexpr spvYCbCrSampler() thread : val(build()) {}");
  5551. statement("template<typename... Ts>");
  5552. statement("constexpr spvYCbCrSampler(Ts... t) thread : val(build(t...)) {}");
  5553. statement("constexpr spvYCbCrSampler(const thread spvYCbCrSampler& s) thread = default;");
  5554. statement("");
  5555. statement("spvFormatResolution get_resolution() const thread");
  5556. begin_scope();
  5557. statement("return spvFormatResolution((val & resolution_mask) >> resolution_base);");
  5558. end_scope();
  5559. statement("spvChromaFilter get_chroma_filter() const thread");
  5560. begin_scope();
  5561. statement("return spvChromaFilter((val & chroma_filter_mask) >> chroma_filter_base);");
  5562. end_scope();
  5563. statement("spvXChromaLocation get_x_chroma_offset() const thread");
  5564. begin_scope();
  5565. statement("return spvXChromaLocation((val & x_chroma_off_mask) >> x_chroma_off_base);");
  5566. end_scope();
  5567. statement("spvYChromaLocation get_y_chroma_offset() const thread");
  5568. begin_scope();
  5569. statement("return spvYChromaLocation((val & y_chroma_off_mask) >> y_chroma_off_base);");
  5570. end_scope();
  5571. statement("spvYCbCrModelConversion get_ycbcr_model() const thread");
  5572. begin_scope();
  5573. statement("return spvYCbCrModelConversion((val & ycbcr_model_mask) >> ycbcr_model_base);");
  5574. end_scope();
  5575. statement("spvYCbCrRange get_ycbcr_range() const thread");
  5576. begin_scope();
  5577. statement("return spvYCbCrRange((val & ycbcr_range_mask) >> ycbcr_range_base);");
  5578. end_scope();
  5579. statement("int get_bpc() const thread { return (val & bpc_mask) >> bpc_base; }");
  5580. statement("");
  5581. statement("private:");
  5582. statement("ushort val;");
  5583. statement("");
  5584. statement("constexpr static constant ushort resolution_bits = 2;");
  5585. statement("constexpr static constant ushort chroma_filter_bits = 2;");
  5586. statement("constexpr static constant ushort x_chroma_off_bit = 1;");
  5587. statement("constexpr static constant ushort y_chroma_off_bit = 1;");
  5588. statement("constexpr static constant ushort ycbcr_model_bits = 3;");
  5589. statement("constexpr static constant ushort ycbcr_range_bit = 1;");
  5590. statement("constexpr static constant ushort bpc_bits = 6;");
  5591. statement("");
  5592. statement("constexpr static constant ushort resolution_base = 0;");
  5593. statement("constexpr static constant ushort chroma_filter_base = 2;");
  5594. statement("constexpr static constant ushort x_chroma_off_base = 4;");
  5595. statement("constexpr static constant ushort y_chroma_off_base = 5;");
  5596. statement("constexpr static constant ushort ycbcr_model_base = 6;");
  5597. statement("constexpr static constant ushort ycbcr_range_base = 9;");
  5598. statement("constexpr static constant ushort bpc_base = 10;");
  5599. statement("");
  5600. statement(
  5601. "constexpr static constant ushort resolution_mask = ((1 << resolution_bits) - 1) << resolution_base;");
  5602. statement("constexpr static constant ushort chroma_filter_mask = ((1 << chroma_filter_bits) - 1) << "
  5603. "chroma_filter_base;");
  5604. statement("constexpr static constant ushort x_chroma_off_mask = ((1 << x_chroma_off_bit) - 1) << "
  5605. "x_chroma_off_base;");
  5606. statement("constexpr static constant ushort y_chroma_off_mask = ((1 << y_chroma_off_bit) - 1) << "
  5607. "y_chroma_off_base;");
  5608. statement("constexpr static constant ushort ycbcr_model_mask = ((1 << ycbcr_model_bits) - 1) << "
  5609. "ycbcr_model_base;");
  5610. statement("constexpr static constant ushort ycbcr_range_mask = ((1 << ycbcr_range_bit) - 1) << "
  5611. "ycbcr_range_base;");
  5612. statement("constexpr static constant ushort bpc_mask = ((1 << bpc_bits) - 1) << bpc_base;");
  5613. statement("");
  5614. statement("static constexpr ushort build()");
  5615. begin_scope();
  5616. statement("return 0;");
  5617. end_scope();
  5618. statement("");
  5619. statement("template<typename... Ts>");
  5620. statement("static constexpr ushort build(spvFormatResolution res, Ts... t)");
  5621. begin_scope();
  5622. statement("return (ushort(res) << resolution_base) | (build(t...) & ~resolution_mask);");
  5623. end_scope();
  5624. statement("");
  5625. statement("template<typename... Ts>");
  5626. statement("static constexpr ushort build(spvChromaFilter filt, Ts... t)");
  5627. begin_scope();
  5628. statement("return (ushort(filt) << chroma_filter_base) | (build(t...) & ~chroma_filter_mask);");
  5629. end_scope();
  5630. statement("");
  5631. statement("template<typename... Ts>");
  5632. statement("static constexpr ushort build(spvXChromaLocation loc, Ts... t)");
  5633. begin_scope();
  5634. statement("return (ushort(loc) << x_chroma_off_base) | (build(t...) & ~x_chroma_off_mask);");
  5635. end_scope();
  5636. statement("");
  5637. statement("template<typename... Ts>");
  5638. statement("static constexpr ushort build(spvYChromaLocation loc, Ts... t)");
  5639. begin_scope();
  5640. statement("return (ushort(loc) << y_chroma_off_base) | (build(t...) & ~y_chroma_off_mask);");
  5641. end_scope();
  5642. statement("");
  5643. statement("template<typename... Ts>");
  5644. statement("static constexpr ushort build(spvYCbCrModelConversion model, Ts... t)");
  5645. begin_scope();
  5646. statement("return (ushort(model) << ycbcr_model_base) | (build(t...) & ~ycbcr_model_mask);");
  5647. end_scope();
  5648. statement("");
  5649. statement("template<typename... Ts>");
  5650. statement("static constexpr ushort build(spvYCbCrRange range, Ts... t)");
  5651. begin_scope();
  5652. statement("return (ushort(range) << ycbcr_range_base) | (build(t...) & ~ycbcr_range_mask);");
  5653. end_scope();
  5654. statement("");
  5655. statement("template<typename... Ts>");
  5656. statement("static constexpr ushort build(spvComponentBits bpc, Ts... t)");
  5657. begin_scope();
  5658. statement("return (ushort(bpc.value) << bpc_base) | (build(t...) & ~bpc_mask);");
  5659. end_scope();
  5660. end_scope_decl();
  5661. statement("");
  5662. statement("// A class which can hold up to three textures and a sampler, including");
  5663. statement("// Y'CbCr conversion info, used to pass combined image-samplers");
  5664. statement("// dynamically to functions.");
			statement("template<typename T>");
			statement("struct spvDynamicImageSampler");
			begin_scope();
			statement("texture2d<T> plane0;");
			statement("texture2d<T> plane1;");
			statement("texture2d<T> plane2;");
			statement("sampler samp;");
			statement("spvYCbCrSampler ycbcr_samp;");
			statement("uint swizzle = 0;");
			statement("");
			if (msl_options.swizzle_texture_samples)
			{
				statement("constexpr spvDynamicImageSampler(texture2d<T> tex, sampler samp, uint sw) thread :");
				statement(" plane0(tex), samp(samp), swizzle(sw) {}");
			}
			else
			{
				statement("constexpr spvDynamicImageSampler(texture2d<T> tex, sampler samp) thread :");
				statement(" plane0(tex), samp(samp) {}");
			}
			statement("constexpr spvDynamicImageSampler(texture2d<T> tex, sampler samp, spvYCbCrSampler ycbcr_samp, "
			          "uint sw) thread :");
			statement(" plane0(tex), samp(samp), ycbcr_samp(ycbcr_samp), swizzle(sw) {}");
			statement("constexpr spvDynamicImageSampler(texture2d<T> plane0, texture2d<T> plane1,");
			statement(" sampler samp, spvYCbCrSampler ycbcr_samp, uint sw) thread :");
			statement(" plane0(plane0), plane1(plane1), samp(samp), ycbcr_samp(ycbcr_samp), swizzle(sw) {}");
			statement(
			    "constexpr spvDynamicImageSampler(texture2d<T> plane0, texture2d<T> plane1, texture2d<T> plane2,");
			statement(" sampler samp, spvYCbCrSampler ycbcr_samp, uint sw) thread :");
			statement(" plane0(plane0), plane1(plane1), plane2(plane2), samp(samp), ycbcr_samp(ycbcr_samp), "
			          "swizzle(sw) {}");
			statement("");
			// XXX This is really hard to follow... I've left comments to make it a bit easier.
			statement("template<typename... LodOptions>");
			statement("vec<T, 4> do_sample(float2 coord, LodOptions... options) const thread");
			begin_scope();
			statement("if (!is_null_texture(plane1))");
			begin_scope();
			statement("if (ycbcr_samp.get_resolution() == spvFormatResolution::_444 ||");
			statement(" ycbcr_samp.get_chroma_filter() == spvChromaFilter::nearest)");
			begin_scope();
			statement("if (!is_null_texture(plane2))");
			statement(" return spvChromaReconstructNearest(plane0, plane1, plane2, samp, coord,");
			statement(" spvForward<LodOptions>(options)...);");
			statement(
			    "return spvChromaReconstructNearest(plane0, plane1, samp, coord, spvForward<LodOptions>(options)...);");
			end_scope(); // if (resolution == 444 || chroma_filter == nearest)
			statement("switch (ycbcr_samp.get_resolution())");
			begin_scope();
			statement("case spvFormatResolution::_444: break;");
			statement("case spvFormatResolution::_422:");
			begin_scope();
			statement("switch (ycbcr_samp.get_x_chroma_offset())");
			begin_scope();
			statement("case spvXChromaLocation::cosited_even:");
			statement(" if (!is_null_texture(plane2))");
			statement(" return spvChromaReconstructLinear422CositedEven(");
			statement(" plane0, plane1, plane2, samp,");
			statement(" coord, spvForward<LodOptions>(options)...);");
			statement(" return spvChromaReconstructLinear422CositedEven(");
			statement(" plane0, plane1, samp, coord,");
			statement(" spvForward<LodOptions>(options)...);");
			statement("case spvXChromaLocation::midpoint:");
			statement(" if (!is_null_texture(plane2))");
			statement(" return spvChromaReconstructLinear422Midpoint(");
			statement(" plane0, plane1, plane2, samp,");
			statement(" coord, spvForward<LodOptions>(options)...);");
			statement(" return spvChromaReconstructLinear422Midpoint(");
			statement(" plane0, plane1, samp, coord,");
			statement(" spvForward<LodOptions>(options)...);");
			end_scope(); // switch (x_chroma_offset)
			end_scope(); // case 422:
			statement("case spvFormatResolution::_420:");
			begin_scope();
			statement("switch (ycbcr_samp.get_x_chroma_offset())");
			begin_scope();
			statement("case spvXChromaLocation::cosited_even:");
			begin_scope();
			statement("switch (ycbcr_samp.get_y_chroma_offset())");
			begin_scope();
			statement("case spvYChromaLocation::cosited_even:");
			statement(" if (!is_null_texture(plane2))");
			statement(" return spvChromaReconstructLinear420XCositedEvenYCositedEven(");
			statement(" plane0, plane1, plane2, samp,");
			statement(" coord, spvForward<LodOptions>(options)...);");
			statement(" return spvChromaReconstructLinear420XCositedEvenYCositedEven(");
			statement(" plane0, plane1, samp, coord,");
			statement(" spvForward<LodOptions>(options)...);");
			statement("case spvYChromaLocation::midpoint:");
			statement(" if (!is_null_texture(plane2))");
			statement(" return spvChromaReconstructLinear420XCositedEvenYMidpoint(");
			statement(" plane0, plane1, plane2, samp,");
			statement(" coord, spvForward<LodOptions>(options)...);");
			statement(" return spvChromaReconstructLinear420XCositedEvenYMidpoint(");
			statement(" plane0, plane1, samp, coord,");
			statement(" spvForward<LodOptions>(options)...);");
			end_scope(); // switch (y_chroma_offset)
			end_scope(); // case x::cosited_even:
			statement("case spvXChromaLocation::midpoint:");
			begin_scope();
			statement("switch (ycbcr_samp.get_y_chroma_offset())");
			begin_scope();
			statement("case spvYChromaLocation::cosited_even:");
			statement(" if (!is_null_texture(plane2))");
			statement(" return spvChromaReconstructLinear420XMidpointYCositedEven(");
			statement(" plane0, plane1, plane2, samp,");
			statement(" coord, spvForward<LodOptions>(options)...);");
			statement(" return spvChromaReconstructLinear420XMidpointYCositedEven(");
			statement(" plane0, plane1, samp, coord,");
			statement(" spvForward<LodOptions>(options)...);");
			statement("case spvYChromaLocation::midpoint:");
			statement(" if (!is_null_texture(plane2))");
			statement(" return spvChromaReconstructLinear420XMidpointYMidpoint(");
			statement(" plane0, plane1, plane2, samp,");
			statement(" coord, spvForward<LodOptions>(options)...);");
			statement(" return spvChromaReconstructLinear420XMidpointYMidpoint(");
			statement(" plane0, plane1, samp, coord,");
			statement(" spvForward<LodOptions>(options)...);");
			end_scope(); // switch (y_chroma_offset)
			end_scope(); // case x::midpoint
			end_scope(); // switch (x_chroma_offset)
			end_scope(); // case 420:
			end_scope(); // switch (resolution)
			end_scope(); // if (multiplanar)
			statement("return plane0.sample(samp, coord, spvForward<LodOptions>(options)...);");
			end_scope(); // do_sample()
			statement("template <typename... LodOptions>");
			statement("vec<T, 4> sample(float2 coord, LodOptions... options) const thread");
			begin_scope();
			statement(
			    "vec<T, 4> s = spvTextureSwizzle(do_sample(coord, spvForward<LodOptions>(options)...), swizzle);");
			statement("if (ycbcr_samp.get_ycbcr_model() == spvYCbCrModelConversion::rgb_identity)");
			statement(" return s;");
			statement("");
			statement("switch (ycbcr_samp.get_ycbcr_range())");
			begin_scope();
			statement("case spvYCbCrRange::itu_full:");
			statement(" s = spvExpandITUFullRange(s, ycbcr_samp.get_bpc());");
			statement(" break;");
			statement("case spvYCbCrRange::itu_narrow:");
			statement(" s = spvExpandITUNarrowRange(s, ycbcr_samp.get_bpc());");
			statement(" break;");
			end_scope();
			statement("");
			statement("switch (ycbcr_samp.get_ycbcr_model())");
			begin_scope();
			statement("case spvYCbCrModelConversion::rgb_identity:"); // Silence Clang warning
			statement("case spvYCbCrModelConversion::ycbcr_identity:");
			statement(" return s;");
			statement("case spvYCbCrModelConversion::ycbcr_bt_709:");
			statement(" return spvConvertYCbCrBT709(s);");
			statement("case spvYCbCrModelConversion::ycbcr_bt_601:");
			statement(" return spvConvertYCbCrBT601(s);");
			statement("case spvYCbCrModelConversion::ycbcr_bt_2020:");
			statement(" return spvConvertYCbCrBT2020(s);");
			end_scope();
			end_scope();
			statement("");
			// Sampler Y'CbCr conversion forbids offsets.
			statement("vec<T, 4> sample(float2 coord, int2 offset) const thread");
			begin_scope();
			if (msl_options.swizzle_texture_samples)
				statement("return spvTextureSwizzle(plane0.sample(samp, coord, offset), swizzle);");
			else
				statement("return plane0.sample(samp, coord, offset);");
			end_scope();
			statement("template<typename lod_options>");
			statement("vec<T, 4> sample(float2 coord, lod_options options, int2 offset) const thread");
			begin_scope();
			if (msl_options.swizzle_texture_samples)
				statement("return spvTextureSwizzle(plane0.sample(samp, coord, options, offset), swizzle);");
			else
				statement("return plane0.sample(samp, coord, options, offset);");
			end_scope();
			statement("#if __HAVE_MIN_LOD_CLAMP__");
			statement("vec<T, 4> sample(float2 coord, bias b, min_lod_clamp min_lod, int2 offset) const thread");
			begin_scope();
			statement("return plane0.sample(samp, coord, b, min_lod, offset);");
			end_scope();
			statement(
			    "vec<T, 4> sample(float2 coord, gradient2d grad, min_lod_clamp min_lod, int2 offset) const thread");
			begin_scope();
			statement("return plane0.sample(samp, coord, grad, min_lod, offset);");
			end_scope();
			statement("#endif");
			statement("");
			// Y'CbCr conversion forbids all operations but sampling.
			statement("vec<T, 4> read(uint2 coord, uint lod = 0) const thread");
			begin_scope();
			statement("return plane0.read(coord, lod);");
			end_scope();
			statement("");
			statement("vec<T, 4> gather(float2 coord, int2 offset = int2(0), component c = component::x) const thread");
			begin_scope();
			if (msl_options.swizzle_texture_samples)
				statement("return spvGatherSwizzle(plane0, samp, swizzle, c, coord, offset);");
			else
				statement("return plane0.gather(samp, coord, offset, c);");
			end_scope();
			end_scope_decl();
			statement("");
			break;

		default:
			break;
		}
	}
}

static string inject_top_level_storage_qualifier(const string &expr, const string &qualifier)
{
	// Easier to do this through text munging since the qualifier does not exist in the type system at all,
	// and plumbing in all that information is not very helpful.
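	// For example (illustrative): injecting "constant" into "device float4* v"
	// yields "device float4* constant v", qualifying the pointer itself rather
	// than the pointee; with no pointer or reference present, "float4 v" simply
	// becomes "constant float4 v".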
	size_t last_reference = expr.find_last_of('&');
	size_t last_pointer = expr.find_last_of('*');
	size_t last_significant = string::npos;

	if (last_reference == string::npos)
		last_significant = last_pointer;
	else if (last_pointer == string::npos)
		last_significant = last_reference;
	else
		last_significant = std::max(last_reference, last_pointer);

	if (last_significant == string::npos)
		return join(qualifier, " ", expr);
	else
	{
		return join(expr.substr(0, last_significant + 1), " ",
		            qualifier, expr.substr(last_significant + 1, string::npos));
	}
}

// Undefined global memory is not allowed in MSL.
// Declare constant and init to zeros. Use {}, as global constructors can break Metal.
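// For instance (illustrative result ID), an OpUndef of type float4 is emitted as:
//   constant float4 _42 = {};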
void CompilerMSL::declare_undefined_values()
{
	bool emitted = false;
	ir.for_each_typed_id<SPIRUndef>([&](uint32_t, SPIRUndef &undef) {
		auto &type = this->get<SPIRType>(undef.basetype);
		// OpUndef can be void for some reason ...
		if (type.basetype == SPIRType::Void)
			return;

		statement(inject_top_level_storage_qualifier(
		              variable_decl(type, to_name(undef.self), undef.self),
		              "constant"),
		          " = {};");
		emitted = true;
	});

	if (emitted)
		statement("");
}

void CompilerMSL::declare_constant_arrays()
{
	bool fully_inlined = ir.ids_for_type[TypeFunction].size() == 1;

	// MSL cannot declare arrays inline (except when declaring a variable), so we must hoist them out to
	// global constants directly so that the constants can be used as variable expressions.
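	// e.g. (illustrative): a float[3] constant is hoisted to file scope as a
	// "constant"-qualified global initialized from its literal elements, and
	// uses of the constant then refer to that global by name.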
	bool emitted = false;

	ir.for_each_typed_id<SPIRConstant>([&](uint32_t, SPIRConstant &c) {
		if (c.specialization)
			return;

		auto &type = this->get<SPIRType>(c.constant_type);
		// Constant arrays of non-primitive types (i.e. matrices) won't link properly into Metal libraries.
		// FIXME: However, hoisting constants to main() means we need to pass down constant arrays to leaf functions if they are used there.
		// If there are multiple functions in the module, drop this case to avoid breaking use cases which do not need to
		// link into Metal libraries. This is hacky.
		if (!type.array.empty() && (!fully_inlined || is_scalar(type) || is_vector(type)))
		{
			auto name = to_name(c.self);
			statement(inject_top_level_storage_qualifier(variable_decl(type, name), "constant"),
			          " = ", constant_expression(c), ";");
			emitted = true;
		}
	});

	if (emitted)
		statement("");
}

// Constant arrays of non-primitive types (i.e. matrices) won't link properly into Metal libraries.
void CompilerMSL::declare_complex_constant_arrays()
{
	// If we do not have a fully inlined module, we did not opt in to
	// declaring constant arrays of complex types. See CompilerMSL::declare_constant_arrays().
	bool fully_inlined = ir.ids_for_type[TypeFunction].size() == 1;
	if (!fully_inlined)
		return;

	// MSL cannot declare arrays inline (except when declaring a variable), so we must hoist them out to
	// global constants directly so that the constants can be used as variable expressions.
	bool emitted = false;

	ir.for_each_typed_id<SPIRConstant>([&](uint32_t, SPIRConstant &c) {
		if (c.specialization)
			return;

		auto &type = this->get<SPIRType>(c.constant_type);
		if (!type.array.empty() && !(is_scalar(type) || is_vector(type)))
		{
			auto name = to_name(c.self);
			statement("", variable_decl(type, name), " = ", constant_expression(c), ";");
			emitted = true;
		}
	});

	if (emitted)
		statement("");
}

void CompilerMSL::emit_resources()
{
	declare_constant_arrays();
	declare_undefined_values();

	// Emit the special [[stage_in]] and [[stage_out]] interface blocks which we created.
	emit_interface_block(stage_out_var_id);
	emit_interface_block(patch_stage_out_var_id);
	emit_interface_block(stage_in_var_id);
	emit_interface_block(patch_stage_in_var_id);
}

// Emit declarations for the specialization Metal function constants
void CompilerMSL::emit_specialization_constants_and_structs()
{
	SpecializationConstant wg_x, wg_y, wg_z;
	ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
	bool emitted = false;

	unordered_set<uint32_t> declared_structs;
	unordered_set<uint32_t> aligned_structs;

	// First, we need to deal with scalar block layout.
	// It is possible that a struct may have to be placed at an alignment which does not match the innate alignment of the struct itself.
	// If such a case exists for a struct, we must force all elements of the struct to become packed_ types.
	// This makes the struct alignment as small as physically possible.
	// When we actually align the struct later, we can insert padding as necessary to make the packed members behave like normally aligned types.
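	// e.g. (illustrative): a float3 member that must land on a 4-byte boundary
	// is emitted as packed_float3, with explicit padding re-inserted afterwards.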
	ir.for_each_typed_id<SPIRType>([&](uint32_t type_id, const SPIRType &type) {
		if (type.basetype == SPIRType::Struct &&
		    has_extended_decoration(type_id, SPIRVCrossDecorationBufferBlockRepacked))
			mark_scalar_layout_structs(type);
	});

	bool builtin_block_type_is_required = false;
	// Very special case. If gl_PerVertex is initialized as an array (tessellation)
	// we have to potentially emit the gl_PerVertex struct type so that we can emit a constant LUT.
	ir.for_each_typed_id<SPIRConstant>([&](uint32_t, SPIRConstant &c) {
		auto &type = this->get<SPIRType>(c.constant_type);
		if (is_array(type) && has_decoration(type.self, DecorationBlock) && is_builtin_type(type))
			builtin_block_type_is_required = true;
	});

	// Very particular use of the soft loop lock.
	// align_struct may need to create custom types on the fly, but we don't care about
	// these types for purpose of iterating over them in ir.ids_for_type and friends.
	auto loop_lock = ir.create_loop_soft_lock();

	for (auto &id_ : ir.ids_for_constant_or_type)
	{
		auto &id = ir.ids[id_];

		if (id.get_type() == TypeConstant)
		{
			auto &c = id.get<SPIRConstant>();

			if (c.self == workgroup_size_id)
			{
				// TODO: This can be expressed as a [[threads_per_threadgroup]] input semantic, but we need to know
				// the work group size at compile time in SPIR-V, and [[threads_per_threadgroup]] would need to be passed around as a global.
				// The work group size may be a specialization constant.
				statement("constant uint3 ", builtin_to_glsl(BuiltInWorkgroupSize, StorageClassWorkgroup),
				          " [[maybe_unused]] = ", constant_expression(get<SPIRConstant>(workgroup_size_id)), ";");
				emitted = true;
			}
			else if (c.specialization)
			{
				auto &type = get<SPIRType>(c.constant_type);
				string sc_type_name = type_to_glsl(type);
				string sc_name = to_name(c.self);
				string sc_tmp_name = sc_name + "_tmp";

				// Function constants are only supported in MSL 1.2 and later.
				// If we don't support it, just declare the "default" directly.
				// This "default" value can be overridden to the true specialization constant by the API user.
				// Specialization constants which are used as array length expressions cannot be function constants in MSL,
				// so just fall back to macros.
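				// e.g. for SpecId 3 with a default of 0, this emits (illustrative names):
				//   constant int foo_tmp [[function_constant(3)]];
				//   constant int foo = is_function_constant_defined(foo_tmp) ? foo_tmp : 0;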
				if (msl_options.supports_msl_version(1, 2) && has_decoration(c.self, DecorationSpecId) &&
				    !c.is_used_as_array_length)
				{
					uint32_t constant_id = get_decoration(c.self, DecorationSpecId);
					// Only scalar, non-composite values can be function constants.
					statement("constant ", sc_type_name, " ", sc_tmp_name, " [[function_constant(", constant_id,
					          ")]];");
					statement("constant ", sc_type_name, " ", sc_name, " = is_function_constant_defined(", sc_tmp_name,
					          ") ? ", sc_tmp_name, " : ", constant_expression(c), ";");
				}
				else if (has_decoration(c.self, DecorationSpecId))
				{
					// Fallback to macro overrides.
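					// e.g. (illustrative, assuming the usual SPIRV_CROSS_CONSTANT_ID_N macro names):
					//   #ifndef SPIRV_CROSS_CONSTANT_ID_3
					//   #define SPIRV_CROSS_CONSTANT_ID_3 0
					//   #endif
					//   constant int foo = SPIRV_CROSS_CONSTANT_ID_3;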
					c.specialization_constant_macro_name =
					    constant_value_macro_name(get_decoration(c.self, DecorationSpecId));
					statement("#ifndef ", c.specialization_constant_macro_name);
					statement("#define ", c.specialization_constant_macro_name, " ", constant_expression(c));
					statement("#endif");
					statement("constant ", sc_type_name, " ", sc_name, " = ", c.specialization_constant_macro_name,
					          ";");
				}
				else
				{
					// Composite specialization constants must be built from other specialization constants.
					statement("constant ", sc_type_name, " ", sc_name, " = ", constant_expression(c), ";");
				}
				emitted = true;
			}
		}
		else if (id.get_type() == TypeConstantOp)
		{
			auto &c = id.get<SPIRConstantOp>();
			auto &type = get<SPIRType>(c.basetype);
			auto name = to_name(c.self);
			statement("constant ", variable_decl(type, name), " = ", constant_op_expression(c), ";");
			emitted = true;
		}
		else if (id.get_type() == TypeType)
		{
			// Output non-builtin interface structs. These include local function structs
			// and structs nested within uniform and read-write buffers.
			auto &type = id.get<SPIRType>();
			TypeID type_id = type.self;

			bool is_struct = (type.basetype == SPIRType::Struct) && type.array.empty() && !type.pointer;
			bool is_block =
			    has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);

			bool is_builtin_block = is_block && is_builtin_type(type);
			bool is_declarable_struct = is_struct && (!is_builtin_block || builtin_block_type_is_required);

			// We'll declare this later.
			if (stage_out_var_id && get_stage_out_struct_type().self == type_id)
				is_declarable_struct = false;
			if (patch_stage_out_var_id && get_patch_stage_out_struct_type().self == type_id)
				is_declarable_struct = false;
			if (stage_in_var_id && get_stage_in_struct_type().self == type_id)
				is_declarable_struct = false;
			if (patch_stage_in_var_id && get_patch_stage_in_struct_type().self == type_id)
				is_declarable_struct = false;

			// Special case. Declare builtin struct anyways if we need to emit a threadgroup version of it.
			if (stage_out_masked_builtin_type_id == type_id)
				is_declarable_struct = true;

			// Align and emit declarable structs...but avoid declaring each more than once.
			if (is_declarable_struct && declared_structs.count(type_id) == 0)
			{
				if (emitted)
					statement("");
				emitted = false;

				declared_structs.insert(type_id);

				if (has_extended_decoration(type_id, SPIRVCrossDecorationBufferBlockRepacked))
					align_struct(type, aligned_structs);

				// Make sure we declare the underlying struct type, and not the "decorated" type with pointers, etc.
				emit_struct(get<SPIRType>(type_id));
			}
		}
	}

	if (emitted)
		statement("");
}

void CompilerMSL::emit_binary_unord_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
                                       const char *op)
{
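	// Emits "(isunordered(a, b) || a <op> b)", so the result is also true when
	// either operand is NaN, matching SPIR-V's unordered comparison semantics.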
	bool forward = should_forward(op0) && should_forward(op1);
	emit_op(result_type, result_id,
	        join("(isunordered(", to_enclosed_unpacked_expression(op0), ", ", to_enclosed_unpacked_expression(op1),
	             ") || ", to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1),
	             ")"),
	        forward);

	inherit_expression_dependencies(result_id, op0);
	inherit_expression_dependencies(result_id, op1);
}

bool CompilerMSL::emit_tessellation_io_load(uint32_t result_type_id, uint32_t id, uint32_t ptr)
{
	auto &ptr_type = expression_type(ptr);
	auto &result_type = get<SPIRType>(result_type_id);
	if (ptr_type.storage != StorageClassInput && ptr_type.storage != StorageClassOutput)
		return false;
	if (ptr_type.storage == StorageClassOutput && get_execution_model() == ExecutionModelTessellationEvaluation)
		return false;

	if (has_decoration(ptr, DecorationPatch))
		return false;

	bool ptr_is_io_variable = ir.ids[ptr].get_type() == TypeVariable;

	bool flattened_io = variable_storage_requires_stage_io(ptr_type.storage);

	bool flat_data_type = flattened_io &&
	                      (is_matrix(result_type) || is_array(result_type) || result_type.basetype == SPIRType::Struct);

	// Edge case: even with multi-patch workgroups, we still need to unroll the load
	// if we're loading control points directly.
	if (ptr_is_io_variable && is_array(result_type))
		flat_data_type = true;

	if (!flat_data_type)
		return false;

	// Now, we must unflatten a composite type and take care of interleaving array access with gl_in/gl_out.
	// Lots of painful code duplication since we *really* should not unroll these kinds of loads in entry point fixup
	// unless we're forced to do so because the code is emitting suboptimal OpLoads.
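	// e.g. (illustrative): loading one member across three control points
	// unrolls into a list-initialized constructor built from per-control-point
	// access chains, along the lines of T({ gl_in[0].m, gl_in[1].m, gl_in[2].m }).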
	string expr;

	uint32_t interface_index = get_extended_decoration(ptr, SPIRVCrossDecorationInterfaceMemberIndex);
	auto *var = maybe_get_backing_variable(ptr);
	auto &expr_type = get_pointee_type(ptr_type.self);

	const auto &iface_type = expression_type(stage_in_ptr_var_id);

	if (!flattened_io)
	{
		// Simplest case for multi-patch workgroups, just unroll the array as-is.
		if (interface_index == uint32_t(-1))
			return false;

		expr += type_to_glsl(result_type) + "({ ";
		uint32_t num_control_points = to_array_size_literal(result_type, uint32_t(result_type.array.size()) - 1);
		for (uint32_t i = 0; i < num_control_points; i++)
		{
			const uint32_t indices[2] = { i, interface_index };
			AccessChainMeta meta;
			expr += access_chain_internal(stage_in_ptr_var_id, indices, 2,
			                              ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta);
			if (i + 1 < num_control_points)
				expr += ", ";
		}
		expr += " })";
	}
	else if (result_type.array.size() > 2)
	{
		SPIRV_CROSS_THROW("Cannot load tessellation IO variables with more than 2 dimensions.");
	}
	else if (result_type.array.size() == 2)
	{
		if (!ptr_is_io_variable)
			SPIRV_CROSS_THROW("An array-of-array must be loaded directly from an IO variable.");
		if (interface_index == uint32_t(-1))
			SPIRV_CROSS_THROW("Interface index is unknown. Cannot continue.");
		if (result_type.basetype == SPIRType::Struct || is_matrix(result_type))
			SPIRV_CROSS_THROW("Cannot load array-of-array of composite type in tessellation IO.");
		expr += type_to_glsl(result_type) + "({ ";
		uint32_t num_control_points = to_array_size_literal(result_type, 1);
		uint32_t base_interface_index = interface_index;

		auto &sub_type = get<SPIRType>(result_type.parent_type);

		for (uint32_t i = 0; i < num_control_points; i++)
		{
			expr += type_to_glsl(sub_type) + "({ ";
			interface_index = base_interface_index;
			uint32_t array_size = to_array_size_literal(result_type, 0);
			for (uint32_t j = 0; j < array_size; j++, interface_index++)
			{
				const uint32_t indices[2] = { i, interface_index };

				AccessChainMeta meta;
				expr += access_chain_internal(stage_in_ptr_var_id, indices, 2,
				                              ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta);
				if (!is_matrix(sub_type) && sub_type.basetype != SPIRType::Struct &&
				    expr_type.vecsize > sub_type.vecsize)
					expr += vector_swizzle(sub_type.vecsize, 0);

				if (j + 1 < array_size)
					expr += ", ";
			}
			expr += " })";
			if (i + 1 < num_control_points)
				expr += ", ";
		}
		expr += " })";
	}
	else if (result_type.basetype == SPIRType::Struct)
	{
		bool is_array_of_struct = is_array(result_type);
		if (is_array_of_struct && !ptr_is_io_variable)
			SPIRV_CROSS_THROW("An array of structs must be loaded directly from an IO variable.");
		uint32_t num_control_points = 1;
		if (is_array_of_struct)
		{
			num_control_points = to_array_size_literal(result_type, 0);
			expr += type_to_glsl(result_type) + "({ ";
		}

		auto &struct_type = is_array_of_struct ? get<SPIRType>(result_type.parent_type) : result_type;
		assert(struct_type.array.empty());

		for (uint32_t i = 0; i < num_control_points; i++)
		{
			expr += type_to_glsl(struct_type) + "{ ";
			for (uint32_t j = 0; j < uint32_t(struct_type.member_types.size()); j++)
			{
				// The base interface index is stored per variable for structs.
				if (var)
				{
					interface_index =
					    get_extended_member_decoration(var->self, j, SPIRVCrossDecorationInterfaceMemberIndex);
				}

				if (interface_index == uint32_t(-1))
					SPIRV_CROSS_THROW("Interface index is unknown. Cannot continue.");

				const auto &mbr_type = get<SPIRType>(struct_type.member_types[j]);
				const auto &expr_mbr_type = get<SPIRType>(expr_type.member_types[j]);
				if (is_matrix(mbr_type) && ptr_type.storage == StorageClassInput)
				{
					expr += type_to_glsl(mbr_type) + "(";
					for (uint32_t k = 0; k < mbr_type.columns; k++, interface_index++)
					{
						if (is_array_of_struct)
						{
							const uint32_t indices[2] = { i, interface_index };
							AccessChainMeta meta;
							expr += access_chain_internal(
							    stage_in_ptr_var_id, indices, 2,
							    ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta);
						}
						else
							expr += to_expression(ptr) + "." + to_member_name(iface_type, interface_index);
						if (expr_mbr_type.vecsize > mbr_type.vecsize)
							expr += vector_swizzle(mbr_type.vecsize, 0);

						if (k + 1 < mbr_type.columns)
							expr += ", ";
					}
					expr += ")";
				}
				else if (is_array(mbr_type))
				{
					expr += type_to_glsl(mbr_type) + "({ ";
					uint32_t array_size = to_array_size_literal(mbr_type, 0);
					for (uint32_t k = 0; k < array_size; k++, interface_index++)
					{
						if (is_array_of_struct)
						{
							const uint32_t indices[2] = { i, interface_index };
							AccessChainMeta meta;
							expr += access_chain_internal(
							    stage_in_ptr_var_id, indices, 2,
							    ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta);
						}
						else
							expr += to_expression(ptr) + "." + to_member_name(iface_type, interface_index);
						if (expr_mbr_type.vecsize > mbr_type.vecsize)
							expr += vector_swizzle(mbr_type.vecsize, 0);

						if (k + 1 < array_size)
							expr += ", ";
					}
					expr += " })";
				}
				else
				{
					if (is_array_of_struct)
					{
						const uint32_t indices[2] = { i, interface_index };
						AccessChainMeta meta;
						expr += access_chain_internal(stage_in_ptr_var_id, indices, 2,
						                              ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT,
						                              &meta);
					}
					else
						expr += to_expression(ptr) + "." + to_member_name(iface_type, interface_index);
					if (expr_mbr_type.vecsize > mbr_type.vecsize)
						expr += vector_swizzle(mbr_type.vecsize, 0);
				}

				if (j + 1 < struct_type.member_types.size())
					expr += ", ";
			}
			expr += " }";
			if (i + 1 < num_control_points)
				expr += ", ";
		}
		if (is_array_of_struct)
			expr += " })";
	}
	else if (is_matrix(result_type))
	{
		bool is_array_of_matrix = is_array(result_type);
		if (is_array_of_matrix && !ptr_is_io_variable)
			SPIRV_CROSS_THROW("An array of matrices must be loaded directly from an IO variable.");
		if (interface_index == uint32_t(-1))
			SPIRV_CROSS_THROW("Interface index is unknown. Cannot continue.");

		if (is_array_of_matrix)
		{
			// Loading a matrix from each control point.
			uint32_t base_interface_index = interface_index;
			uint32_t num_control_points = to_array_size_literal(result_type, 0);
			expr += type_to_glsl(result_type) + "({ ";

			auto &matrix_type = get_variable_element_type(get<SPIRVariable>(ptr));

			for (uint32_t i = 0; i < num_control_points; i++)
			{
				interface_index = base_interface_index;
				expr += type_to_glsl(matrix_type) + "(";
				for (uint32_t j = 0; j < result_type.columns; j++, interface_index++)
				{
					const uint32_t indices[2] = { i, interface_index };

					AccessChainMeta meta;
					expr += access_chain_internal(stage_in_ptr_var_id, indices, 2,
					                              ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta);
					if (expr_type.vecsize > result_type.vecsize)
						expr += vector_swizzle(result_type.vecsize, 0);

					if (j + 1 < result_type.columns)
						expr += ", ";
				}
				expr += ")";
				if (i + 1 < num_control_points)
					expr += ", ";
			}

			expr += " })";
		}
		else
		{
			expr += type_to_glsl(result_type) + "(";
			for (uint32_t i = 0; i < result_type.columns; i++, interface_index++)
			{
				expr += to_expression(ptr) + "." + to_member_name(iface_type, interface_index);
				if (expr_type.vecsize > result_type.vecsize)
					expr += vector_swizzle(result_type.vecsize, 0);
				if (i + 1 < result_type.columns)
					expr += ", ";
			}
			expr += ")";
		}
	}
	else if (ptr_is_io_variable)
	{
		assert(is_array(result_type));
		assert(result_type.array.size() == 1);
		if (interface_index == uint32_t(-1))
			SPIRV_CROSS_THROW("Interface index is unknown. Cannot continue.");

		// We're loading an array directly from a global variable.
		// This means we're loading one member from each control point.
		expr += type_to_glsl(result_type) + "({ ";
		uint32_t num_control_points = to_array_size_literal(result_type, 0);

		for (uint32_t i = 0; i < num_control_points; i++)
		{
			const uint32_t indices[2] = { i, interface_index };

			AccessChainMeta meta;
			expr += access_chain_internal(stage_in_ptr_var_id, indices, 2,
			                              ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta);
			if (expr_type.vecsize > result_type.vecsize)
				expr += vector_swizzle(result_type.vecsize, 0);

			if (i + 1 < num_control_points)
				expr += ", ";
		}
		expr += " })";
	}
	else
	{
		// We're loading an array from a concrete control point.
		assert(is_array(result_type));
		assert(result_type.array.size() == 1);
		if (interface_index == uint32_t(-1))
			SPIRV_CROSS_THROW("Interface index is unknown. Cannot continue.");

		expr += type_to_glsl(result_type) + "({ ";
		uint32_t array_size = to_array_size_literal(result_type, 0);
		for (uint32_t i = 0; i < array_size; i++, interface_index++)
		{
			expr += to_expression(ptr) + "." + to_member_name(iface_type, interface_index);
			if (expr_type.vecsize > result_type.vecsize)
				expr += vector_swizzle(result_type.vecsize, 0);
			if (i + 1 < array_size)
				expr += ", ";
		}
		expr += " })";
	}

	emit_op(result_type_id, id, expr, false);
	register_read(id, ptr, false);
	return true;
}

bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t length)
{
	// If this is a per-vertex output, remap it to the I/O array buffer.
	// Any object which did not go through IO flattening shenanigans will go there instead.
	// We will unflatten on demand as needed, but not all possible cases can be supported, especially with arrays.

	auto *var = maybe_get_backing_variable(ops[2]);
	bool patch = false;
	bool flat_data = false;
	bool ptr_is_chain = false;
	bool flatten_composites = false;

	bool is_block = false;

	if (var)
		is_block = has_decoration(get_variable_data_type(*var).self, DecorationBlock);

	if (var)
	{
		flatten_composites = variable_storage_requires_stage_io(var->storage);
		patch = has_decoration(ops[2], DecorationPatch) || is_patch_block(get_variable_data_type(*var));

		// Should match strip_array in add_interface_block.
		flat_data = var->storage == StorageClassInput ||
		            (var->storage == StorageClassOutput && get_execution_model() == ExecutionModelTessellationControl);

		// Patch inputs are treated as normal block IO variables, so they don't deal with this path at all.
		if (patch && (!is_block || var->storage == StorageClassInput))
			flat_data = false;

		// We might have a chained access chain, where
		// we first take the access chain to the control point, and then we chain into a member or something similar.
		// In this case, we need to skip gl_in/gl_out remapping.
		// Also, skip ptr chain for patches.
		ptr_is_chain = var->self != ID(ops[2]);
	}

	bool builtin_variable = false;
	bool variable_is_flat = false;

	if (var && flat_data)
	{
		builtin_variable = is_builtin_variable(*var);

		BuiltIn bi_type = BuiltInMax;
		if (builtin_variable && !is_block)
			bi_type = BuiltIn(get_decoration(var->self, DecorationBuiltIn));

		variable_is_flat = !builtin_variable || is_block ||
		                   bi_type == BuiltInPosition || bi_type == BuiltInPointSize ||
		                   bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance;
	}

	if (variable_is_flat)
	{
		// If the output is masked, it is emitted as a "normal" variable, just go through normal code paths.
		// Only check this for the first level of access chain.
		// Dealing with this for partial access chains should be possible, but awkward.
		if (var->storage == StorageClassOutput && !ptr_is_chain)
		{
			bool masked = false;
			if (is_block)
			{
				uint32_t relevant_member_index = patch ? 3 : 4;
				// FIXME: This won't work properly if the application first access chains into a gl_out element,
				// then access chains into the member. Super weird, but theoretically possible ...
				if (length > relevant_member_index)
				{
					uint32_t mbr_idx = get<SPIRConstant>(ops[relevant_member_index]).scalar();
					masked = is_stage_output_block_member_masked(*var, mbr_idx, true);
				}
			}
			else if (var)
				masked = is_stage_output_variable_masked(*var);

			if (masked)
				return false;
		}

		AccessChainMeta meta;
		SmallVector<uint32_t> indices;
		uint32_t next_id = ir.increase_bound_by(1);

		indices.reserve(length - 3 + 1);

		uint32_t first_non_array_index = (ptr_is_chain ? 3 : 4) - (patch ? 1 : 0);

		VariableID stage_var_id;
		if (patch)
			stage_var_id = var->storage == StorageClassInput ? patch_stage_in_var_id : patch_stage_out_var_id;
		else
			stage_var_id = var->storage == StorageClassInput ? stage_in_ptr_var_id : stage_out_ptr_var_id;

		VariableID ptr = ptr_is_chain ? VariableID(ops[2]) : stage_var_id;
		if (!ptr_is_chain && !patch)
		{
			// Index into gl_in/gl_out with the first array index.
			indices.push_back(ops[first_non_array_index - 1]);
		}

		auto &result_ptr_type = get<SPIRType>(ops[0]);

		uint32_t const_mbr_id = next_id++;
		uint32_t index = get_extended_decoration(ops[2], SPIRVCrossDecorationInterfaceMemberIndex);

		// If we have a pointer chain expression, and we are no longer pointing to a composite
		// object, we are in the clear. There is no longer a need to flatten anything.
		bool further_access_chain_is_trivial = false;
		if (ptr_is_chain && flatten_composites)
		{
			auto &ptr_type = expression_type(ptr);
			if (!is_array(ptr_type) && !is_matrix(ptr_type) && ptr_type.basetype != SPIRType::Struct)
				further_access_chain_is_trivial = true;
		}

		if (!further_access_chain_is_trivial && (flatten_composites || is_block))
		{
			uint32_t i = first_non_array_index;
			auto *type = &get_variable_element_type(*var);
			if (index == uint32_t(-1) && length >= (first_non_array_index + 1))
			{
				// Maybe this is a struct type in the input class, in which case
				// we put it as a decoration on the corresponding member.
				uint32_t mbr_idx = get_constant(ops[first_non_array_index]).scalar();
				index = get_extended_member_decoration(var->self, mbr_idx,
				                                       SPIRVCrossDecorationInterfaceMemberIndex);
				assert(index != uint32_t(-1));
				i++;
				type = &get<SPIRType>(type->member_types[mbr_idx]);
			}

			// In this case, we're poking into flattened structures and arrays, so now we have to
			// combine the following indices. If we encounter a non-constant index,
			// we're hosed.
			for (; flatten_composites && i < length; ++i)
			{
				if (!is_array(*type) && !is_matrix(*type) && type->basetype != SPIRType::Struct)
					break;

				auto *c = maybe_get<SPIRConstant>(ops[i]);
				if (!c || c->specialization)
					SPIRV_CROSS_THROW("Trying to dynamically index into an array interface variable in tessellation. "
					                  "This is currently unsupported.");

				// We're in flattened space, so just increment the member index into the IO block.
				// We can only do this once in the current implementation, so either:
				// Struct, Matrix or 1-dimensional array for a control point.
				if (type->basetype == SPIRType::Struct && var->storage == StorageClassOutput)
				{
					// Need to consider holes, since individual block members might be masked away.
					uint32_t mbr_idx = c->scalar();
					for (uint32_t j = 0; j < mbr_idx; j++)
						if (!is_stage_output_block_member_masked(*var, j, true))
							index++;
				}
				else
					index += c->scalar();

				if (type->parent_type)
					type = &get<SPIRType>(type->parent_type);
				else if (type->basetype == SPIRType::Struct)
					type = &get<SPIRType>(type->member_types[c->scalar()]);
			}

			// We're not going to emit the actual member name here; we let any further OpLoad take care of that.
			// Tag the access chain with the member index we're referencing.
			bool defer_access_chain = flatten_composites && (is_matrix(result_ptr_type) || is_array(result_ptr_type) ||
			                                                 result_ptr_type.basetype == SPIRType::Struct);

			if (!defer_access_chain)
			{
				// Access the appropriate member of gl_in/gl_out.
				set<SPIRConstant>(const_mbr_id, get_uint_type_id(), index, false);
				indices.push_back(const_mbr_id);

				// Member index is now irrelevant.
				index = uint32_t(-1);

				// Append any straggling access chain indices.
				if (i < length)
					indices.insert(indices.end(), ops + i, ops + length);
			}
			else
			{
				// We must have consumed the entire access chain if we're deferring it.
				assert(i == length);
			}

			if (index != uint32_t(-1))
				set_extended_decoration(ops[1], SPIRVCrossDecorationInterfaceMemberIndex, index);
			else
				unset_extended_decoration(ops[1], SPIRVCrossDecorationInterfaceMemberIndex);
		}
		else
		{
			if (index != uint32_t(-1))
			{
				set<SPIRConstant>(const_mbr_id, get_uint_type_id(), index, false);
				indices.push_back(const_mbr_id);
			}

			// Member index is now irrelevant.
			index = uint32_t(-1);
			unset_extended_decoration(ops[1], SPIRVCrossDecorationInterfaceMemberIndex);

			indices.insert(indices.end(), ops + first_non_array_index, ops + length);
		}

		// We use the pointer to the base of the input/output array here,
		// so this is always a pointer chain.
		string e;

		if (!ptr_is_chain)
		{
			// This is the start of an access chain, use ptr_chain to index into the control point array.
			e = access_chain(ptr, indices.data(), uint32_t(indices.size()), result_ptr_type, &meta, !patch);
		}
		else
		{
			// If we're accessing a struct, we need to use member indices which are based on the IO block,
			// not the actual struct type, so we have to use a split access chain here where
			// the first half resolves the control point index, i.e. gl_in[index], and the second half deals with
			// looking up the flattened member name.

			// However, it is possible that we partially accessed a struct,
			// by taking a pointer to a member inside the control-point array.
			// For this case, we fall back to a natural access chain since we have already dealt with remapping struct members.
			// One way to check this here is if we have 2 implied read expressions.
			// The first one is the gl_in/gl_out struct itself, then an index into that array.
			// If we have traversed further, we use a normal access chain formulation.
			auto *ptr_expr = maybe_get<SPIRExpression>(ptr);
			bool split_access_chain_formulation = flatten_composites && ptr_expr &&
			                                      ptr_expr->implied_read_expressions.size() == 2 &&
			                                      !further_access_chain_is_trivial;

			if (split_access_chain_formulation)
			{
				e = join(to_expression(ptr),
				         access_chain_internal(stage_var_id, indices.data(), uint32_t(indices.size()),
				                               ACCESS_CHAIN_CHAIN_ONLY_BIT, &meta));
			}
			else
			{
				e = access_chain_internal(ptr, indices.data(), uint32_t(indices.size()), 0, &meta);
			}
		}

		// Get the actual type of the object that was accessed. If it's a vector type and we changed it,
		// then we'll need to add a swizzle.
		// For this, we can't necessarily rely on the type of the base expression, because it might be
		// another access chain, and it will therefore already have the "correct" type.
		auto *expr_type = &get_variable_data_type(*var);
		if (has_extended_decoration(ops[2], SPIRVCrossDecorationTessIOOriginalInputTypeID))
			expr_type = &get<SPIRType>(get_extended_decoration(ops[2], SPIRVCrossDecorationTessIOOriginalInputTypeID));
		for (uint32_t i = 3; i < length; i++)
		{
			if (!is_array(*expr_type) && expr_type->basetype == SPIRType::Struct)
				expr_type = &get<SPIRType>(expr_type->member_types[get<SPIRConstant>(ops[i]).scalar()]);
			else
				expr_type = &get<SPIRType>(expr_type->parent_type);
		}
		if (!is_array(*expr_type) && !is_matrix(*expr_type) && expr_type->basetype != SPIRType::Struct &&
		    expr_type->vecsize > result_ptr_type.vecsize)
			e += vector_swizzle(result_ptr_type.vecsize, 0);

		auto &expr = set<SPIRExpression>(ops[1], move(e), ops[0], should_forward(ops[2]));
		expr.loaded_from = var->self;
		expr.need_transpose = meta.need_transpose;
		expr.access_chain = true;

		// Mark the result as being packed if necessary.
		if (meta.storage_is_packed)
			set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypePacked);
		if (meta.storage_physical_type != 0)
			set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type);
		if (meta.storage_is_invariant)
			set_decoration(ops[1], DecorationInvariant);

		// Save the type we found in case the result is used in another access chain.
		set_extended_decoration(ops[1], SPIRVCrossDecorationTessIOOriginalInputTypeID, expr_type->self);
		// If we have some expression dependencies in our access chain, this access chain is technically a forwarded
		// temporary which could be subject to invalidation.
		// Need to assume we're forwarded while calling inherit_expression_dependencies.
		forwarded_temporaries.insert(ops[1]);
		// The access chain itself is never forced to a temporary, but its dependencies might be.
		suppressed_usage_tracking.insert(ops[1]);

		for (uint32_t i = 2; i < length; i++)
		{
			inherit_expression_dependencies(ops[1], ops[i]);
			add_implied_read_expression(expr, ops[i]);
		}

		// If it turns out we have no dependencies, i.e., all indices in the access chain are immutable temporaries,
		// we're not forwarded after all.
		if (expr.expression_dependencies.empty())
			forwarded_temporaries.erase(ops[1]);

		return true;
	}

	// If this is the inner tessellation level, and we're tessellating triangles,
	// drop the last index. It isn't an array in this case, so we can't have an
	// array reference here. We need to make this ID a variable instead of an
	// expression so we don't try to dereference it as a variable pointer.
	// Don't do this if the index is a constant 1, though. We need to drop stores
	// to that one.
	auto *m = ir.find_meta(var ? var->self : ID(0));
	if (get_execution_model() == ExecutionModelTessellationControl && var && m &&
	    m->decoration.builtin_type == BuiltInTessLevelInner && get_entry_point().flags.get(ExecutionModeTriangles))
	{
		auto *c = maybe_get<SPIRConstant>(ops[3]);
		if (c && c->scalar() == 1)
			return false;
		auto &dest_var = set<SPIRVariable>(ops[1], *var);
		dest_var.basetype = ops[0];
		ir.meta[ops[1]] = ir.meta[ops[2]];
		inherit_expression_dependencies(ops[1], ops[2]);
		return true;
	}

	return false;
}

bool CompilerMSL::is_out_of_bounds_tessellation_level(uint32_t id_lhs)
{
	if (!get_entry_point().flags.get(ExecutionModeTriangles))
		return false;

	// In SPIR-V, TessLevelInner always has two elements and TessLevelOuter always has
	// four. This is true even if we are tessellating triangles. This allows clients
	// to use a single tessellation control shader with multiple tessellation evaluation
	// shaders.
	// In Metal, however, only the first element of TessLevelInner and the first three
	// of TessLevelOuter are accessible. This stems from how in Metal, the tessellation
	// levels must be stored to a dedicated buffer in a particular format that depends
	// on the patch type. Therefore, in Triangles mode, any access to the second
	// inner level or the fourth outer level must be dropped.
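	// e.g. a store to gl_TessLevelInner[1] or gl_TessLevelOuter[3] in Triangles
	// mode is detected here (constant index 1 or 3, respectively) and discarded.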
	const auto *e = maybe_get<SPIRExpression>(id_lhs);
	if (!e || !e->access_chain)
		return false;
	BuiltIn builtin = BuiltIn(get_decoration(e->loaded_from, DecorationBuiltIn));
	if (builtin != BuiltInTessLevelInner && builtin != BuiltInTessLevelOuter)
		return false;
	auto *c = maybe_get<SPIRConstant>(e->implied_read_expressions[1]);
	if (!c)
		return false;
	return (builtin == BuiltInTessLevelInner && c->scalar() == 1) ||
	       (builtin == BuiltInTessLevelOuter && c->scalar() == 3);
}

void CompilerMSL::prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type,
                                                         spv::StorageClass storage, bool &is_packed)
{
	// If there is any risk of writes happening with the access chain in question,
	// and there is a risk of concurrent write access to other components,
	// we must cast the access chain to a plain pointer to ensure we only access the exact scalars we expect.
	// The MSL compiler refuses to allow component-level access for any non-packed vector types.
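	// e.g. (illustrative): an access chain "buf.member" becomes
	// "((device float*)&buf.member)", so component access goes through a plain
	// pointer and cannot touch neighboring components.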
	if (!is_packed && (storage == StorageClassStorageBuffer || storage == StorageClassWorkgroup))
	{
		const char *addr_space = storage == StorageClassWorkgroup ? "threadgroup" : "device";
		expr = join("((", addr_space, " ", type_to_glsl(type), "*)&", enclose_expression(expr), ")");

		// Further indexing should happen with packed rules (array index, not swizzle).
		is_packed = true;
	}
}

bool CompilerMSL::access_chain_needs_stage_io_builtin_translation(uint32_t base)
{
	auto *var = maybe_get_backing_variable(base);
	if (!var || !is_tessellation_shader())
		return true;

	// We only need to rewrite builtin access chains when accessing flattened builtins like gl_ClipDistance_N.
	// Avoid overriding it back to just gl_ClipDistance.
	// This can only happen in scenarios where we cannot flatten/unflatten access chains, so the only case
	// where this triggers is evaluation shader inputs.
	bool redirect_builtin = get_execution_model() == ExecutionModelTessellationEvaluation ?
	                        var->storage == StorageClassOutput : false;
	return redirect_builtin;
}

// Sets the interface member index for an access chain to a pull-model interpolant.
void CompilerMSL::fix_up_interpolant_access_chain(const uint32_t *ops, uint32_t length)
{
	auto *var = maybe_get_backing_variable(ops[2]);
	if (!var || !pull_model_inputs.count(var->self))
		return;

	// Get the base index.
	uint32_t interface_index;
	auto &var_type = get_variable_data_type(*var);
	auto &result_type = get<SPIRType>(ops[0]);
	auto *type = &var_type;
	if (has_extended_decoration(ops[2], SPIRVCrossDecorationInterfaceMemberIndex))
	{
		interface_index = get_extended_decoration(ops[2], SPIRVCrossDecorationInterfaceMemberIndex);
	}
	else
	{
		// Assume an access chain into a struct variable.
		assert(var_type.basetype == SPIRType::Struct);
		auto &c = get<SPIRConstant>(ops[3 + var_type.array.size()]);
		interface_index =
		    get_extended_member_decoration(var->self, c.scalar(), SPIRVCrossDecorationInterfaceMemberIndex);
	}

	// Accumulate indices. We'll have to skip over the one for the struct, if present, because we already
	// accounted for that when getting the base index.
	for (uint32_t i = 3; i < length; ++i)
	{
		if (is_vector(*type) && !is_array(*type) && is_scalar(result_type))
		{
			// We don't want to combine the next index. Actually, we need to save it
			// so we know to apply a swizzle to the result of the interpolation.
			set_extended_decoration(ops[1], SPIRVCrossDecorationInterpolantComponentExpr, ops[i]);
			break;
		}

		auto *c = maybe_get<SPIRConstant>(ops[i]);
		if (!c || c->specialization)
			SPIRV_CROSS_THROW("Trying to dynamically index into an array interface variable using pull-model "
			                  "interpolation. This is currently unsupported.");

		if (type->parent_type)
			type = &get<SPIRType>(type->parent_type);
		else if (type->basetype == SPIRType::Struct)
			type = &get<SPIRType>(type->member_types[c->scalar()]);

		if (!has_extended_decoration(ops[2], SPIRVCrossDecorationInterfaceMemberIndex) &&
		    i - 3 == var_type.array.size())
			continue;

		interface_index += c->scalar();
	}

	// Save this to the access chain itself so we can recover it later when calling an interpolation function.
	set_extended_decoration(ops[1], SPIRVCrossDecorationInterfaceMemberIndex, interface_index);
}

// Override for MSL-specific syntax instructions
void CompilerMSL::emit_instruction(const Instruction &instruction)
{
#define MSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op)
#define MSL_BOP_CAST(op, type) \
	emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
#define MSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op)
#define MSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op)
#define MSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op)
#define MSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
#define MSL_BFOP_CAST(op, type) \
	emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
#define MSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op)
#define MSL_UNORD_BOP(op) emit_binary_unord_op(ops[0], ops[1], ops[2], ops[3], #op)
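
	// The macros stringize their operator argument, so e.g. MSL_BOP(+) expands to
	// emit_binary_op(ops[0], ops[1], ops[2], ops[3], "+"), emitting "a + b" for
	// the result id in ops[1].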

	auto ops = stream(instruction);
	auto opcode = static_cast<Op>(instruction.op);

	// If we need to do implicit bitcasts, make sure we do it with the correct type.
	uint32_t integer_width = get_integer_width_for_instruction(instruction);
	auto int_type = to_signed_basetype(integer_width);
	auto uint_type = to_unsigned_basetype(integer_width);

	switch (opcode)
	{
	case OpLoad:
	{
		uint32_t id = ops[1];
		uint32_t ptr = ops[2];
		if (is_tessellation_shader())
		{
			if (!emit_tessellation_io_load(ops[0], id, ptr))
				CompilerGLSL::emit_instruction(instruction);
		}
		else
		{
			// Sample mask input for Metal is not an array
			if (BuiltIn(get_decoration(ptr, DecorationBuiltIn)) == BuiltInSampleMask)
				set_decoration(id, DecorationBuiltIn, BuiltInSampleMask);
			CompilerGLSL::emit_instruction(instruction);
		}
		break;
	}

	// Comparisons
	case OpIEqual:
		MSL_BOP_CAST(==, int_type);
		break;

	case OpLogicalEqual:
	case OpFOrdEqual:
		MSL_BOP(==);
		break;

	case OpINotEqual:
		MSL_BOP_CAST(!=, int_type);
		break;

	case OpLogicalNotEqual:
	case OpFOrdNotEqual:
		MSL_BOP(!=);
		break;

	case OpUGreaterThan:
		MSL_BOP_CAST(>, uint_type);
		break;

	case OpSGreaterThan:
		MSL_BOP_CAST(>, int_type);
		break;

	case OpFOrdGreaterThan:
		MSL_BOP(>);
		break;

	case OpUGreaterThanEqual:
		MSL_BOP_CAST(>=, uint_type);
		break;

	case OpSGreaterThanEqual:
		MSL_BOP_CAST(>=, int_type);
		break;

	case OpFOrdGreaterThanEqual:
		MSL_BOP(>=);
		break;

	case OpULessThan:
		MSL_BOP_CAST(<, uint_type);
		break;

	case OpSLessThan:
		MSL_BOP_CAST(<, int_type);
		break;

	case OpFOrdLessThan:
		MSL_BOP(<);
		break;

	case OpULessThanEqual:
		MSL_BOP_CAST(<=, uint_type);
		break;

	case OpSLessThanEqual:
		MSL_BOP_CAST(<=, int_type);
		break;

	case OpFOrdLessThanEqual:
		MSL_BOP(<=);
		break;

	case OpFUnordEqual:
		MSL_UNORD_BOP(==);
		break;

	case OpFUnordNotEqual:
		MSL_UNORD_BOP(!=);
		break;

	case OpFUnordGreaterThan:
		MSL_UNORD_BOP(>);
		break;

	case OpFUnordGreaterThanEqual:
		MSL_UNORD_BOP(>=);
		break;

	case OpFUnordLessThan:
		MSL_UNORD_BOP(<);
		break;

	case OpFUnordLessThanEqual:
		MSL_UNORD_BOP(<=);
		break;

	// Derivatives
	case OpDPdx:
	case OpDPdxFine:
	case OpDPdxCoarse:
		MSL_UFOP(dfdx);
		register_control_dependent_expression(ops[1]);
		break;

	case OpDPdy:
	case OpDPdyFine:
	case OpDPdyCoarse:
		MSL_UFOP(dfdy);
		register_control_dependent_expression(ops[1]);
		break;

	case OpFwidth:
	case OpFwidthCoarse:
	case OpFwidthFine:
		MSL_UFOP(fwidth);
		register_control_dependent_expression(ops[1]);
		break;

	// Bitfield
	case OpBitFieldInsert:
	{
		emit_bitfield_insert_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], "insert_bits", SPIRType::UInt);
		break;
	}

	case OpBitFieldSExtract:
	{
		emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "extract_bits", int_type, int_type,
		                                SPIRType::UInt, SPIRType::UInt);
		break;
	}

	case OpBitFieldUExtract:
	{
		emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "extract_bits", uint_type, uint_type,
		                                SPIRType::UInt, SPIRType::UInt);
		break;
	}

	case OpBitReverse:
		// BitReverse does not have issues with sign since result type must match input type.
		MSL_UFOP(reverse_bits);
		break;

	case OpBitCount:
	{
		auto basetype = expression_type(ops[2]).basetype;
		emit_unary_func_op_cast(ops[0], ops[1], ops[2], "popcount", basetype, basetype);
		break;
	}

	case OpFRem:
		MSL_BFOP(fmod);
		break;

	case OpFMul:
		if (msl_options.invariant_float_math || has_decoration(ops[1], DecorationNoContraction))
			MSL_BFOP(spvFMul);
		else
			MSL_BOP(*);
		break;

	case OpFAdd:
		if (msl_options.invariant_float_math || has_decoration(ops[1], DecorationNoContraction))
			MSL_BFOP(spvFAdd);
		else
			MSL_BOP(+);
		break;

	case OpFSub:
		if (msl_options.invariant_float_math || has_decoration(ops[1], DecorationNoContraction))
			MSL_BFOP(spvFSub);
		else
			MSL_BOP(-);
		break;

	// Atomics
	case OpAtomicExchange:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		uint32_t ptr = ops[2];
		uint32_t mem_sem = ops[4];
		uint32_t val = ops[5];
		emit_atomic_func_op(result_type, id, "atomic_exchange_explicit", mem_sem, mem_sem, false, ptr, val);
		break;
	}

	case OpAtomicCompareExchange:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		uint32_t ptr = ops[2];
		uint32_t mem_sem_pass = ops[4];
		uint32_t mem_sem_fail = ops[5];
		uint32_t val = ops[6];
		uint32_t comp = ops[7];
		emit_atomic_func_op(result_type, id, "atomic_compare_exchange_weak_explicit", mem_sem_pass, mem_sem_fail, true,
		                    ptr, comp, true, false, val);
		break;
	}

	case OpAtomicCompareExchangeWeak:
		SPIRV_CROSS_THROW("OpAtomicCompareExchangeWeak is only supported in kernel profile.");

	case OpAtomicLoad:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		uint32_t ptr = ops[2];
		uint32_t mem_sem = ops[4];
		emit_atomic_func_op(result_type, id, "atomic_load_explicit", mem_sem, mem_sem, false, ptr, 0);
		break;
	}

	case OpAtomicStore:
	{
		uint32_t result_type = expression_type(ops[0]).self;
		uint32_t id = ops[0];
		uint32_t ptr = ops[0];
		uint32_t mem_sem = ops[2];
		uint32_t val = ops[3];
		emit_atomic_func_op(result_type, id, "atomic_store_explicit", mem_sem, mem_sem, false, ptr, val);
		break;
	}

#define MSL_AFMO_IMPL(op, valsrc, valconst) \
	do \
	{ \
		uint32_t result_type = ops[0]; \
		uint32_t id = ops[1]; \
		uint32_t ptr = ops[2]; \
		uint32_t mem_sem = ops[4]; \
		uint32_t val = valsrc; \
		emit_atomic_func_op(result_type, id, "atomic_fetch_" #op "_explicit", mem_sem, mem_sem, false, ptr, val, \
		                    false, valconst); \
	} while (false)

#define MSL_AFMO(op) MSL_AFMO_IMPL(op, ops[5], false)
#define MSL_AFMIO(op) MSL_AFMO_IMPL(op, 1, true)
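	// For example (illustrative only): OpAtomicIIncrement below goes through
	// MSL_AFMIO(add), which passes the literal 1 as the operand, ultimately
	// emitting something like atomic_fetch_add_explicit(..., 1, memory_order_relaxed).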

	case OpAtomicIIncrement:
		MSL_AFMIO(add);
		break;

	case OpAtomicIDecrement:
		MSL_AFMIO(sub);
		break;

	case OpAtomicIAdd:
		MSL_AFMO(add);
		break;

	case OpAtomicISub:
		MSL_AFMO(sub);
		break;

	case OpAtomicSMin:
	case OpAtomicUMin:
		MSL_AFMO(min);
		break;

	case OpAtomicSMax:
	case OpAtomicUMax:
		MSL_AFMO(max);
		break;

	case OpAtomicAnd:
		MSL_AFMO(and);
		break;

	case OpAtomicOr:
		MSL_AFMO(or);
		break;

	case OpAtomicXor:
		MSL_AFMO(xor);
		break;

	// Images

	// Reads == Fetches in Metal
	case OpImageRead:
	{
		// Mark that this shader reads from this image
		uint32_t img_id = ops[2];
		auto &type = expression_type(img_id);
		if (type.image.dim != DimSubpassData)
		{
			auto *p_var = maybe_get_backing_variable(img_id);
			if (p_var && has_decoration(p_var->self, DecorationNonReadable))
			{
				unset_decoration(p_var->self, DecorationNonReadable);
				force_recompile();
			}
		}

		emit_texture_op(instruction, false);
		break;
	}

	// Emulate texture2D atomic operations
	case OpImageTexelPointer:
	{
		// When using the pointer, we need to know which variable it is actually loaded from.
		auto *var = maybe_get_backing_variable(ops[2]);
		if (var && atomic_image_vars.count(var->self))
		{
			uint32_t result_type = ops[0];
			uint32_t id = ops[1];

			std::string coord = to_expression(ops[3]);
			auto &type = expression_type(ops[2]);
			if (type.image.dim == Dim2D)
			{
				coord = join("spvImage2DAtomicCoord(", coord, ", ", to_expression(ops[2]), ")");
			}

			auto &e = set<SPIRExpression>(id, join(to_expression(ops[2]), "_atomic[", coord, "]"), result_type, true);
			e.loaded_from = var ? var->self : ID(0);
			inherit_expression_dependencies(id, ops[3]);
		}
		else
		{
			uint32_t result_type = ops[0];
			uint32_t id = ops[1];
			auto &e =
			    set<SPIRExpression>(id, join(to_expression(ops[2]), ", ", to_expression(ops[3])), result_type, true);

			// When using the pointer, we need to know which variable it is actually loaded from.
			e.loaded_from = var ? var->self : ID(0);
			inherit_expression_dependencies(id, ops[3]);
		}
		break;
	}

	case OpImageWrite:
	{
		uint32_t img_id = ops[0];
		uint32_t coord_id = ops[1];
		uint32_t texel_id = ops[2];
		const uint32_t *opt = &ops[3];
		uint32_t length = instruction.length - 3;

		// Bypass pointers because we need the real image struct
		auto &type = expression_type(img_id);
		auto &img_type = get<SPIRType>(type.self);
		// Ensure this image has been marked as being written to and force a
		// recompile so that the image type output will include write access
		auto *p_var = maybe_get_backing_variable(img_id);
		if (p_var && has_decoration(p_var->self, DecorationNonWritable))
		{
			unset_decoration(p_var->self, DecorationNonWritable);
			force_recompile();
		}

		bool forward = false;
		uint32_t bias = 0;
		uint32_t lod = 0;
		uint32_t flags = 0;

		if (length)
		{
			flags = *opt++;
			length--;
		}

		auto test = [&](uint32_t &v, uint32_t flag) {
			if (length && (flags & flag))
			{
				v = *opt++;
				length--;
			}
		};

		test(bias, ImageOperandsBiasMask);
		test(lod, ImageOperandsLodMask);

		auto &texel_type = expression_type(texel_id);
		auto store_type = texel_type;
		store_type.vecsize = 4;

		TextureFunctionArguments args = {};
		args.base.img = img_id;
		args.base.imgtype = &img_type;
		args.base.is_fetch = true;
		args.coord = coord_id;
		args.lod = lod;
		statement(join(to_expression(img_id), ".write(",
		               remap_swizzle(store_type, texel_type.vecsize, to_expression(texel_id)), ", ",
		               CompilerMSL::to_function_args(args, &forward), ");"));

		if (p_var && variable_storage_is_aliased(*p_var))
			flush_all_aliased_variables();

		break;
	}

	case OpImageQuerySize:
	case OpImageQuerySizeLod:
	{
		uint32_t rslt_type_id = ops[0];
		auto &rslt_type = get<SPIRType>(rslt_type_id);

		uint32_t id = ops[1];

		uint32_t img_id = ops[2];
		string img_exp = to_expression(img_id);
		auto &img_type = expression_type(img_id);
		Dim img_dim = img_type.image.dim;
		bool img_is_array = img_type.image.arrayed;

		if (img_type.basetype != SPIRType::Image)
			SPIRV_CROSS_THROW("Invalid type for OpImageQuerySize.");

		string lod;
		if (opcode == OpImageQuerySizeLod)
		{
			// LOD index defaults to zero, so don't bother outputting a level zero index
			string decl_lod = to_expression(ops[3]);
			if (decl_lod != "0")
				lod = decl_lod;
		}

		string expr = type_to_glsl(rslt_type) + "(";
		expr += img_exp + ".get_width(" + lod + ")";

		if (img_dim == Dim2D || img_dim == DimCube || img_dim == Dim3D)
			expr += ", " + img_exp + ".get_height(" + lod + ")";

		if (img_dim == Dim3D)
			expr += ", " + img_exp + ".get_depth(" + lod + ")";

		if (img_is_array)
		{
			expr += ", " + img_exp + ".get_array_size()";
			if (img_dim == DimCube && msl_options.emulate_cube_array)
				expr += " / 6";
		}

		expr += ")";
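		// For a 2D array texture, for example, the built expression reads roughly like
		// uint3(tex.get_width(), tex.get_height(), tex.get_array_size()); the exact
		// constructor comes from rslt_type above.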

		emit_op(rslt_type_id, id, expr, should_forward(img_id));

		break;
	}

	case OpImageQueryLod:
	{
		if (!msl_options.supports_msl_version(2, 2))
			SPIRV_CROSS_THROW("ImageQueryLod is only supported on MSL 2.2 and up.");
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		uint32_t image_id = ops[2];
		uint32_t coord_id = ops[3];
		emit_uninitialized_temporary_expression(result_type, id);

		auto sampler_expr = to_sampler_expression(image_id);
		auto *combined = maybe_get<SPIRCombinedImageSampler>(image_id);
		auto image_expr = combined ? to_expression(combined->image) : to_expression(image_id);
		// TODO: It is unclear if calculate_clamped_lod also conditionally rounds
		// the reported LOD based on the sampler. NEAREST miplevel should
		// round the LOD, but LINEAR miplevel should not round.
		// Let's hope this does not become an issue ...
		statement(to_expression(id), ".x = ", image_expr, ".calculate_clamped_lod(", sampler_expr, ", ",
		          to_expression(coord_id), ");");
		statement(to_expression(id), ".y = ", image_expr, ".calculate_unclamped_lod(", sampler_expr, ", ",
		          to_expression(coord_id), ");");
		register_control_dependent_expression(id);
		break;
	}

#define MSL_ImgQry(qrytype) \
	do \
	{ \
		uint32_t rslt_type_id = ops[0]; \
		auto &rslt_type = get<SPIRType>(rslt_type_id); \
		uint32_t id = ops[1]; \
		uint32_t img_id = ops[2]; \
		string img_exp = to_expression(img_id); \
		string expr = type_to_glsl(rslt_type) + "(" + img_exp + ".get_num_" #qrytype "())"; \
		emit_op(rslt_type_id, id, expr, should_forward(img_id)); \
	} while (false)
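	// Illustrative expansion: MSL_ImgQry(mip_levels) builds an expression along the
	// lines of uint(tex.get_num_mip_levels()) for the OpImageQueryLevels case below.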

	case OpImageQueryLevels:
		MSL_ImgQry(mip_levels);
		break;

	case OpImageQuerySamples:
		MSL_ImgQry(samples);
		break;

	case OpImage:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		auto *combined = maybe_get<SPIRCombinedImageSampler>(ops[2]);

		if (combined)
		{
			auto &e = emit_op(result_type, id, to_expression(combined->image), true, true);
			auto *var = maybe_get_backing_variable(combined->image);
			if (var)
				e.loaded_from = var->self;
		}
		else
		{
			auto *var = maybe_get_backing_variable(ops[2]);
			SPIRExpression *e;
			if (var && has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler))
				e = &emit_op(result_type, id, join(to_expression(ops[2]), ".plane0"), true, true);
			else
				e = &emit_op(result_type, id, to_expression(ops[2]), true, true);
			if (var)
				e->loaded_from = var->self;
		}
		break;
	}

	// Casting
	case OpQuantizeToF16:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		uint32_t arg = ops[2];
		string exp = join("spvQuantizeToF16(", to_expression(arg), ")");
		emit_op(result_type, id, exp, should_forward(arg));
		break;
	}

	case OpInBoundsAccessChain:
	case OpAccessChain:
	case OpPtrAccessChain:
		if (is_tessellation_shader())
		{
			if (!emit_tessellation_access_chain(ops, instruction.length))
				CompilerGLSL::emit_instruction(instruction);
		}
		else
			CompilerGLSL::emit_instruction(instruction);
		fix_up_interpolant_access_chain(ops, instruction.length);
		break;

	case OpStore:
		if (is_out_of_bounds_tessellation_level(ops[0]))
			break;

		if (maybe_emit_array_assignment(ops[0], ops[1]))
			break;

		CompilerGLSL::emit_instruction(instruction);
		break;

	// Compute barriers
	case OpMemoryBarrier:
		emit_barrier(0, ops[0], ops[1]);
		break;

	case OpControlBarrier:
		// In GLSL a memory barrier is often followed by a control barrier.
		// But in MSL, memory barriers are also control barriers, so don't
		// emit a simple control barrier if a memory barrier has just been emitted.
		if (previous_instruction_opcode != OpMemoryBarrier)
			emit_barrier(ops[0], ops[1], ops[2]);
		break;

	case OpOuterProduct:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		uint32_t a = ops[2];
		uint32_t b = ops[3];

		auto &type = get<SPIRType>(result_type);
		string expr = type_to_glsl_constructor(type);
		expr += "(";
		for (uint32_t col = 0; col < type.columns; col++)
		{
			expr += to_enclosed_unpacked_expression(a);
			expr += " * ";
			expr += to_extract_component_expression(b, col);
			if (col + 1 < type.columns)
				expr += ", ";
		}
		expr += ")";
		emit_op(result_type, id, expr, should_forward(a) && should_forward(b));

		inherit_expression_dependencies(id, a);
		inherit_expression_dependencies(id, b);
		break;
	}

	case OpVectorTimesMatrix:
	case OpMatrixTimesVector:
	{
		if (!msl_options.invariant_float_math && !has_decoration(ops[1], DecorationNoContraction))
		{
			CompilerGLSL::emit_instruction(instruction);
			break;
		}

		// If the matrix needs transpose, just flip the multiply order.
		auto *e = maybe_get<SPIRExpression>(ops[opcode == OpMatrixTimesVector ? 2 : 3]);
		if (e && e->need_transpose)
		{
			e->need_transpose = false;
			string expr;

			if (opcode == OpMatrixTimesVector)
			{
				expr = join("spvFMulVectorMatrix(", to_enclosed_unpacked_expression(ops[3]), ", ",
				            to_unpacked_row_major_matrix_expression(ops[2]), ")");
			}
			else
			{
				expr = join("spvFMulMatrixVector(", to_unpacked_row_major_matrix_expression(ops[3]), ", ",
				            to_enclosed_unpacked_expression(ops[2]), ")");
			}

			bool forward = should_forward(ops[2]) && should_forward(ops[3]);
			emit_op(ops[0], ops[1], expr, forward);
			e->need_transpose = true;
			inherit_expression_dependencies(ops[1], ops[2]);
			inherit_expression_dependencies(ops[1], ops[3]);
		}
		else
		{
			if (opcode == OpMatrixTimesVector)
				MSL_BFOP(spvFMulMatrixVector);
			else
				MSL_BFOP(spvFMulVectorMatrix);
		}
		break;
	}

	case OpMatrixTimesMatrix:
	{
		if (!msl_options.invariant_float_math && !has_decoration(ops[1], DecorationNoContraction))
		{
			CompilerGLSL::emit_instruction(instruction);
			break;
		}

		auto *a = maybe_get<SPIRExpression>(ops[2]);
		auto *b = maybe_get<SPIRExpression>(ops[3]);

		// If both matrices need transpose, we can multiply in flipped order and tag the expression as transposed.
		// a^T * b^T = (b * a)^T.
		if (a && b && a->need_transpose && b->need_transpose)
		{
			a->need_transpose = false;
			b->need_transpose = false;

			auto expr =
			    join("spvFMulMatrixMatrix(", enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), ", ",
			         enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])), ")");

			bool forward = should_forward(ops[2]) && should_forward(ops[3]);
			auto &e = emit_op(ops[0], ops[1], expr, forward);
			e.need_transpose = true;
			a->need_transpose = true;
			b->need_transpose = true;
			inherit_expression_dependencies(ops[1], ops[2]);
			inherit_expression_dependencies(ops[1], ops[3]);
		}
		else
			MSL_BFOP(spvFMulMatrixMatrix);

		break;
	}

	case OpIAddCarry:
	case OpISubBorrow:
	{
		uint32_t result_type = ops[0];
		uint32_t result_id = ops[1];
		uint32_t op0 = ops[2];
		uint32_t op1 = ops[3];
		auto &type = get<SPIRType>(result_type);
		emit_uninitialized_temporary_expression(result_type, result_id);

		auto &res_type = get<SPIRType>(type.member_types[1]);
		if (opcode == OpIAddCarry)
		{
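			// The carry bit is recovered without a wider type: for unsigned wrap-around
			// addition, a carry occurred if and only if the truncated sum is smaller than
			// both operands, i.e. NOT (sum >= max(op0, op1)). Metal's select(a, b, c)
			// returns b when c is true, hence select(1, 0, sum >= max(...)) below.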
			statement(to_expression(result_id), ".", to_member_name(type, 0), " = ",
			          to_enclosed_unpacked_expression(op0), " + ", to_enclosed_unpacked_expression(op1), ";");
			statement(to_expression(result_id), ".", to_member_name(type, 1), " = select(", type_to_glsl(res_type),
			          "(1), ", type_to_glsl(res_type), "(0), ", to_unpacked_expression(result_id), ".", to_member_name(type, 0),
			          " >= max(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), "));");
		}
		else
		{
			statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", to_enclosed_unpacked_expression(op0), " - ",
			          to_enclosed_unpacked_expression(op1), ";");
			statement(to_expression(result_id), ".", to_member_name(type, 1), " = select(", type_to_glsl(res_type),
			          "(1), ", type_to_glsl(res_type), "(0), ", to_enclosed_unpacked_expression(op0),
			          " >= ", to_enclosed_unpacked_expression(op1), ");");
		}
		break;
	}

	case OpUMulExtended:
	case OpSMulExtended:
	{
		uint32_t result_type = ops[0];
		uint32_t result_id = ops[1];
		uint32_t op0 = ops[2];
		uint32_t op1 = ops[3];
		auto &type = get<SPIRType>(result_type);
		emit_uninitialized_temporary_expression(result_type, result_id);

		statement(to_expression(result_id), ".", to_member_name(type, 0), " = ",
		          to_enclosed_unpacked_expression(op0), " * ", to_enclosed_unpacked_expression(op1), ";");
		statement(to_expression(result_id), ".", to_member_name(type, 1), " = mulhi(",
		          to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ");");
		break;
	}

	case OpArrayLength:
	{
		auto &type = expression_type(ops[2]);
		uint32_t offset = type_struct_member_offset(type, ops[3]);
		uint32_t stride = type_struct_member_array_stride(type, ops[3]);

		auto expr = join("(", to_buffer_size_expression(ops[2]), " - ", offset, ") / ", stride);
		emit_op(ops[0], ops[1], expr, true);
		break;
	}

	// SPV_INTEL_shader_integer_functions2
	case OpUCountLeadingZerosINTEL:
		MSL_UFOP(clz);
		break;

	case OpUCountTrailingZerosINTEL:
		MSL_UFOP(ctz);
		break;

	case OpAbsISubINTEL:
	case OpAbsUSubINTEL:
		MSL_BFOP(absdiff);
		break;

	case OpIAddSatINTEL:
	case OpUAddSatINTEL:
		MSL_BFOP(addsat);
		break;

	case OpIAverageINTEL:
	case OpUAverageINTEL:
		MSL_BFOP(hadd);
		break;

	case OpIAverageRoundedINTEL:
	case OpUAverageRoundedINTEL:
		MSL_BFOP(rhadd);
		break;

	case OpISubSatINTEL:
	case OpUSubSatINTEL:
		MSL_BFOP(subsat);
		break;

	case OpIMul32x16INTEL:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		uint32_t a = ops[2], b = ops[3];
		bool forward = should_forward(a) && should_forward(b);
		emit_op(result_type, id, join("int(short(", to_unpacked_expression(a), ")) * int(short(", to_unpacked_expression(b), "))"), forward);
		inherit_expression_dependencies(id, a);
		inherit_expression_dependencies(id, b);
		break;
	}

	case OpUMul32x16INTEL:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		uint32_t a = ops[2], b = ops[3];
		bool forward = should_forward(a) && should_forward(b);
		emit_op(result_type, id, join("uint(ushort(", to_unpacked_expression(a), ")) * uint(ushort(", to_unpacked_expression(b), "))"), forward);
		inherit_expression_dependencies(id, a);
		inherit_expression_dependencies(id, b);
		break;
	}

	// SPV_EXT_demote_to_helper_invocation
	case OpDemoteToHelperInvocationEXT:
		if (!msl_options.supports_msl_version(2, 3))
			SPIRV_CROSS_THROW("discard_fragment() does not formally have demote semantics until MSL 2.3.");
		CompilerGLSL::emit_instruction(instruction);
		break;

	case OpIsHelperInvocationEXT:
		if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3))
			SPIRV_CROSS_THROW("simd_is_helper_thread() requires MSL 2.3 on iOS.");
		else if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 1))
			SPIRV_CROSS_THROW("simd_is_helper_thread() requires MSL 2.1 on macOS.");
		emit_op(ops[0], ops[1], "simd_is_helper_thread()", false);
		break;

	case OpBeginInvocationInterlockEXT:
	case OpEndInvocationInterlockEXT:
		if (!msl_options.supports_msl_version(2, 0))
			SPIRV_CROSS_THROW("Raster order groups require MSL 2.0.");
		break; // Nothing to do in the body

	case OpConvertUToAccelerationStructureKHR:
		SPIRV_CROSS_THROW("ConvertUToAccelerationStructure is not supported in MSL.");
	case OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR:
		SPIRV_CROSS_THROW("BindingTableRecordOffset is not supported in MSL.");

	case OpRayQueryInitializeKHR:
	{
		flush_variable_declaration(ops[0]);

		statement(to_expression(ops[0]), ".reset(", "ray(", to_expression(ops[4]), ", ", to_expression(ops[6]), ", ",
		          to_expression(ops[5]), ", ", to_expression(ops[7]), "), ", to_expression(ops[1]),
		          ", intersection_params());");
		break;
	}
	case OpRayQueryProceedKHR:
	{
		flush_variable_declaration(ops[0]);
		emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".next()"), false);
		break;
	}

#define MSL_RAY_QUERY_IS_CANDIDATE get<SPIRConstant>(ops[3]).scalar_i32() == 0

#define MSL_RAY_QUERY_GET_OP(op, msl_op) \
	case OpRayQueryGet##op##KHR: \
		flush_variable_declaration(ops[2]); \
		emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".get_" #msl_op "()"), false); \
		break

#define MSL_RAY_QUERY_OP_INNER2(op, msl_prefix, msl_op) \
	case OpRayQueryGet##op##KHR: \
		flush_variable_declaration(ops[2]); \
		if (MSL_RAY_QUERY_IS_CANDIDATE) \
			emit_op(ops[0], ops[1], join(to_expression(ops[2]), #msl_prefix "_candidate_" #msl_op "()"), false); \
		else \
			emit_op(ops[0], ops[1], join(to_expression(ops[2]), #msl_prefix "_committed_" #msl_op "()"), false); \
		break

#define MSL_RAY_QUERY_GET_OP2(op, msl_op) MSL_RAY_QUERY_OP_INNER2(op, .get, msl_op)
#define MSL_RAY_QUERY_IS_OP2(op, msl_op) MSL_RAY_QUERY_OP_INNER2(op, .is, msl_op)
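	// Illustrative expansion: MSL_RAY_QUERY_GET_OP2(IntersectionInstanceId, instance_id)
	// becomes a case that emits either rq.get_candidate_instance_id() or
	// rq.get_committed_instance_id(), selected by the intersection-type literal in ops[3].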

	MSL_RAY_QUERY_GET_OP(RayTMin, ray_min_distance);
	MSL_RAY_QUERY_GET_OP(WorldRayOrigin, world_space_ray_origin);
	MSL_RAY_QUERY_GET_OP(WorldRayDirection, world_space_ray_direction);
	MSL_RAY_QUERY_GET_OP2(IntersectionInstanceId, instance_id);
	MSL_RAY_QUERY_GET_OP2(IntersectionInstanceCustomIndex, user_instance_id);
	MSL_RAY_QUERY_GET_OP2(IntersectionBarycentrics, triangle_barycentric_coord);
	MSL_RAY_QUERY_GET_OP2(IntersectionPrimitiveIndex, primitive_id);
	MSL_RAY_QUERY_GET_OP2(IntersectionGeometryIndex, geometry_id);
	MSL_RAY_QUERY_GET_OP2(IntersectionObjectRayOrigin, ray_origin);
	MSL_RAY_QUERY_GET_OP2(IntersectionObjectRayDirection, ray_direction);
	MSL_RAY_QUERY_GET_OP2(IntersectionObjectToWorld, object_to_world_transform);
	MSL_RAY_QUERY_GET_OP2(IntersectionWorldToObject, world_to_object_transform);
	MSL_RAY_QUERY_IS_OP2(IntersectionFrontFace, triangle_front_facing);

	case OpRayQueryGetIntersectionTypeKHR:
		flush_variable_declaration(ops[2]);
		if (MSL_RAY_QUERY_IS_CANDIDATE)
			emit_op(ops[0], ops[1], join("uint(", to_expression(ops[2]), ".get_candidate_intersection_type()) - 1"),
			        false);
		else
			emit_op(ops[0], ops[1], join("uint(", to_expression(ops[2]), ".get_committed_intersection_type())"), false);
		break;
	case OpRayQueryGetIntersectionTKHR:
		flush_variable_declaration(ops[2]);
		if (MSL_RAY_QUERY_IS_CANDIDATE)
			emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".get_candidate_triangle_distance()"), false);
		else
			emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".get_committed_distance()"), false);
		break;
	case OpRayQueryGetIntersectionCandidateAABBOpaqueKHR:
	{
		flush_variable_declaration(ops[0]);
		emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".is_candidate_non_opaque_bounding_box()"), false);
		break;
	}
	case OpRayQueryConfirmIntersectionKHR:
		flush_variable_declaration(ops[0]);
		statement(to_expression(ops[0]), ".commit_triangle_intersection();");
		break;
	case OpRayQueryGenerateIntersectionKHR:
		flush_variable_declaration(ops[0]);
		statement(to_expression(ops[0]), ".commit_bounding_box_intersection(", to_expression(ops[1]), ");");
		break;
	case OpRayQueryTerminateKHR:
		flush_variable_declaration(ops[0]);
		statement(to_expression(ops[0]), ".abort();");
		break;
#undef MSL_RAY_QUERY_GET_OP
#undef MSL_RAY_QUERY_IS_CANDIDATE
#undef MSL_RAY_QUERY_IS_OP2
#undef MSL_RAY_QUERY_GET_OP2
#undef MSL_RAY_QUERY_OP_INNER2

	default:
		CompilerGLSL::emit_instruction(instruction);
		break;
	}

	previous_instruction_opcode = opcode;
}

void CompilerMSL::emit_texture_op(const Instruction &i, bool sparse)
{
	if (sparse)
		SPIRV_CROSS_THROW("Sparse feedback not yet supported in MSL.");

	if (msl_options.use_framebuffer_fetch_subpasses)
	{
		auto *ops = stream(i);

		uint32_t result_type_id = ops[0];
		uint32_t id = ops[1];
		uint32_t img = ops[2];

		auto &type = expression_type(img);
		auto &imgtype = get<SPIRType>(type.self);

		// Use Metal's native frame-buffer fetch API for subpass inputs.
		if (imgtype.image.dim == DimSubpassData)
		{
			// Subpass inputs cannot be invalidated,
			// so just forward the expression directly.
			string expr = to_expression(img);
			emit_op(result_type_id, id, expr, true);
			return;
		}
	}

	// Fallback to default implementation
	CompilerGLSL::emit_texture_op(i, sparse);
}

void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem)
{
	if (get_execution_model() != ExecutionModelGLCompute && get_execution_model() != ExecutionModelTessellationControl)
		return;

	uint32_t exe_scope = id_exe_scope ? evaluate_constant_u32(id_exe_scope) : uint32_t(ScopeInvocation);
	uint32_t mem_scope = id_mem_scope ? evaluate_constant_u32(id_mem_scope) : uint32_t(ScopeInvocation);
	// Use the wider of the two scopes (smaller value)
	exe_scope = min(exe_scope, mem_scope);

	if (msl_options.emulate_subgroups && exe_scope >= ScopeSubgroup && !id_mem_sem)
		// In this case, we assume a "subgroup" size of 1. The barrier, then, is a noop.
		return;

	string bar_stmt;
	if ((msl_options.is_ios() && msl_options.supports_msl_version(1, 2)) || msl_options.supports_msl_version(2))
		bar_stmt = exe_scope < ScopeSubgroup ? "threadgroup_barrier" : "simdgroup_barrier";
	else
		bar_stmt = "threadgroup_barrier";
	bar_stmt += "(";

	uint32_t mem_sem = id_mem_sem ? evaluate_constant_u32(id_mem_sem) : uint32_t(MemorySemanticsMaskNone);

	// Use the | operator to combine flags if we can.
	if (msl_options.supports_msl_version(1, 2))
	{
		string mem_flags = "";
		// For tesc shaders, this also affects objects in the Output storage class.
		// Since in Metal, these are placed in a device buffer, we have to sync device memory here.
		if (get_execution_model() == ExecutionModelTessellationControl ||
		    (mem_sem & (MemorySemanticsUniformMemoryMask | MemorySemanticsCrossWorkgroupMemoryMask)))
			mem_flags += "mem_flags::mem_device";

		// Fix tessellation patch function processing
		if (get_execution_model() == ExecutionModelTessellationControl ||
		    (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask)))
		{
			if (!mem_flags.empty())
				mem_flags += " | ";
			mem_flags += "mem_flags::mem_threadgroup";
		}
		if (mem_sem & MemorySemanticsImageMemoryMask)
		{
			if (!mem_flags.empty())
				mem_flags += " | ";
			mem_flags += "mem_flags::mem_texture";
		}

		if (mem_flags.empty())
			mem_flags = "mem_flags::mem_none";

		bar_stmt += mem_flags;
	}
	else
	{
		if ((mem_sem & (MemorySemanticsUniformMemoryMask | MemorySemanticsCrossWorkgroupMemoryMask)) &&
		    (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask)))
			bar_stmt += "mem_flags::mem_device_and_threadgroup";
		else if (mem_sem & (MemorySemanticsUniformMemoryMask | MemorySemanticsCrossWorkgroupMemoryMask))
			bar_stmt += "mem_flags::mem_device";
		else if (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask))
			bar_stmt += "mem_flags::mem_threadgroup";
		else if (mem_sem & MemorySemanticsImageMemoryMask)
			bar_stmt += "mem_flags::mem_texture";
		else
			bar_stmt += "mem_flags::mem_none";
	}

	bar_stmt += ");";
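	// At this point bar_stmt reads, for example,
	// "threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup);".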

	statement(bar_stmt);

	assert(current_emitting_block);
	flush_control_dependent_expressions(current_emitting_block->self);
	flush_all_active_variables();
}

static bool storage_class_array_is_thread(StorageClass storage)
{
	switch (storage)
	{
	case StorageClassInput:
	case StorageClassOutput:
	case StorageClassGeneric:
	case StorageClassFunction:
	case StorageClassPrivate:
		return true;

	default:
		return false;
	}
}

void CompilerMSL::emit_array_copy(const string &lhs, uint32_t lhs_id, uint32_t rhs_id,
                                  StorageClass lhs_storage, StorageClass rhs_storage)
{
	// Allow Metal to use the array<T> template to make arrays a value type.
	// This, however, cannot be used for threadgroup address specifiers, so consider the custom array copy as fallback.
	bool lhs_is_thread_storage = storage_class_array_is_thread(lhs_storage);
	bool rhs_is_thread_storage = storage_class_array_is_thread(rhs_storage);

	bool lhs_is_array_template = lhs_is_thread_storage;
	bool rhs_is_array_template = rhs_is_thread_storage;

	// Special considerations for stage IO variables.
	// If the variable is actually backed by non-user visible device storage, we use array templates for those.
	//
	// Another special consideration is given to thread local variables which happen to have Offset decorations
	// applied to them. Block-like types do not use array templates, so we need to force POD path if we detect
	// these scenarios. This check isn't perfect since it would be technically possible to mix and match these things,
	// and for a fully correct solution we might have to track array template state through access chains as well,
	// but for all reasonable use cases, this should suffice.
	// This special case should also only apply to Function/Private storage classes.
	// We should not check backing variable for temporaries.
	auto *lhs_var = maybe_get_backing_variable(lhs_id);
	if (lhs_var && lhs_storage == StorageClassStorageBuffer && storage_class_array_is_thread(lhs_var->storage))
		lhs_is_array_template = true;
	else if (lhs_var && (lhs_storage == StorageClassFunction || lhs_storage == StorageClassPrivate) &&
	         type_is_block_like(get<SPIRType>(lhs_var->basetype)))
		lhs_is_array_template = false;

	auto *rhs_var = maybe_get_backing_variable(rhs_id);
	if (rhs_var && rhs_storage == StorageClassStorageBuffer && storage_class_array_is_thread(rhs_var->storage))
		rhs_is_array_template = true;
	else if (rhs_var && (rhs_storage == StorageClassFunction || rhs_storage == StorageClassPrivate) &&
	         type_is_block_like(get<SPIRType>(rhs_var->basetype)))
		rhs_is_array_template = false;
	// If threadgroup storage qualifiers are *not* used:
	// Avoid the spvCopy* wrapper functions; otherwise, the spvUnsafeArray<> template cannot be used with that storage qualifier.
	if (lhs_is_array_template && rhs_is_array_template && !using_builtin_array())
	{
		statement(lhs, " = ", to_expression(rhs_id), ";");
	}
	else
	{
		// Assignment from an array initializer is fine.
		auto &type = expression_type(rhs_id);
		auto *var = maybe_get_backing_variable(rhs_id);

		// Unfortunately, we cannot template on address space in MSL,
		// so explicit address space redirection it is ...
		bool is_constant = false;
		if (ir.ids[rhs_id].get_type() == TypeConstant)
		{
			is_constant = true;
		}
		else if (var && var->remapped_variable && var->statically_assigned &&
		         ir.ids[var->static_expression].get_type() == TypeConstant)
		{
			is_constant = true;
		}
		else if (rhs_storage == StorageClassUniform)
		{
			is_constant = true;
		}

		// For the case where we have OpLoad triggering an array copy,
		// we cannot easily detect this case ahead of time since it's
		// context dependent. We might have to force a recompile here
		// if this is the only use of array copies in our shader.
		if (type.array.size() > 1)
		{
			if (type.array.size() > kArrayCopyMultidimMax)
				SPIRV_CROSS_THROW("Cannot support this many dimensions for arrays of arrays.");
			auto func = static_cast<SPVFuncImpl>(SPVFuncImplArrayCopyMultidimBase + type.array.size());
			add_spv_func_and_recompile(func);
		}
		else
			add_spv_func_and_recompile(SPVFuncImplArrayCopy);

		const char *tag = nullptr;
		if (lhs_is_thread_storage && is_constant)
			tag = "FromConstantToStack";
		else if (lhs_storage == StorageClassWorkgroup && is_constant)
			tag = "FromConstantToThreadGroup";
		else if (lhs_is_thread_storage && rhs_is_thread_storage)
			tag = "FromStackToStack";
		else if (lhs_storage == StorageClassWorkgroup && rhs_is_thread_storage)
			tag = "FromStackToThreadGroup";
		else if (lhs_is_thread_storage && rhs_storage == StorageClassWorkgroup)
			tag = "FromThreadGroupToStack";
		else if (lhs_storage == StorageClassWorkgroup && rhs_storage == StorageClassWorkgroup)
			tag = "FromThreadGroupToThreadGroup";
		else if (lhs_storage == StorageClassStorageBuffer && rhs_storage == StorageClassStorageBuffer)
			tag = "FromDeviceToDevice";
		else if (lhs_storage == StorageClassStorageBuffer && is_constant)
			tag = "FromConstantToDevice";
		else if (lhs_storage == StorageClassStorageBuffer && rhs_storage == StorageClassWorkgroup)
			tag = "FromThreadGroupToDevice";
		else if (lhs_storage == StorageClassStorageBuffer && rhs_is_thread_storage)
			tag = "FromStackToDevice";
		else if (lhs_storage == StorageClassWorkgroup && rhs_storage == StorageClassStorageBuffer)
			tag = "FromDeviceToThreadGroup";
		else if (lhs_is_thread_storage && rhs_storage == StorageClassStorageBuffer)
			tag = "FromDeviceToStack";
		else
			SPIRV_CROSS_THROW("Unknown storage class used for copying arrays.");

		// Pass internal array of spvUnsafeArray<> into wrapper functions
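		// The emitted call then looks, for instance, like
		// spvArrayCopyFromConstantToStack1(dst.elements, src);
		// where the trailing digit is the number of array dimensions.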
		if (lhs_is_array_template && rhs_is_array_template && !msl_options.force_native_arrays)
			statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ".elements, ", to_expression(rhs_id), ".elements);");
		else if (lhs_is_array_template && !msl_options.force_native_arrays)
			statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ".elements, ", to_expression(rhs_id), ");");
		else if (rhs_is_array_template && !msl_options.force_native_arrays)
			statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ", ", to_expression(rhs_id), ".elements);");
		else
			statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ", ", to_expression(rhs_id), ");");
	}
}
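
// Triangle domains physically use 3 outer / 1 inner tess levels in Metal;
// quad domains use 4 outer / 2 inner.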
uint32_t CompilerMSL::get_physical_tess_level_array_size(spv::BuiltIn builtin) const
{
	if (get_execution_mode_bitset().get(ExecutionModeTriangles))
		return builtin == BuiltInTessLevelInner ? 1 : 3;
	else
		return builtin == BuiltInTessLevelInner ? 2 : 4;
}

// Since MSL does not allow arrays to be copied via simple variable assignment,
// if the LHS and RHS represent an assignment of an entire array, it must be
// implemented by calling an array copy function.
// Returns whether the struct assignment was emitted.
bool CompilerMSL::maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs)
{
	// We only care about assignments of an entire array
	auto &type = expression_type(id_rhs);
	if (type.array.size() == 0)
		return false;

	auto *var = maybe_get<SPIRVariable>(id_lhs);

	// Is this a remapped, static constant? Don't do anything.
	if (var && var->remapped_variable && var->statically_assigned)
		return true;

	if (ir.ids[id_rhs].get_type() == TypeConstant && var && var->deferred_declaration)
	{
		// Special case, if we end up declaring a variable when assigning the constant array,
		// we can avoid the copy by directly assigning the constant expression.
		// This is likely necessary to be able to use a variable as a true look-up table, as it is unlikely
		// the compiler will be able to optimize the spvArrayCopy() into a constant LUT.
		// After a variable has been declared, we can no longer assign constant arrays in MSL unfortunately.
		statement(to_expression(id_lhs), " = ", constant_expression(get<SPIRConstant>(id_rhs)), ";");
		return true;
	}

	if (get_execution_model() == ExecutionModelTessellationControl &&
	    has_decoration(id_lhs, DecorationBuiltIn))
	{
		auto builtin = BuiltIn(get_decoration(id_lhs, DecorationBuiltIn));
		// Need to manually unroll the array store.
		if (builtin == BuiltInTessLevelInner || builtin == BuiltInTessLevelOuter)
		{
			uint32_t array_size = get_physical_tess_level_array_size(builtin);
			if (array_size == 1)
				statement(to_expression(id_lhs), " = half(", to_expression(id_rhs), "[0]);");
			else
			{
				for (uint32_t i = 0; i < array_size; i++)
					statement(to_expression(id_lhs), "[", i, "] = half(", to_expression(id_rhs), "[", i, "]);");
			}
			return true;
		}
	}

	// Ensure the LHS variable has been declared
	auto *p_v_lhs = maybe_get_backing_variable(id_lhs);
	if (p_v_lhs)
		flush_variable_declaration(p_v_lhs->self);

	auto lhs_storage = get_expression_effective_storage_class(id_lhs);
	auto rhs_storage = get_expression_effective_storage_class(id_rhs);
	emit_array_copy(to_expression(id_lhs), id_lhs, id_rhs, lhs_storage, rhs_storage);
	register_write(id_lhs);

	return true;
}

// Emits one of the atomic functions. In MSL, the atomic functions operate on pointers
void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, const char *op, uint32_t mem_order_1,
                                      uint32_t mem_order_2, bool has_mem_order_2, uint32_t obj, uint32_t op1,
                                      bool op1_is_pointer, bool op1_is_literal, uint32_t op2)
{
	string exp = string(op) + "(";

	auto &type = get_pointee_type(expression_type(obj));
	exp += "(";
	auto *var = maybe_get_backing_variable(obj);
	if (!var)
		SPIRV_CROSS_THROW("No backing variable for atomic operation.");

	// Emulate texture2D atomic operations
	const auto &res_type = get<SPIRType>(var->basetype);
	if (res_type.storage == StorageClassUniformConstant && res_type.basetype == SPIRType::Image)
	{
		exp += "device";
	}
	else
	{
		exp += get_argument_address_space(*var);
	}

	exp += " atomic_";
	exp += type_to_glsl(type);
	exp += "*)";

	exp += "&";
	exp += to_enclosed_expression(obj);
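	// At this point the expression reads, for example (names illustrative):
	// atomic_fetch_add_explicit((device atomic_uint*)&buf.counter
	// with the operands and memory order appended below.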

	bool is_atomic_compare_exchange_strong = op1_is_pointer && op1;

	if (is_atomic_compare_exchange_strong)
	{
		assert(strcmp(op, "atomic_compare_exchange_weak_explicit") == 0);
		assert(op2);
		assert(has_mem_order_2);
		exp += ", &";
		exp += to_name(result_id);
		exp += ", ";
		exp += to_expression(op2);
		exp += ", ";
		exp += get_memory_order(mem_order_1);
		exp += ", ";
		exp += get_memory_order(mem_order_2);
		exp += ")";

		// MSL only supports the weak atomic compare exchange, so emit a CAS loop here.
		// The MSL function returns false if the atomic write fails OR the comparison test fails,
		// so we must validate that it wasn't the comparison test that failed before continuing
		// the CAS loop, otherwise it will loop infinitely, with the comparison test always failing.
		// The function updates the comparator value from the memory value, so the additional
		// comparison test evaluates the memory value against the expected value.
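		// The emitted MSL therefore looks roughly like:
		//   do
		//   {
		//       _result = expected;
		//   } while (!atomic_compare_exchange_weak_explicit(ptr, &_result, value, ...) && _result == expected);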
		emit_uninitialized_temporary_expression(result_type, result_id);
		statement("do");
		begin_scope();
		statement(to_name(result_id), " = ", to_expression(op1), ";");
		end_scope_decl(join("while (!", exp, " && ", to_name(result_id), " == ", to_enclosed_expression(op1), ")"));
	}
	else
	{
		assert(strcmp(op, "atomic_compare_exchange_weak_explicit") != 0);
		if (op1)
		{
			if (op1_is_literal)
				exp += join(", ", op1);
			else
				exp += ", " + to_expression(op1);
		}
		if (op2)
			exp += ", " + to_expression(op2);

		exp += string(", ") + get_memory_order(mem_order_1);
		if (has_mem_order_2)
			exp += string(", ") + get_memory_order(mem_order_2);

		exp += ")";

		if (strcmp(op, "atomic_store_explicit") != 0)
			emit_op(result_type, result_id, exp, false);
		else
			statement(exp, ";");
	}

	flush_all_atomic_capable_variables();
}

// Metal only supports relaxed memory order for now
const char *CompilerMSL::get_memory_order(uint32_t)
{
	return "memory_order_relaxed";
}

// Override for MSL-specific extension syntax instructions.
// In some cases, deliberately select either the fast or precise versions of the MSL functions to match Vulkan math precision results.
void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t count)
{
	auto op = static_cast<GLSLstd450>(eop);

	// If we need to do implicit bitcasts, make sure we do it with the correct type.
	uint32_t integer_width = get_integer_width_for_glsl_instruction(op, args, count);
	auto int_type = to_signed_basetype(integer_width);
	auto uint_type = to_unsigned_basetype(integer_width);

	switch (op)
	{
	case GLSLstd450Sinh:
		emit_unary_func_op(result_type, id, args[0], "fast::sinh");
		break;
	case GLSLstd450Cosh:
		emit_unary_func_op(result_type, id, args[0], "fast::cosh");
		break;
	case GLSLstd450Tanh:
		emit_unary_func_op(result_type, id, args[0], "precise::tanh");
		break;
	case GLSLstd450Atan2:
		emit_binary_func_op(result_type, id, args[0], args[1], "precise::atan2");
		break;
	case GLSLstd450InverseSqrt:
		emit_unary_func_op(result_type, id, args[0], "rsqrt");
		break;
	case GLSLstd450RoundEven:
		emit_unary_func_op(result_type, id, args[0], "rint");
		break;

	case GLSLstd450FindILsb:
	{
		// In this template version of findLSB, we return T.
		auto basetype = expression_type(args[0]).basetype;
		emit_unary_func_op_cast(result_type, id, args[0], "spvFindLSB", basetype, basetype);
		break;
	}

	case GLSLstd450FindSMsb:
		emit_unary_func_op_cast(result_type, id, args[0], "spvFindSMSB", int_type, int_type);
		break;

	case GLSLstd450FindUMsb:
		emit_unary_func_op_cast(result_type, id, args[0], "spvFindUMSB", uint_type, uint_type);
		break;

	case GLSLstd450PackSnorm4x8:
		emit_unary_func_op(result_type, id, args[0], "pack_float_to_snorm4x8");
		break;
	case GLSLstd450PackUnorm4x8:
		emit_unary_func_op(result_type, id, args[0], "pack_float_to_unorm4x8");
		break;
	case GLSLstd450PackSnorm2x16:
		emit_unary_func_op(result_type, id, args[0], "pack_float_to_snorm2x16");
		break;
	case GLSLstd450PackUnorm2x16:
		emit_unary_func_op(result_type, id, args[0], "pack_float_to_unorm2x16");
		break;

	case GLSLstd450PackHalf2x16:
	{
		auto expr = join("as_type<uint>(half2(", to_expression(args[0]), "))");
		emit_op(result_type, id, expr, should_forward(args[0]));
		inherit_expression_dependencies(id, args[0]);
		break;
	}

	case GLSLstd450UnpackSnorm4x8:
		emit_unary_func_op(result_type, id, args[0], "unpack_snorm4x8_to_float");
		break;
	case GLSLstd450UnpackUnorm4x8:
		emit_unary_func_op(result_type, id, args[0], "unpack_unorm4x8_to_float");
		break;
	case GLSLstd450UnpackSnorm2x16:
		emit_unary_func_op(result_type, id, args[0], "unpack_snorm2x16_to_float");
		break;
	case GLSLstd450UnpackUnorm2x16:
		emit_unary_func_op(result_type, id, args[0], "unpack_unorm2x16_to_float");
		break;

	case GLSLstd450UnpackHalf2x16:
	{
		auto expr = join("float2(as_type<half2>(", to_expression(args[0]), "))");
		emit_op(result_type, id, expr, should_forward(args[0]));
		inherit_expression_dependencies(id, args[0]);
		break;
	}

	case GLSLstd450PackDouble2x32:
		emit_unary_func_op(result_type, id, args[0], "unsupported_GLSLstd450PackDouble2x32"); // Currently unsupported
		break;
	case GLSLstd450UnpackDouble2x32:
		emit_unary_func_op(result_type, id, args[0], "unsupported_GLSLstd450UnpackDouble2x32"); // Currently unsupported
		break;

	case GLSLstd450MatrixInverse:
	{
		auto &mat_type = get<SPIRType>(result_type);
		switch (mat_type.columns)
		{
		case 2:
			emit_unary_func_op(result_type, id, args[0], "spvInverse2x2");
			break;
		case 3:
			emit_unary_func_op(result_type, id, args[0], "spvInverse3x3");
			break;
		case 4:
			emit_unary_func_op(result_type, id, args[0], "spvInverse4x4");
			break;
		default:
			break;
		}
		break;
	}

	case GLSLstd450FMin:
		// If the result type isn't float, don't bother calling the specific
		// precise::/fast:: version. Metal doesn't have those for half and
		// double types.
		if (get<SPIRType>(result_type).basetype != SPIRType::Float)
			emit_binary_func_op(result_type, id, args[0], args[1], "min");
		else
			emit_binary_func_op(result_type, id, args[0], args[1], "fast::min");
		break;

	case GLSLstd450FMax:
		if (get<SPIRType>(result_type).basetype != SPIRType::Float)
			emit_binary_func_op(result_type, id, args[0], args[1], "max");
		else
			emit_binary_func_op(result_type, id, args[0], args[1], "fast::max");
		break;

	case GLSLstd450FClamp:
		// TODO: If args[1] is 0 and args[2] is 1, emit a saturate() call.
		if (get<SPIRType>(result_type).basetype != SPIRType::Float)
			emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp");
		else
			emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fast::clamp");
		break;

	case GLSLstd450NMin:
		if (get<SPIRType>(result_type).basetype != SPIRType::Float)
			emit_binary_func_op(result_type, id, args[0], args[1], "min");
		else
			emit_binary_func_op(result_type, id, args[0], args[1], "precise::min");
		break;

	case GLSLstd450NMax:
		if (get<SPIRType>(result_type).basetype != SPIRType::Float)
			emit_binary_func_op(result_type, id, args[0], args[1], "max");
		else
			emit_binary_func_op(result_type, id, args[0], args[1], "precise::max");
		break;

	case GLSLstd450NClamp:
		// TODO: If args[1] is 0 and args[2] is 1, emit a saturate() call.
		if (get<SPIRType>(result_type).basetype != SPIRType::Float)
			emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp");
		else
			emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "precise::clamp");
		break;

	case GLSLstd450InterpolateAtCentroid:
	{
		// We can't just emit the expression normally, because the qualified name contains a call to the default
		// interpolate method, or refers to a local variable. We saved the interface index we need; use it to construct
		// the base for the method call.
		uint32_t interface_index = get_extended_decoration(args[0], SPIRVCrossDecorationInterfaceMemberIndex);
		string component;
		if (has_extended_decoration(args[0], SPIRVCrossDecorationInterpolantComponentExpr))
		{
			uint32_t index_expr = get_extended_decoration(args[0], SPIRVCrossDecorationInterpolantComponentExpr);
			auto *c = maybe_get<SPIRConstant>(index_expr);
			if (!c || c->specialization)
				component = join("[", to_expression(index_expr), "]");
			else
				component = join(".", index_to_swizzle(c->scalar()));
		}
		emit_op(result_type, id,
		        join(to_name(stage_in_var_id), ".", to_member_name(get_stage_in_struct_type(), interface_index),
		             ".interpolate_at_centroid()", component),
		        should_forward(args[0]));
		break;
	}

	case GLSLstd450InterpolateAtSample:
	{
		uint32_t interface_index = get_extended_decoration(args[0], SPIRVCrossDecorationInterfaceMemberIndex);
		string component;
		if (has_extended_decoration(args[0], SPIRVCrossDecorationInterpolantComponentExpr))
		{
			uint32_t index_expr = get_extended_decoration(args[0], SPIRVCrossDecorationInterpolantComponentExpr);
			auto *c = maybe_get<SPIRConstant>(index_expr);
			if (!c || c->specialization)
				component = join("[", to_expression(index_expr), "]");
			else
				component = join(".", index_to_swizzle(c->scalar()));
		}
		emit_op(result_type, id,
		        join(to_name(stage_in_var_id), ".", to_member_name(get_stage_in_struct_type(), interface_index),
		             ".interpolate_at_sample(", to_expression(args[1]), ")", component),
		        should_forward(args[0]) && should_forward(args[1]));
		break;
	}

	case GLSLstd450InterpolateAtOffset:
	{
		uint32_t interface_index = get_extended_decoration(args[0], SPIRVCrossDecorationInterfaceMemberIndex);
		string component;
		if (has_extended_decoration(args[0], SPIRVCrossDecorationInterpolantComponentExpr))
		{
			uint32_t index_expr = get_extended_decoration(args[0], SPIRVCrossDecorationInterpolantComponentExpr);
			auto *c = maybe_get<SPIRConstant>(index_expr);
			if (!c || c->specialization)
				component = join("[", to_expression(index_expr), "]");
			else
				component = join(".", index_to_swizzle(c->scalar()));
		}
		// Like Direct3D, Metal puts the (0, 0) at the upper-left corner, not the center as SPIR-V and GLSL do.
		// Offset the offset by (1/2 - 1/16), or 0.4375, to compensate for this.
		// It has to be (1/2 - 1/16) and not 1/2, or several CTS tests subtly break on Intel.
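		// The resulting expression thus looks something like
		// in.m_foo.interpolate_at_offset(offset + 0.4375), optionally followed by the
		// component swizzle or index built above (member name illustrative).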
  8140. emit_op(result_type, id,
  8141. join(to_name(stage_in_var_id), ".", to_member_name(get_stage_in_struct_type(), interface_index),
  8142. ".interpolate_at_offset(", to_expression(args[1]), " + 0.4375)", component),
  8143. should_forward(args[0]) && should_forward(args[1]));
  8144. break;
  8145. }
	case GLSLstd450Distance:
		// MSL does not support scalar versions here.
		if (expression_type(args[0]).vecsize == 1)
		{
			// Equivalent to length(a - b) -> abs(a - b).
			emit_op(result_type, id,
			        join("abs(", to_enclosed_unpacked_expression(args[0]), " - ",
			             to_enclosed_unpacked_expression(args[1]), ")"),
			        should_forward(args[0]) && should_forward(args[1]));
			inherit_expression_dependencies(id, args[0]);
			inherit_expression_dependencies(id, args[1]);
		}
		else
			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
		break;

	case GLSLstd450Length:
		// MSL does not support scalar versions, so use abs().
		if (expression_type(args[0]).vecsize == 1)
			emit_unary_func_op(result_type, id, args[0], "abs");
		else
			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
		break;

	case GLSLstd450Normalize:
		// MSL does not support scalar versions here.
		// Returns -1 or 1 for valid input, sign() does the job.
		if (expression_type(args[0]).vecsize == 1)
			emit_unary_func_op(result_type, id, args[0], "sign");
		else
			emit_unary_func_op(result_type, id, args[0], "fast::normalize");
		break;

	case GLSLstd450Reflect:
		if (get<SPIRType>(result_type).vecsize == 1)
			emit_binary_func_op(result_type, id, args[0], args[1], "spvReflect");
		else
			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
		break;

	case GLSLstd450Refract:
		if (get<SPIRType>(result_type).vecsize == 1)
			emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "spvRefract");
		else
			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
		break;

	case GLSLstd450FaceForward:
		if (get<SPIRType>(result_type).vecsize == 1)
			emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "spvFaceForward");
		else
			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
		break;
	case GLSLstd450Modf:
	case GLSLstd450Frexp:
	{
		// Special case. If the variable is a scalar access chain, we cannot use it directly. We have to emit a temporary.
		// Another special case is if the variable is in a storage class which is not thread.
		auto *ptr = maybe_get<SPIRExpression>(args[1]);
		auto &type = expression_type(args[1]);

		bool is_thread_storage = storage_class_array_is_thread(type.storage);
		if (type.storage == StorageClassOutput && capture_output_to_buffer)
			is_thread_storage = false;

		if (!is_thread_storage ||
		    (ptr && ptr->access_chain && is_scalar(expression_type(args[1]))))
		{
			register_call_out_argument(args[1]);
			forced_temporaries.insert(id);

			// Need to create temporaries and copy over to access chain after.
			// We cannot directly take the reference of a vector swizzle in MSL, even if it's scalar ...
			uint32_t &tmp_id = extra_sub_expressions[id];
			if (!tmp_id)
				tmp_id = ir.increase_bound_by(1);

			uint32_t tmp_type_id = get_pointee_type_id(expression_type_id(args[1]));
			emit_uninitialized_temporary_expression(tmp_type_id, tmp_id);
			emit_binary_func_op(result_type, id, args[0], tmp_id, eop == GLSLstd450Modf ? "modf" : "frexp");
			statement(to_expression(args[1]), " = ", to_expression(tmp_id), ";");
		}
		else
			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
		break;
	}
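	// For illustration, when the out-parameter above is a swizzled access chain
	// such as "v.y" (whose address cannot be taken in MSL), the emitted code
	// looks roughly like:
	//     float _24;                // uninitialized temporary for the out-param
	//     float _25 = modf(x, _24);
	//     v.y = _24;                // copied back to the access chain afterwards
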
	default:
		CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
		break;
	}
}

void CompilerMSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop,
                                                        const uint32_t *args, uint32_t count)
{
	enum AMDShaderTrinaryMinMax
	{
		FMin3AMD = 1,
		UMin3AMD = 2,
		SMin3AMD = 3,
		FMax3AMD = 4,
		UMax3AMD = 5,
		SMax3AMD = 6,
		FMid3AMD = 7,
		UMid3AMD = 8,
		SMid3AMD = 9
	};

	if (!msl_options.supports_msl_version(2, 1))
		SPIRV_CROSS_THROW("Trinary min/max functions require MSL 2.1.");

	auto op = static_cast<AMDShaderTrinaryMinMax>(eop);

	switch (op)
	{
	case FMid3AMD:
	case UMid3AMD:
	case SMid3AMD:
		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "median3");
		break;
	default:
		CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(result_type, id, eop, args, count);
		break;
	}
}
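
// For illustration: the mid3 variants above map directly onto MSL's median3(),
// so e.g. FMid3AMD on floats is emitted roughly as:
//     float _r = median3(a, b, c);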

// Emit a structure declaration for the specified interface variable.
void CompilerMSL::emit_interface_block(uint32_t ib_var_id)
{
	if (ib_var_id)
	{
		auto &ib_var = get<SPIRVariable>(ib_var_id);
		auto &ib_type = get_variable_data_type(ib_var);
		//assert(ib_type.basetype == SPIRType::Struct && !ib_type.member_types.empty());
		assert(ib_type.basetype == SPIRType::Struct);
		emit_struct(ib_type);
	}
}

// Emits the declaration signature of the specified function.
// If this is the entry point function, Metal-specific return value and function arguments are added.
void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &)
{
	if (func.self != ir.default_entry_point)
		add_function_overload(func);

	local_variable_names = resource_names;
	string decl;

	processing_entry_point = func.self == ir.default_entry_point;

	// Metal helper functions must be static force-inline, otherwise they will cause problems when linked together in a single Metallib.
	if (!processing_entry_point)
		statement(force_inline);

	auto &type = get<SPIRType>(func.return_type);

	if (!type.array.empty() && msl_options.force_native_arrays)
	{
		// We cannot return native arrays in MSL, so "return" through an out variable.
		decl += "void";
	}
	else
	{
		decl += func_type_decl(type);
	}

	decl += " ";
	decl += to_name(func.self);
	decl += "(";

	if (!type.array.empty() && msl_options.force_native_arrays)
	{
		// Fake array returns by writing to an out array instead.
		decl += "thread ";
		decl += type_to_glsl(type);
		decl += " (&spvReturnValue)";
		decl += type_to_array_glsl(type);
		if (!func.arguments.empty())
			decl += ", ";
	}

	if (processing_entry_point)
	{
		if (msl_options.argument_buffers)
			decl += entry_point_args_argument_buffer(!func.arguments.empty());
		else
			decl += entry_point_args_classic(!func.arguments.empty());

		// If entry point function has variables that require early declaration,
		// ensure they each have an empty initializer, creating one if needed.
		// This is done at this late stage because the initialization expression
		// is cleared after each compilation pass.
		for (auto var_id : vars_needing_early_declaration)
		{
			auto &ed_var = get<SPIRVariable>(var_id);
			ID &initializer = ed_var.initializer;
			if (!initializer)
				initializer = ir.increase_bound_by(1);

			// Do not override proper initializers.
			if (ir.ids[initializer].get_type() == TypeNone || ir.ids[initializer].get_type() == TypeExpression)
				set<SPIRExpression>(ed_var.initializer, "{}", ed_var.basetype, true);
		}
	}

	for (auto &arg : func.arguments)
	{
		uint32_t name_id = arg.id;

		auto *var = maybe_get<SPIRVariable>(arg.id);
		if (var)
		{
			// If we need to modify the name of the variable, make sure we modify the original variable.
			// Our alias is just a shadow variable.
			if (arg.alias_global_variable && var->basevariable)
				name_id = var->basevariable;

			var->parameter = &arg; // Hold a pointer to the parameter so we can invalidate the readonly field if needed.
		}

		add_local_variable_name(name_id);

		decl += argument_decl(arg);

		bool is_dynamic_img_sampler = has_extended_decoration(arg.id, SPIRVCrossDecorationDynamicImageSampler);

		auto &arg_type = get<SPIRType>(arg.type);
		if (arg_type.basetype == SPIRType::SampledImage && !is_dynamic_img_sampler)
		{
			// Manufacture automatic plane args for multiplanar texture
			uint32_t planes = 1;
			if (auto *constexpr_sampler = find_constexpr_sampler(name_id))
				if (constexpr_sampler->ycbcr_conversion_enable)
					planes = constexpr_sampler->planes;
			for (uint32_t i = 1; i < planes; i++)
				decl += join(", ", argument_decl(arg), plane_name_suffix, i);

			// Manufacture automatic sampler arg for SampledImage texture
			if (arg_type.image.dim != DimBuffer)
				decl += join(", thread const ", sampler_type(arg_type, arg.id), " ", to_sampler_expression(arg.id));
		}

		// Manufacture automatic swizzle arg.
		if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(arg_type) &&
		    !is_dynamic_img_sampler)
		{
			bool arg_is_array = !arg_type.array.empty();
			decl += join(", constant uint", arg_is_array ? "* " : "& ", to_swizzle_expression(arg.id));
		}

		if (buffers_requiring_array_length.count(name_id))
		{
			bool arg_is_array = !arg_type.array.empty();
			decl += join(", constant uint", arg_is_array ? "* " : "& ", to_buffer_size_expression(name_id));
		}

		if (&arg != &func.arguments.back())
			decl += ", ";
	}

	decl += ")";
	statement(decl);
}
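
// For illustration: a helper function returning a native array, with
// force_native_arrays enabled, would get a prototype roughly like the
// following (assuming force_inline expands to a static always_inline
// qualifier):
//     static inline __attribute__((always_inline))
//     void get_weights(thread float (&spvReturnValue)[4], float t)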

static bool needs_chroma_reconstruction(const MSLConstexprSampler *constexpr_sampler)
{
	// For now, only multiplanar images need explicit reconstruction. GBGR and BGRG images
	// use implicit reconstruction.
	return constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable && constexpr_sampler->planes > 1;
}

// Returns the texture sampling function string for the specified image and sampling characteristics.
string CompilerMSL::to_function_name(const TextureFunctionNameArguments &args)
{
	VariableID img = args.base.img;
	auto &imgtype = *args.base.imgtype;

	const MSLConstexprSampler *constexpr_sampler = nullptr;
	bool is_dynamic_img_sampler = false;
	if (auto *var = maybe_get_backing_variable(img))
	{
		constexpr_sampler = find_constexpr_sampler(var->basevariable ? var->basevariable : VariableID(var->self));
		is_dynamic_img_sampler = has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler);
	}

	// Special-case gather. We have to alter the component being looked up
	// in the swizzle case.
	if (msl_options.swizzle_texture_samples && args.base.is_gather && !is_dynamic_img_sampler &&
	    (!constexpr_sampler || !constexpr_sampler->ycbcr_conversion_enable))
	{
		add_spv_func_and_recompile(imgtype.image.depth ? SPVFuncImplGatherCompareSwizzle : SPVFuncImplGatherSwizzle);
		return imgtype.image.depth ? "spvGatherCompareSwizzle" : "spvGatherSwizzle";
	}

	auto *combined = maybe_get<SPIRCombinedImageSampler>(img);

	// Texture reference
	string fname;
	if (needs_chroma_reconstruction(constexpr_sampler) && !is_dynamic_img_sampler)
	{
		if (constexpr_sampler->planes != 2 && constexpr_sampler->planes != 3)
			SPIRV_CROSS_THROW("Unhandled number of color image planes!");
		// 444 images aren't downsampled, so we don't need to do linear filtering.
		if (constexpr_sampler->resolution == MSL_FORMAT_RESOLUTION_444 ||
		    constexpr_sampler->chroma_filter == MSL_SAMPLER_FILTER_NEAREST)
		{
			if (constexpr_sampler->planes == 2)
				add_spv_func_and_recompile(SPVFuncImplChromaReconstructNearest2Plane);
			else
				add_spv_func_and_recompile(SPVFuncImplChromaReconstructNearest3Plane);
			fname = "spvChromaReconstructNearest";
		}
		else // Linear with a downsampled format
		{
			fname = "spvChromaReconstructLinear";
			switch (constexpr_sampler->resolution)
			{
			case MSL_FORMAT_RESOLUTION_444:
				assert(false);
				break; // not reached
			case MSL_FORMAT_RESOLUTION_422:
				switch (constexpr_sampler->x_chroma_offset)
				{
				case MSL_CHROMA_LOCATION_COSITED_EVEN:
					if (constexpr_sampler->planes == 2)
						add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear422CositedEven2Plane);
					else
						add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear422CositedEven3Plane);
					fname += "422CositedEven";
					break;
				case MSL_CHROMA_LOCATION_MIDPOINT:
					if (constexpr_sampler->planes == 2)
						add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear422Midpoint2Plane);
					else
						add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear422Midpoint3Plane);
					fname += "422Midpoint";
					break;
				default:
					SPIRV_CROSS_THROW("Invalid chroma location.");
				}
				break;
			case MSL_FORMAT_RESOLUTION_420:
				fname += "420";
				switch (constexpr_sampler->x_chroma_offset)
				{
				case MSL_CHROMA_LOCATION_COSITED_EVEN:
					switch (constexpr_sampler->y_chroma_offset)
					{
					case MSL_CHROMA_LOCATION_COSITED_EVEN:
						if (constexpr_sampler->planes == 2)
							add_spv_func_and_recompile(
							    SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven2Plane);
						else
							add_spv_func_and_recompile(
							    SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven3Plane);
						fname += "XCositedEvenYCositedEven";
						break;
					case MSL_CHROMA_LOCATION_MIDPOINT:
						if (constexpr_sampler->planes == 2)
							add_spv_func_and_recompile(
							    SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint2Plane);
						else
							add_spv_func_and_recompile(
							    SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint3Plane);
						fname += "XCositedEvenYMidpoint";
						break;
					default:
						SPIRV_CROSS_THROW("Invalid Y chroma location.");
					}
					break;
				case MSL_CHROMA_LOCATION_MIDPOINT:
					switch (constexpr_sampler->y_chroma_offset)
					{
					case MSL_CHROMA_LOCATION_COSITED_EVEN:
						if (constexpr_sampler->planes == 2)
							add_spv_func_and_recompile(
							    SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven2Plane);
						else
							add_spv_func_and_recompile(
							    SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven3Plane);
						fname += "XMidpointYCositedEven";
						break;
					case MSL_CHROMA_LOCATION_MIDPOINT:
						if (constexpr_sampler->planes == 2)
							add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint2Plane);
						else
							add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane);
						fname += "XMidpointYMidpoint";
						break;
					default:
						SPIRV_CROSS_THROW("Invalid Y chroma location.");
					}
					break;
				default:
					SPIRV_CROSS_THROW("Invalid X chroma location.");
				}
				break;
			default:
				SPIRV_CROSS_THROW("Invalid format resolution.");
			}
		}
	}
	else
	{
		fname = to_expression(combined ? combined->image : img) + ".";

		// Texture function and sampler
		if (args.base.is_fetch)
			fname += "read";
		else if (args.base.is_gather)
			fname += "gather";
		else
			fname += "sample";

		if (args.has_dref)
			fname += "_compare";
	}

	return fname;
}
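
// For illustration: for an ordinary 2D combined image-sampler "tex", the
// function above returns names roughly like "tex.sample", "tex.sample_compare",
// "tex.gather" or "tex.read"; for a 2-plane 4:2:0 Y'CbCr sampler with midpoint
// chroma in both axes it instead selects a helper such as
// "spvChromaReconstructLinear420XMidpointYMidpoint".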

string CompilerMSL::convert_to_f32(const string &expr, uint32_t components)
{
	SPIRType t;
	t.basetype = SPIRType::Float;
	t.vecsize = components;
	t.columns = 1;
	return join(type_to_glsl_constructor(t), "(", expr, ")");
}
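
// For illustration: convert_to_f32("h", 2) yields roughly "float2(h)", and is
// used to promote half (or other non-float) sampling operands to float.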

static inline bool sampling_type_needs_f32_conversion(const SPIRType &type)
{
	// Double is not supported to begin with, but it doesn't hurt to check for completeness.
	return type.basetype == SPIRType::Half || type.basetype == SPIRType::Double;
}

// Returns the function args for a texture sampling function for the specified image and sampling characteristics.
string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool *p_forward)
{
	VariableID img = args.base.img;
	auto &imgtype = *args.base.imgtype;
	uint32_t lod = args.lod;
	uint32_t grad_x = args.grad_x;
	uint32_t grad_y = args.grad_y;
	uint32_t bias = args.bias;

	const MSLConstexprSampler *constexpr_sampler = nullptr;
	bool is_dynamic_img_sampler = false;
	if (auto *var = maybe_get_backing_variable(img))
	{
		constexpr_sampler = find_constexpr_sampler(var->basevariable ? var->basevariable : VariableID(var->self));
		is_dynamic_img_sampler = has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler);
	}

	string farg_str;
	bool forward = true;

	if (!is_dynamic_img_sampler)
	{
		// Texture reference (for some cases)
		if (needs_chroma_reconstruction(constexpr_sampler))
		{
			// Multiplanar images need two or three textures.
			farg_str += to_expression(img);
			for (uint32_t i = 1; i < constexpr_sampler->planes; i++)
				farg_str += join(", ", to_expression(img), plane_name_suffix, i);
		}
		else if ((!constexpr_sampler || !constexpr_sampler->ycbcr_conversion_enable) &&
		         msl_options.swizzle_texture_samples && args.base.is_gather)
		{
			auto *combined = maybe_get<SPIRCombinedImageSampler>(img);
			farg_str += to_expression(combined ? combined->image : img);
		}

		// Sampler reference
		if (!args.base.is_fetch)
		{
			if (!farg_str.empty())
				farg_str += ", ";
			farg_str += to_sampler_expression(img);
		}

		if ((!constexpr_sampler || !constexpr_sampler->ycbcr_conversion_enable) &&
		    msl_options.swizzle_texture_samples && args.base.is_gather)
		{
			// Add the swizzle constant from the swizzle buffer.
			farg_str += ", " + to_swizzle_expression(img);
			used_swizzle_buffer = true;
		}

		// Swizzled gather puts the component before the other args, to allow template
		// deduction to work.
		if (args.component && msl_options.swizzle_texture_samples)
		{
			forward = should_forward(args.component);
			farg_str += ", " + to_component_argument(args.component);
		}
	}

	// Texture coordinates
	forward = forward && should_forward(args.coord);
	auto coord_expr = to_enclosed_expression(args.coord);
	auto &coord_type = expression_type(args.coord);
	bool coord_is_fp = type_is_floating_point(coord_type);
	bool is_cube_fetch = false;

	string tex_coords = coord_expr;
	uint32_t alt_coord_component = 0;

	switch (imgtype.image.dim)
	{
	case Dim1D:
		if (coord_type.vecsize > 1)
			tex_coords = enclose_expression(tex_coords) + ".x";

		if (args.base.is_fetch)
			tex_coords = "uint(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")";
		else if (sampling_type_needs_f32_conversion(coord_type))
			tex_coords = convert_to_f32(tex_coords, 1);

		if (msl_options.texture_1D_as_2D)
		{
			if (args.base.is_fetch)
				tex_coords = "uint2(" + tex_coords + ", 0)";
			else
				tex_coords = "float2(" + tex_coords + ", 0.5)";
		}

		alt_coord_component = 1;
		break;

	case DimBuffer:
		if (coord_type.vecsize > 1)
			tex_coords = enclose_expression(tex_coords) + ".x";

		if (msl_options.texture_buffer_native)
		{
			tex_coords = "uint(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")";
		}
		else
		{
			// Metal texel buffer textures are 2D, so convert 1D coord to 2D.
			// Support for Metal 2.1's new texture_buffer type.
			if (args.base.is_fetch)
			{
				if (msl_options.texel_buffer_texture_width > 0)
				{
					tex_coords = "spvTexelBufferCoord(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")";
				}
				else
				{
					tex_coords = "spvTexelBufferCoord(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ", " +
					             to_expression(img) + ")";
				}
			}
		}

		alt_coord_component = 1;
		break;

	case DimSubpassData:
		// If we're using Metal's native frame-buffer fetch API for subpass inputs,
		// this path will not be hit.
		tex_coords = "uint2(gl_FragCoord.xy)";
		alt_coord_component = 2;
		break;

	case Dim2D:
		if (coord_type.vecsize > 2)
			tex_coords = enclose_expression(tex_coords) + ".xy";

		if (args.base.is_fetch)
			tex_coords = "uint2(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")";
		else if (sampling_type_needs_f32_conversion(coord_type))
			tex_coords = convert_to_f32(tex_coords, 2);

		alt_coord_component = 2;
		break;

	case Dim3D:
		if (coord_type.vecsize > 3)
			tex_coords = enclose_expression(tex_coords) + ".xyz";

		if (args.base.is_fetch)
			tex_coords = "uint3(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")";
		else if (sampling_type_needs_f32_conversion(coord_type))
			tex_coords = convert_to_f32(tex_coords, 3);

		alt_coord_component = 3;
		break;

	case DimCube:
		if (args.base.is_fetch)
		{
			is_cube_fetch = true;
			tex_coords += ".xy";
			tex_coords = "uint2(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")";
		}
		else
		{
			if (coord_type.vecsize > 3)
				tex_coords = enclose_expression(tex_coords) + ".xyz";
		}

		if (sampling_type_needs_f32_conversion(coord_type))
			tex_coords = convert_to_f32(tex_coords, 3);

		alt_coord_component = 3;
		break;

	default:
		break;
	}

	if (args.base.is_fetch && (args.offset || args.coffset))
	{
		uint32_t offset_expr = args.offset ? args.offset : args.coffset;
		// Fetch offsets must be applied directly to the coordinate.
		forward = forward && should_forward(offset_expr);
		auto &type = expression_type(offset_expr);
		if (imgtype.image.dim == Dim1D && msl_options.texture_1D_as_2D)
		{
			if (type.basetype != SPIRType::UInt)
				tex_coords += join(" + uint2(", bitcast_expression(SPIRType::UInt, offset_expr), ", 0)");
			else
				tex_coords += join(" + uint2(", to_enclosed_expression(offset_expr), ", 0)");
		}
		else
		{
			if (type.basetype != SPIRType::UInt)
				tex_coords += " + " + bitcast_expression(SPIRType::UInt, offset_expr);
			else
				tex_coords += " + " + to_enclosed_expression(offset_expr);
		}
	}

	// If projection, use alt coord as divisor
	if (args.base.is_proj)
	{
		if (sampling_type_needs_f32_conversion(coord_type))
			tex_coords += " / " + convert_to_f32(to_extract_component_expression(args.coord, alt_coord_component), 1);
		else
			tex_coords += " / " + to_extract_component_expression(args.coord, alt_coord_component);
	}

	if (!farg_str.empty())
		farg_str += ", ";

	if (imgtype.image.dim == DimCube && imgtype.image.arrayed && msl_options.emulate_cube_array)
	{
		farg_str += "spvCubemapTo2DArrayFace(" + tex_coords + ").xy";

		if (is_cube_fetch)
			farg_str += ", uint(" + to_extract_component_expression(args.coord, 2) + ")";
		else
			farg_str +=
			    ", uint(spvCubemapTo2DArrayFace(" + tex_coords + ").z) + (uint(" +
			    round_fp_tex_coords(to_extract_component_expression(args.coord, alt_coord_component), coord_is_fp) +
			    ") * 6u)";

		add_spv_func_and_recompile(SPVFuncImplCubemapTo2DArrayFace);
	}
	else
	{
		farg_str += tex_coords;

		// If fetch from cube, add face explicitly
		if (is_cube_fetch)
		{
			// Special case for cube arrays, face and layer are packed in one dimension.
			if (imgtype.image.arrayed)
				farg_str += ", uint(" + to_extract_component_expression(args.coord, 2) + ") % 6u";
			else
				farg_str +=
				    ", uint(" + round_fp_tex_coords(to_extract_component_expression(args.coord, 2), coord_is_fp) + ")";
		}

		// If array, use alt coord
		if (imgtype.image.arrayed)
		{
			// Special case for cube arrays, face and layer are packed in one dimension.
			if (imgtype.image.dim == DimCube && args.base.is_fetch)
			{
				farg_str += ", uint(" + to_extract_component_expression(args.coord, 2) + ") / 6u";
			}
			else
			{
				farg_str +=
				    ", uint(" +
				    round_fp_tex_coords(to_extract_component_expression(args.coord, alt_coord_component), coord_is_fp) +
				    ")";
				if (imgtype.image.dim == DimSubpassData)
				{
					if (msl_options.multiview)
						farg_str += " + gl_ViewIndex";
					else if (msl_options.arrayed_subpass_input)
						farg_str += " + gl_Layer";
				}
			}
		}
		else if (imgtype.image.dim == DimSubpassData)
		{
			if (msl_options.multiview)
				farg_str += ", gl_ViewIndex";
			else if (msl_options.arrayed_subpass_input)
				farg_str += ", gl_Layer";
		}
	}

	// Depth compare reference value
	if (args.dref)
	{
		forward = forward && should_forward(args.dref);
		farg_str += ", ";

		auto &dref_type = expression_type(args.dref);

		string dref_expr;
		if (args.base.is_proj)
			dref_expr = join(to_enclosed_expression(args.dref), " / ",
			                 to_extract_component_expression(args.coord, alt_coord_component));
		else
			dref_expr = to_expression(args.dref);

		if (sampling_type_needs_f32_conversion(dref_type))
			dref_expr = convert_to_f32(dref_expr, 1);

		farg_str += dref_expr;

		if (msl_options.is_macos() && (grad_x || grad_y))
		{
			// For sample compare, MSL does not support gradient2d on all targets (apparently only on iOS, according to the docs).
			// However, the most common case here is to have a constant gradient of 0, as that is the only way to express
			// LOD == 0 in GLSL with sampler2DArrayShadow (cascaded shadow mapping).
			// We will detect a compile-time constant 0 value for gradient and promote that to level(0) on MSL.
			bool constant_zero_x = !grad_x || expression_is_constant_null(grad_x);
			bool constant_zero_y = !grad_y || expression_is_constant_null(grad_y);
			if (constant_zero_x && constant_zero_y)
			{
				lod = 0;
				grad_x = 0;
				grad_y = 0;
				farg_str += ", level(0)";
			}
			else if (!msl_options.supports_msl_version(2, 3))
			{
				SPIRV_CROSS_THROW("Using non-constant 0.0 gradient() qualifier for sample_compare. This is not "
				                  "supported on macOS prior to MSL 2.3.");
			}
		}

		if (msl_options.is_macos() && bias)
		{
			// Bias is not supported either on macOS with sample_compare.
			// Verify it is compile-time zero, and drop the argument.
			if (expression_is_constant_null(bias))
			{
				bias = 0;
			}
			else if (!msl_options.supports_msl_version(2, 3))
			{
				SPIRV_CROSS_THROW("Using non-constant 0.0 bias() qualifier for sample_compare. This is not supported "
				                  "on macOS prior to MSL 2.3.");
			}
		}
	}

	// LOD Options
	// Metal does not support LOD for 1D textures.
	if (bias && (imgtype.image.dim != Dim1D || msl_options.texture_1D_as_2D))
	{
		forward = forward && should_forward(bias);
		farg_str += ", bias(" + to_expression(bias) + ")";
	}

	// Metal does not support LOD for 1D textures.
	if (lod && (imgtype.image.dim != Dim1D || msl_options.texture_1D_as_2D))
	{
		forward = forward && should_forward(lod);
		if (args.base.is_fetch)
		{
			farg_str += ", " + to_expression(lod);
		}
		else
		{
			farg_str += ", level(" + to_expression(lod) + ")";
		}
	}
	else if (args.base.is_fetch && !lod && (imgtype.image.dim != Dim1D || msl_options.texture_1D_as_2D) &&
	         imgtype.image.dim != DimBuffer && !imgtype.image.ms && imgtype.image.sampled != 2)
	{
		// The LOD argument is optional in OpImageFetch, but we require a LOD value, so pick 0 as the default.
		// Check for sampled type as well, because is_fetch is also used for OpImageRead in MSL.
		farg_str += ", 0";
	}

	// Metal does not support LOD for 1D textures.
	if ((grad_x || grad_y) && (imgtype.image.dim != Dim1D || msl_options.texture_1D_as_2D))
	{
		forward = forward && should_forward(grad_x);
		forward = forward && should_forward(grad_y);
		string grad_opt;
		switch (imgtype.image.dim)
		{
		case Dim1D:
		case Dim2D:
			grad_opt = "2d";
			break;
		case Dim3D:
			grad_opt = "3d";
			break;
		case DimCube:
			if (imgtype.image.arrayed && msl_options.emulate_cube_array)
				grad_opt = "2d";
			else
				grad_opt = "cube";
			break;
		default:
			grad_opt = "unsupported_gradient_dimension";
			break;
		}
		farg_str += ", gradient" + grad_opt + "(" + to_expression(grad_x) + ", " + to_expression(grad_y) + ")";
	}

	if (args.min_lod)
	{
		if (!msl_options.supports_msl_version(2, 2))
			SPIRV_CROSS_THROW("min_lod_clamp() is only supported in MSL 2.2 and up.");

		forward = forward && should_forward(args.min_lod);
		farg_str += ", min_lod_clamp(" + to_expression(args.min_lod) + ")";
	}

	// Add offsets
	string offset_expr;
	const SPIRType *offset_type = nullptr;
	if (args.coffset && !args.base.is_fetch)
	{
		forward = forward && should_forward(args.coffset);
		offset_expr = to_expression(args.coffset);
		offset_type = &expression_type(args.coffset);
	}
	else if (args.offset && !args.base.is_fetch)
	{
		forward = forward && should_forward(args.offset);
		offset_expr = to_expression(args.offset);
		offset_type = &expression_type(args.offset);
	}

	if (!offset_expr.empty())
	{
		switch (imgtype.image.dim)
		{
		case Dim1D:
			if (!msl_options.texture_1D_as_2D)
				break;
			if (offset_type->vecsize > 1)
				offset_expr = enclose_expression(offset_expr) + ".x";
			farg_str += join(", int2(", offset_expr, ", 0)");
			break;

		case Dim2D:
			if (offset_type->vecsize > 2)
				offset_expr = enclose_expression(offset_expr) + ".xy";
			farg_str += ", " + offset_expr;
			break;

		case Dim3D:
			if (offset_type->vecsize > 3)
				offset_expr = enclose_expression(offset_expr) + ".xyz";
			farg_str += ", " + offset_expr;
			break;

		default:
			break;
		}
	}

	if (args.component)
	{
		// If 2D has gather component, ensure it also has an offset arg
		if (imgtype.image.dim == Dim2D && offset_expr.empty())
			farg_str += ", int2(0)";

		if (!msl_options.swizzle_texture_samples || is_dynamic_img_sampler)
		{
			forward = forward && should_forward(args.component);

			uint32_t image_var = 0;
			if (const auto *combined = maybe_get<SPIRCombinedImageSampler>(img))
			{
				if (const auto *img_var = maybe_get_backing_variable(combined->image))
					image_var = img_var->self;
			}
			else if (const auto *var = maybe_get_backing_variable(img))
			{
				image_var = var->self;
			}

			if (image_var == 0 || !image_is_comparison(expression_type(image_var), image_var))
				farg_str += ", " + to_component_argument(args.component);
		}
	}

	if (args.sample)
	{
		forward = forward && should_forward(args.sample);
		farg_str += ", ";
		farg_str += to_expression(args.sample);
	}

	*p_forward = forward;

	return farg_str;
}
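
// For illustration: sampling a hypothetical 2D array texture "tex" with a bias
// and a constant offset would produce an argument string roughly like:
//     texSmplr, in.uv.xy, uint(round(in.uv.z)), bias(b), int2(1)
// (the sampler name assumes the usual name-plus-suffix scheme; the round()
// on the layer coordinate comes from round_fp_tex_coords below).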

// If the texture coordinates are floating point, invokes MSL round() function to round them.
string CompilerMSL::round_fp_tex_coords(string tex_coords, bool coord_is_fp)
{
	return coord_is_fp ? ("round(" + tex_coords + ")") : tex_coords;
}

// Returns a string to use in an image sampling function argument.
// The ID must be a scalar constant.
string CompilerMSL::to_component_argument(uint32_t id)
{
	uint32_t component_index = evaluate_constant_u32(id);
	switch (component_index)
	{
	case 0:
		return "component::x";
	case 1:
		return "component::y";
	case 2:
		return "component::z";
	case 3:
		return "component::w";
	default:
		SPIRV_CROSS_THROW("The value (" + to_string(component_index) + ") of OpConstant ID " + to_string(id) +
		                  " is not a valid Component index, which must be one of 0, 1, 2, or 3.");
	}
}

// Establish sampled image as expression object and assign the sampler to it.
void CompilerMSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id)
{
	set<SPIRCombinedImageSampler>(result_id, result_type, image_id, samp_id);
}

string CompilerMSL::to_texture_op(const Instruction &i, bool sparse, bool *forward,
                                  SmallVector<uint32_t> &inherited_expressions)
{
	auto *ops = stream(i);
	uint32_t result_type_id = ops[0];
	uint32_t img = ops[2];
	auto &result_type = get<SPIRType>(result_type_id);
	auto op = static_cast<Op>(i.op);
	bool is_gather = (op == OpImageGather || op == OpImageDrefGather);

	// Bypass pointers because we need the real image struct
	auto &type = expression_type(img);
	auto &imgtype = get<SPIRType>(type.self);

	const MSLConstexprSampler *constexpr_sampler = nullptr;
	bool is_dynamic_img_sampler = false;
	if (auto *var = maybe_get_backing_variable(img))
	{
		constexpr_sampler = find_constexpr_sampler(var->basevariable ? var->basevariable : VariableID(var->self));
		is_dynamic_img_sampler = has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler);
	}

	string expr;
	if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable && !is_dynamic_img_sampler)
	{
		// If this needs sampler Y'CbCr conversion, we need to do some additional
		// processing.
		switch (constexpr_sampler->ycbcr_model)
		{
		case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY:
		case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY:
			// Default
			break;
		case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709:
			add_spv_func_and_recompile(SPVFuncImplConvertYCbCrBT709);
			expr += "spvConvertYCbCrBT709(";
			break;
		case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601:
			add_spv_func_and_recompile(SPVFuncImplConvertYCbCrBT601);
			expr += "spvConvertYCbCrBT601(";
			break;
		case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020:
			add_spv_func_and_recompile(SPVFuncImplConvertYCbCrBT2020);
			expr += "spvConvertYCbCrBT2020(";
			break;
		default:
			SPIRV_CROSS_THROW("Invalid Y'CbCr model conversion.");
		}

		if (constexpr_sampler->ycbcr_model != MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY)
		{
			switch (constexpr_sampler->ycbcr_range)
			{
			case MSL_SAMPLER_YCBCR_RANGE_ITU_FULL:
				add_spv_func_and_recompile(SPVFuncImplExpandITUFullRange);
				expr += "spvExpandITUFullRange(";
				break;
			case MSL_SAMPLER_YCBCR_RANGE_ITU_NARROW:
				add_spv_func_and_recompile(SPVFuncImplExpandITUNarrowRange);
				expr += "spvExpandITUNarrowRange(";
				break;
			default:
				SPIRV_CROSS_THROW("Invalid Y'CbCr range.");
			}
		}
	}
	else if (msl_options.swizzle_texture_samples && !is_gather && is_sampled_image_type(imgtype) &&
	         !is_dynamic_img_sampler)
	{
		add_spv_func_and_recompile(SPVFuncImplTextureSwizzle);
		expr += "spvTextureSwizzle(";
	}

	string inner_expr = CompilerGLSL::to_texture_op(i, sparse, forward, inherited_expressions);

	if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable && !is_dynamic_img_sampler)
	{
		if (!constexpr_sampler->swizzle_is_identity())
		{
			static const char swizzle_names[] = "rgba";
			if (!constexpr_sampler->swizzle_has_one_or_zero())
			{
				// If we can, do it inline.
				expr += inner_expr + ".";
				for (uint32_t c = 0; c < 4; c++)
				{
					switch (constexpr_sampler->swizzle[c])
					{
					case MSL_COMPONENT_SWIZZLE_IDENTITY:
						expr += swizzle_names[c];
						break;
					case MSL_COMPONENT_SWIZZLE_R:
					case MSL_COMPONENT_SWIZZLE_G:
					case MSL_COMPONENT_SWIZZLE_B:
					case MSL_COMPONENT_SWIZZLE_A:
						expr += swizzle_names[constexpr_sampler->swizzle[c] - MSL_COMPONENT_SWIZZLE_R];
						break;
					default:
						SPIRV_CROSS_THROW("Invalid component swizzle.");
					}
				}
			}
			else
			{
				// Otherwise, we need to emit a temporary and swizzle that.
				uint32_t temp_id = ir.increase_bound_by(1);
				emit_op(result_type_id, temp_id, inner_expr, false);
				for (auto &inherit : inherited_expressions)
					inherit_expression_dependencies(temp_id, inherit);
				inherited_expressions.clear();
				inherited_expressions.push_back(temp_id);

				switch (op)
				{
				case OpImageSampleDrefImplicitLod:
				case OpImageSampleImplicitLod:
				case OpImageSampleProjImplicitLod:
				case OpImageSampleProjDrefImplicitLod:
					register_control_dependent_expression(temp_id);
					break;
				default:
					break;
				}

				expr += type_to_glsl(result_type) + "(";
				for (uint32_t c = 0; c < 4; c++)
				{
					switch (constexpr_sampler->swizzle[c])
					{
					case MSL_COMPONENT_SWIZZLE_IDENTITY:
						expr += to_expression(temp_id) + "." + swizzle_names[c];
						break;
					case MSL_COMPONENT_SWIZZLE_ZERO:
						expr += "0";
						break;
					case MSL_COMPONENT_SWIZZLE_ONE:
						expr += "1";
						break;
					case MSL_COMPONENT_SWIZZLE_R:
					case MSL_COMPONENT_SWIZZLE_G:
					case MSL_COMPONENT_SWIZZLE_B:
					case MSL_COMPONENT_SWIZZLE_A:
						expr += to_expression(temp_id) + "." +
						        swizzle_names[constexpr_sampler->swizzle[c] - MSL_COMPONENT_SWIZZLE_R];
						break;
					default:
						SPIRV_CROSS_THROW("Invalid component swizzle.");
					}
					if (c < 3)
						expr += ", ";
				}
				expr += ")";
			}
		}
		else
			expr += inner_expr;

		if (constexpr_sampler->ycbcr_model != MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY)
		{
			expr += join(", ", constexpr_sampler->bpc, ")");
			if (constexpr_sampler->ycbcr_model != MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY)
				expr += ")";
		}
	}
	else
	{
		expr += inner_expr;
		if (msl_options.swizzle_texture_samples && !is_gather && is_sampled_image_type(imgtype) &&
		    !is_dynamic_img_sampler)
		{
			// Add the swizzle constant from the swizzle buffer.
			expr += ", " + to_swizzle_expression(img) + ")";
			used_swizzle_buffer = true;
		}
	}

	return expr;
}
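
// For illustration: with swizzle_texture_samples enabled, a plain 2D sample is
// wrapped in the swizzle helper, roughly:
//     spvTextureSwizzle(tex.sample(texSmplr, in.uv), texSwzl)
// and a BT.709 narrow-range Y'CbCr sample nests the conversion helpers as:
//     spvConvertYCbCrBT709(spvExpandITUNarrowRange(<sample expr>, bpc))
// (the "texSwzl" name assumes the image-name-plus-suffix scheme used by
// to_swizzle_expression below).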

static string create_swizzle(MSLComponentSwizzle swizzle)
{
	switch (swizzle)
	{
	case MSL_COMPONENT_SWIZZLE_IDENTITY:
		return "spvSwizzle::none";
	case MSL_COMPONENT_SWIZZLE_ZERO:
		return "spvSwizzle::zero";
	case MSL_COMPONENT_SWIZZLE_ONE:
		return "spvSwizzle::one";
	case MSL_COMPONENT_SWIZZLE_R:
		return "spvSwizzle::red";
	case MSL_COMPONENT_SWIZZLE_G:
		return "spvSwizzle::green";
	case MSL_COMPONENT_SWIZZLE_B:
		return "spvSwizzle::blue";
	case MSL_COMPONENT_SWIZZLE_A:
		return "spvSwizzle::alpha";
	default:
		SPIRV_CROSS_THROW("Invalid component swizzle.");
	}
}

// Returns a string representation of the ID, usable as a function arg.
// Manufacture automatic sampler arg for SampledImage texture.
string CompilerMSL::to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id)
{
	string arg_str;

	auto &type = expression_type(id);
	bool is_dynamic_img_sampler = has_extended_decoration(arg.id, SPIRVCrossDecorationDynamicImageSampler);
	// If the argument *itself* is a "dynamic" combined-image sampler, then we can just pass that around.
	bool arg_is_dynamic_img_sampler = has_extended_decoration(id, SPIRVCrossDecorationDynamicImageSampler);
	if (is_dynamic_img_sampler && !arg_is_dynamic_img_sampler)
		arg_str = join("spvDynamicImageSampler<", type_to_glsl(get<SPIRType>(type.image.type)), ">(");

	auto *c = maybe_get<SPIRConstant>(id);
	if (msl_options.force_native_arrays && c && !get<SPIRType>(c->constant_type).array.empty())
	{
		// If we are passing a constant array directly to a function for some reason,
		// the callee will expect an argument in thread const address space
		// (since we can only bind to arrays with references in MSL).
		// To resolve this, we must emit a copy in this address space.
		// This kind of code gen should be rare enough that performance is not a real concern.
		// Inline the SPIR-V to avoid this kind of suboptimal codegen.
		//
		// We risk calling this inside a continue block (invalid code),
		// so just create a thread local copy in the current function.
		arg_str = join("_", id, "_array_copy");
		auto &constants = current_function->constant_arrays_needed_on_stack;
		auto itr = find(begin(constants), end(constants), ID(id));
		if (itr == end(constants))
		{
			force_recompile();
			constants.push_back(id);
		}
	}
	else
		arg_str += CompilerGLSL::to_func_call_arg(arg, id);

	// Need to check the base variable in case we need to apply a qualified alias.
	uint32_t var_id = 0;
	auto *var = maybe_get<SPIRVariable>(id);
	if (var)
		var_id = var->basevariable;

	if (!arg_is_dynamic_img_sampler)
	{
		auto *constexpr_sampler = find_constexpr_sampler(var_id ? var_id : id);
		if (type.basetype == SPIRType::SampledImage)
		{
			// Manufacture automatic plane args for multiplanar texture
			uint32_t planes = 1;
			if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable)
			{
				planes = constexpr_sampler->planes;
				// If this parameter isn't aliasing a global, then we need to use
				// the special "dynamic image-sampler" class to pass it, and we need
				// to use it for *every* non-alias parameter, in case a combined
				// image-sampler with a Y'CbCr conversion is passed. Hopefully, this
				// pathological case is so rare that it should never be hit in practice.
				if (!arg.alias_global_variable)
					add_spv_func_and_recompile(SPVFuncImplDynamicImageSampler);
			}
			for (uint32_t i = 1; i < planes; i++)
				arg_str += join(", ", CompilerGLSL::to_func_call_arg(arg, id), plane_name_suffix, i);

			// Manufacture automatic sampler arg if the arg is a SampledImage texture.
			if (type.image.dim != DimBuffer)
				arg_str += ", " + to_sampler_expression(var_id ? var_id : id);

			// Add sampler Y'CbCr conversion info if we have it
			if (is_dynamic_img_sampler && constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable)
			{
				SmallVector<string> samp_args;

				switch (constexpr_sampler->resolution)
				{
				case MSL_FORMAT_RESOLUTION_444:
					// Default
					break;
				case MSL_FORMAT_RESOLUTION_422:
					samp_args.push_back("spvFormatResolution::_422");
					break;
				case MSL_FORMAT_RESOLUTION_420:
					samp_args.push_back("spvFormatResolution::_420");
					break;
				default:
					SPIRV_CROSS_THROW("Invalid format resolution.");
				}

				if (constexpr_sampler->chroma_filter != MSL_SAMPLER_FILTER_NEAREST)
					samp_args.push_back("spvChromaFilter::linear");

				if (constexpr_sampler->x_chroma_offset != MSL_CHROMA_LOCATION_COSITED_EVEN)
					samp_args.push_back("spvXChromaLocation::midpoint");
				if (constexpr_sampler->y_chroma_offset != MSL_CHROMA_LOCATION_COSITED_EVEN)
					samp_args.push_back("spvYChromaLocation::midpoint");

				switch (constexpr_sampler->ycbcr_model)
				{
				case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY:
					// Default
					break;
				case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY:
					samp_args.push_back("spvYCbCrModelConversion::ycbcr_identity");
					break;
				case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709:
					samp_args.push_back("spvYCbCrModelConversion::ycbcr_bt_709");
					break;
				case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601:
					samp_args.push_back("spvYCbCrModelConversion::ycbcr_bt_601");
					break;
				case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020:
					samp_args.push_back("spvYCbCrModelConversion::ycbcr_bt_2020");
					break;
				default:
					SPIRV_CROSS_THROW("Invalid Y'CbCr model conversion.");
				}

				if (constexpr_sampler->ycbcr_range != MSL_SAMPLER_YCBCR_RANGE_ITU_FULL)
					samp_args.push_back("spvYCbCrRange::itu_narrow");
				samp_args.push_back(join("spvComponentBits(", constexpr_sampler->bpc, ")"));
				arg_str += join(", spvYCbCrSampler(", merge(samp_args), ")");
			}
		}

		if (is_dynamic_img_sampler && constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable)
			arg_str += join(", (uint(", create_swizzle(constexpr_sampler->swizzle[3]), ") << 24) | (uint(",
			               create_swizzle(constexpr_sampler->swizzle[2]), ") << 16) | (uint(",
			               create_swizzle(constexpr_sampler->swizzle[1]), ") << 8) | uint(",
			               create_swizzle(constexpr_sampler->swizzle[0]), ")");
		else if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(type))
			arg_str += ", " + to_swizzle_expression(var_id ? var_id : id);

		if (buffers_requiring_array_length.count(var_id))
			arg_str += ", " + to_buffer_size_expression(var_id ? var_id : id);

		if (is_dynamic_img_sampler)
			arg_str += ")";
	}

	// Emulate texture2D atomic operations
	auto *backing_var = maybe_get_backing_variable(var_id);
	if (backing_var && atomic_image_vars.count(backing_var->self))
	{
		arg_str += ", " + to_expression(var_id) + "_atomic";
	}

	return arg_str;
}
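
// For illustration: passing a hypothetical combined image-sampler "tex" to a
// helper function, with texture swizzling enabled, expands the single SPIR-V
// argument into several MSL arguments, roughly:
//     foo(tex, texSmplr, texSwzl)
// (the sampler and swizzle names assume the name-plus-suffix scheme used by
// to_sampler_expression and to_swizzle_expression below).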

// If the ID represents a sampled image that has been assigned a sampler already,
// generate an expression for the sampler, otherwise generate a fake sampler name
// by appending a suffix to the expression constructed from the ID.
string CompilerMSL::to_sampler_expression(uint32_t id)
{
	auto *combined = maybe_get<SPIRCombinedImageSampler>(id);
	auto expr = to_expression(combined ? combined->image : VariableID(id));
	auto index = expr.find_first_of('[');

	uint32_t samp_id = 0;
	if (combined)
		samp_id = combined->sampler;

	if (index == string::npos)
		return samp_id ? to_expression(samp_id) : expr + sampler_name_suffix;
	else
	{
		auto image_expr = expr.substr(0, index);
		auto array_expr = expr.substr(index);
		return samp_id ? to_expression(samp_id) : (image_expr + sampler_name_suffix + array_expr);
	}
}
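
// For illustration: an image expression "tex" becomes "tex" plus the sampler
// suffix, and an arrayed expression "tex[2]" has the suffix spliced in before
// the subscript, i.e. roughly "texSmplr[2]" (assuming the suffix is "Smplr").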

string CompilerMSL::to_swizzle_expression(uint32_t id)
{
	auto *combined = maybe_get<SPIRCombinedImageSampler>(id);

	auto expr = to_expression(combined ? combined->image : VariableID(id));
	auto index = expr.find_first_of('[');

	// If an image is part of an argument buffer translate this to a legal identifier.
	string::size_type period = 0;
	while ((period = expr.find_first_of('.', period)) != string::npos && period < index)
		expr[period] = '_';

	if (index == string::npos)
		return expr + swizzle_name_suffix;
	else
	{
		auto image_expr = expr.substr(0, index);
		auto array_expr = expr.substr(index);
		return image_expr + swizzle_name_suffix + array_expr;
	}
}

string CompilerMSL::to_buffer_size_expression(uint32_t id)
{
	auto expr = to_expression(id);
	auto index = expr.find_first_of('[');

	// This is quite crude, but we need to translate the reference name (*spvDescriptorSetN.name) to
	// the pointer expression spvDescriptorSetN.name to make a reasonable expression here.
	// This only happens if we have argument buffers and we are using OpArrayLength on a lone SSBO in that set.
	if (expr.size() >= 3 && expr[0] == '(' && expr[1] == '*')
		expr = address_of_expression(expr);

	// If a buffer is part of an argument buffer translate this to a legal identifier.
	for (auto &c : expr)
		if (c == '.')
			c = '_';

	if (index == string::npos)
		return expr + buffer_size_name_suffix;
	else
	{
		auto buffer_expr = expr.substr(0, index);
		auto array_expr = expr.substr(index);
		return buffer_expr + buffer_size_name_suffix + array_expr;
	}
}

// Checks whether the type is a Block all of whose members have DecorationPatch.
bool CompilerMSL::is_patch_block(const SPIRType &type)
{
	if (!has_decoration(type.self, DecorationBlock))
		return false;

	for (uint32_t i = 0; i < type.member_types.size(); i++)
	{
		if (!has_member_decoration(type.self, i, DecorationPatch))
			return false;
	}

	return true;
}

// Checks whether the ID is a row_major matrix that requires conversion before use
bool CompilerMSL::is_non_native_row_major_matrix(uint32_t id)
{
	auto *e = maybe_get<SPIRExpression>(id);
	if (e)
		return e->need_transpose;
	else
		return has_decoration(id, DecorationRowMajor);
}

// Checks whether the member is a row_major matrix that requires conversion before use
bool CompilerMSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index)
{
	return has_member_decoration(type.self, index, DecorationRowMajor);
}

string CompilerMSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, uint32_t physical_type_id,
                                             bool is_packed)
{
	if (!is_matrix(exp_type))
	{
		return CompilerGLSL::convert_row_major_matrix(move(exp_str), exp_type, physical_type_id, is_packed);
	}
	else
	{
		strip_enclosed_expression(exp_str);
		if (physical_type_id != 0 || is_packed)
			exp_str = unpack_expression_type(exp_str, exp_type, physical_type_id, is_packed, true);
		return join("transpose(", exp_str, ")");
	}
}
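
// For illustration: a row-major matrix expression "m" is unpacked if needed and
// wrapped as "transpose(m)", so the transposed storage reads back in
// column-major order on the MSL side.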

// Called automatically at the end of the entry point function
void CompilerMSL::emit_fixup()
{
	if (is_vertex_like_shader() && stage_out_var_id && !qual_pos_var_name.empty() && !capture_output_to_buffer)
	{
		if (options.vertex.fixup_clipspace)
			statement(qual_pos_var_name, ".z = (", qual_pos_var_name, ".z + ", qual_pos_var_name,
			          ".w) * 0.5; // Adjust clip-space for Metal");

		if (options.vertex.flip_vert_y)
			statement(qual_pos_var_name, ".y = -(", qual_pos_var_name, ".y);", " // Invert Y-axis for Metal");
	}
}
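
// For illustration: with both fixups enabled and the qualified position named
// "out.gl_Position", the end of the entry point would contain roughly:
//     out.gl_Position.z = (out.gl_Position.z + out.gl_Position.w) * 0.5; // Adjust clip-space for Metal
//     out.gl_Position.y = -(out.gl_Position.y); // Invert Y-axis for Metal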
  9420. // Return a string defining a structure member, with padding and packing.
  9421. string CompilerMSL::to_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
  9422. const string &qualifier)
  9423. {
  9424. if (member_is_remapped_physical_type(type, index))
  9425. member_type_id = get_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID);
  9426. auto &physical_type = get<SPIRType>(member_type_id);
  9427. // If this member is packed, mark it as so.
  9428. string pack_pfx;
  9429. // Allow Metal to use the array<T> template to make arrays a value type
  9430. uint32_t orig_id = 0;
  9431. if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationInterfaceOrigID))
  9432. orig_id = get_extended_member_decoration(type.self, index, SPIRVCrossDecorationInterfaceOrigID);
  9433. bool row_major = false;
  9434. if (is_matrix(physical_type))
  9435. row_major = has_member_decoration(type.self, index, DecorationRowMajor);
  9436. SPIRType row_major_physical_type;
  9437. const SPIRType *declared_type = &physical_type;
  9438. // If a struct is being declared with physical layout,
  9439. // do not use array<T> wrappers.
  9440. // This avoids a lot of complicated cases with packed vectors and matrices,
  9441. // and generally we cannot copy full arrays in and out of buffers into Function
  9442. // address space.
  9443. // Array of resources should also be declared as builtin arrays.
  9444. if (has_member_decoration(type.self, index, DecorationOffset))
  9445. is_using_builtin_array = true;
  9446. else if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary))
  9447. is_using_builtin_array = true;
  9448. if (member_is_packed_physical_type(type, index))
  9449. {
  9450. // If we're packing a matrix, output an appropriate typedef
  9451. if (physical_type.basetype == SPIRType::Struct)
  9452. {
  9453. SPIRV_CROSS_THROW("Cannot emit a packed struct currently.");
  9454. }
  9455. else if (is_matrix(physical_type))
  9456. {
  9457. uint32_t rows = physical_type.vecsize;
  9458. uint32_t cols = physical_type.columns;
  9459. pack_pfx = "packed_";
  9460. if (row_major)
  9461. {
  9462. // These are stored transposed.
  9463. rows = physical_type.columns;
  9464. cols = physical_type.vecsize;
  9465. pack_pfx = "packed_rm_";
  9466. }
  9467. string base_type = physical_type.width == 16 ? "half" : "float";
  9468. string td_line = "typedef ";
  9469. td_line += "packed_" + base_type + to_string(rows);
  9470. td_line += " " + pack_pfx;
  9471. // Use the actual matrix size here.
  9472. td_line += base_type + to_string(physical_type.columns) + "x" + to_string(physical_type.vecsize);
  9473. td_line += "[" + to_string(cols) + "]";
  9474. td_line += ";";
  9475. add_typedef_line(td_line);
        }
        else if (!is_scalar(physical_type)) // scalar type is already packed.
            pack_pfx = "packed_";
    }
    else if (row_major)
    {
        // Need to declare type with flipped vecsize/columns.
        row_major_physical_type = physical_type;
        swap(row_major_physical_type.vecsize, row_major_physical_type.columns);
        declared_type = &row_major_physical_type;
    }

    // Very specifically, image load-store in argument buffers are disallowed on MSL on iOS.
    if (msl_options.is_ios() && physical_type.basetype == SPIRType::Image && physical_type.image.sampled == 2)
    {
        if (!has_decoration(orig_id, DecorationNonWritable))
            SPIRV_CROSS_THROW("Writable images are not allowed in argument buffers on iOS.");
    }

    // Array information is baked into these types.
    string array_type;
    if (physical_type.basetype != SPIRType::Image && physical_type.basetype != SPIRType::Sampler &&
        physical_type.basetype != SPIRType::SampledImage)
    {
        BuiltIn builtin = BuiltInMax;

        // Special handling. In [[stage_out]] or [[stage_in]] blocks,
        // we need flat arrays, but if we're somehow declaring gl_PerVertex for constant array reasons, we want
        // template array types to be declared.
        bool is_ib_in_out =
            ((stage_out_var_id && get_stage_out_struct_type().self == type.self &&
              variable_storage_requires_stage_io(StorageClassOutput)) ||
             (stage_in_var_id && get_stage_in_struct_type().self == type.self &&
              variable_storage_requires_stage_io(StorageClassInput)));
        if (is_ib_in_out && is_member_builtin(type, index, &builtin))
            is_using_builtin_array = true;
        array_type = type_to_array_glsl(physical_type);
    }

    auto result = join(pack_pfx, type_to_glsl(*declared_type, orig_id), " ", qualifier, to_member_name(type, index),
                       member_attribute_qualifier(type, index), array_type, ";");
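    // Illustrative results (member names are assumptions): "packed_float3 m_normal;" or
    // "float4 gl_Position [[position]];".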
    is_using_builtin_array = false;
    return result;
}

// Emit a structure member, padding and packing to maintain the correct member alignments.
void CompilerMSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
                                     const string &qualifier, uint32_t)
{
    // If this member requires padding to maintain its declared offset, emit a dummy padding member before it.
    if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPaddingTarget))
    {
        uint32_t pad_len = get_extended_member_decoration(type.self, index, SPIRVCrossDecorationPaddingTarget);
        statement("char _m", index, "_pad", "[", pad_len, "];");
    }

    // Handle HLSL-style 0-based vertex/instance index.
    builtin_declaration = true;
    statement(to_struct_member(type, member_type_id, index, qualifier));
    builtin_declaration = false;
}

void CompilerMSL::emit_struct_padding_target(const SPIRType &type)
{
    uint32_t struct_size = get_declared_struct_size_msl(type, true, true);
    uint32_t target_size = get_extended_decoration(type.self, SPIRVCrossDecorationPaddingTarget);
    if (target_size < struct_size)
        SPIRV_CROSS_THROW("Cannot pad with negative bytes.");
    else if (target_size > struct_size)
        statement("char _m0_final_padding[", target_size - struct_size, "];");
}

// Return an MSL qualifier for the specified function attribute member
string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t index)
{
    auto &execution = get_entry_point();

    uint32_t mbr_type_id = type.member_types[index];
    auto &mbr_type = get<SPIRType>(mbr_type_id);

    BuiltIn builtin = BuiltInMax;
    bool is_builtin = is_member_builtin(type, index, &builtin);

    if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary))
    {
        string quals = join(
            " [[id(", get_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary), ")");
        if (interlocked_resources.count(
                get_extended_member_decoration(type.self, index, SPIRVCrossDecorationInterfaceOrigID)))
            quals += ", raster_order_group(0)";
        quals += "]]";
        return quals;
    }

    // Vertex function inputs
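    // (Illustrative: a user vertex input at location 3 gets " [[attribute(3)]]".)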
    if (execution.model == ExecutionModelVertex && type.storage == StorageClassInput)
    {
        if (is_builtin)
        {
            switch (builtin)
            {
            case BuiltInVertexId:
            case BuiltInVertexIndex:
            case BuiltInBaseVertex:
            case BuiltInInstanceId:
            case BuiltInInstanceIndex:
            case BuiltInBaseInstance:
                if (msl_options.vertex_for_tessellation)
                    return "";
                return string(" [[") + builtin_qualifier(builtin) + "]]";

            case BuiltInDrawIndex:
                SPIRV_CROSS_THROW("DrawIndex is not supported in MSL.");

            default:
                return "";
            }
        }

        uint32_t locn;
        if (is_builtin)
            locn = get_or_allocate_builtin_input_member_location(builtin, type.self, index);
        else
            locn = get_member_location(type.self, index);

        if (locn != k_unknown_location)
            return string(" [[attribute(") + convert_to_string(locn) + ")]]";
    }

    // Vertex and tessellation evaluation function outputs
    if (((execution.model == ExecutionModelVertex && !msl_options.vertex_for_tessellation) ||
         execution.model == ExecutionModelTessellationEvaluation) &&
        type.storage == StorageClassOutput)
    {
        if (is_builtin)
        {
            switch (builtin)
            {
            case BuiltInPointSize:
                // Only mark the PointSize builtin if really rendering points.
                // Some shaders may include a PointSize builtin even when used to render
                // non-point topologies, and Metal will reject this builtin when compiling
                // the shader into a render pipeline that uses a non-point topology.
                return msl_options.enable_point_size_builtin ? (string(" [[") + builtin_qualifier(builtin) + "]]") : "";

            case BuiltInViewportIndex:
                if (!msl_options.supports_msl_version(2, 0))
                    SPIRV_CROSS_THROW("ViewportIndex requires Metal 2.0.");
                /* fallthrough */
            case BuiltInPosition:
            case BuiltInLayer:
                return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? "" : " ");

            case BuiltInClipDistance:
                if (has_member_decoration(type.self, index, DecorationIndex))
                    return join(" [[user(clip", get_member_decoration(type.self, index, DecorationIndex), ")]]");
                else
                    return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? "" : " ");

            case BuiltInCullDistance:
                if (has_member_decoration(type.self, index, DecorationIndex))
                    return join(" [[user(cull", get_member_decoration(type.self, index, DecorationIndex), ")]]");
                else
                    return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? "" : " ");

            default:
                return "";
            }
        }

        string loc_qual = member_location_attribute_qualifier(type, index);
        if (!loc_qual.empty())
            return join(" [[", loc_qual, "]]");
    }

    // Tessellation control function inputs
    if (execution.model == ExecutionModelTessellationControl && type.storage == StorageClassInput)
    {
        if (is_builtin)
        {
            switch (builtin)
            {
            case BuiltInInvocationId:
            case BuiltInPrimitiveId:
                if (msl_options.multi_patch_workgroup)
                    return "";
                return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? "" : " ");

            case BuiltInSubgroupLocalInvocationId: // FIXME: Should work in any stage
            case BuiltInSubgroupSize: // FIXME: Should work in any stage
                if (msl_options.emulate_subgroups)
                    return "";
                return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? "" : " ");

            case BuiltInPatchVertices:
                return "";
            // Others come from stage input.
            default:
                break;
            }
        }
        if (msl_options.multi_patch_workgroup)
            return "";

        uint32_t locn;
        if (is_builtin)
            locn = get_or_allocate_builtin_input_member_location(builtin, type.self, index);
        else
            locn = get_member_location(type.self, index);

        if (locn != k_unknown_location)
            return string(" [[attribute(") + convert_to_string(locn) + ")]]";
    }

    // Tessellation control function outputs
    if (execution.model == ExecutionModelTessellationControl && type.storage == StorageClassOutput)
    {
        // For this type of shader, we always arrange for it to capture its
        // output to a buffer. For this reason, qualifiers are irrelevant here.
        return "";
    }

    // Tessellation evaluation function inputs
    if (execution.model == ExecutionModelTessellationEvaluation && type.storage == StorageClassInput)
    {
        if (is_builtin)
        {
            switch (builtin)
            {
            case BuiltInPrimitiveId:
            case BuiltInTessCoord:
                return string(" [[") + builtin_qualifier(builtin) + "]]";
            case BuiltInPatchVertices:
                return "";
            // Others come from stage input.
            default:
                break;
            }
        }

        // The special control point array must not be marked with an attribute.
        if (get_type(type.member_types[index]).basetype == SPIRType::ControlPointArray)
            return "";

        uint32_t locn;
        if (is_builtin)
            locn = get_or_allocate_builtin_input_member_location(builtin, type.self, index);
        else
            locn = get_member_location(type.self, index);

        if (locn != k_unknown_location)
            return string(" [[attribute(") + convert_to_string(locn) + ")]]";
    }

    // Tessellation evaluation function outputs were handled above.

    // Fragment function inputs
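    // (Illustrative: a centroid-interpolated float varying at location 1 yields
    //  " [[user(locn1), centroid_perspective]]".)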
    if (execution.model == ExecutionModelFragment && type.storage == StorageClassInput)
    {
        string quals;
        if (is_builtin)
        {
            switch (builtin)
            {
            case BuiltInViewIndex:
                if (!msl_options.multiview || !msl_options.multiview_layered_rendering)
                    break;
                /* fallthrough */
            case BuiltInFrontFacing:
            case BuiltInPointCoord:
            case BuiltInFragCoord:
            case BuiltInSampleId:
            case BuiltInSampleMask:
            case BuiltInLayer:
            case BuiltInBaryCoordNV:
            case BuiltInBaryCoordNoPerspNV:
                quals = builtin_qualifier(builtin);
                break;

            case BuiltInClipDistance:
                return join(" [[user(clip", get_member_decoration(type.self, index, DecorationIndex), ")]]");
            case BuiltInCullDistance:
                return join(" [[user(cull", get_member_decoration(type.self, index, DecorationIndex), ")]]");

            default:
                break;
            }
        }
        else
            quals = member_location_attribute_qualifier(type, index);

        if (builtin == BuiltInBaryCoordNV || builtin == BuiltInBaryCoordNoPerspNV)
        {
            if (has_member_decoration(type.self, index, DecorationFlat) ||
                has_member_decoration(type.self, index, DecorationCentroid) ||
                has_member_decoration(type.self, index, DecorationSample) ||
                has_member_decoration(type.self, index, DecorationNoPerspective))
            {
                // NoPerspective is baked into the builtin type.
                SPIRV_CROSS_THROW(
                    "Flat, Centroid, Sample, NoPerspective decorations are not supported for BaryCoord inputs.");
            }
        }

        // Don't bother decorating integers with the 'flat' attribute; it's
        // the default (in fact, the only option). Also don't bother with the
        // FragCoord builtin; it's always noperspective on Metal.
        if (!type_is_integral(mbr_type) && (!is_builtin || builtin != BuiltInFragCoord))
        {
            if (has_member_decoration(type.self, index, DecorationFlat))
            {
                if (!quals.empty())
                    quals += ", ";
                quals += "flat";
            }
            else if (has_member_decoration(type.self, index, DecorationCentroid))
            {
                if (!quals.empty())
                    quals += ", ";
                if (has_member_decoration(type.self, index, DecorationNoPerspective))
                    quals += "centroid_no_perspective";
                else
                    quals += "centroid_perspective";
            }
            else if (has_member_decoration(type.self, index, DecorationSample))
            {
                if (!quals.empty())
                    quals += ", ";
                if (has_member_decoration(type.self, index, DecorationNoPerspective))
                    quals += "sample_no_perspective";
                else
                    quals += "sample_perspective";
            }
            else if (has_member_decoration(type.self, index, DecorationNoPerspective))
            {
                if (!quals.empty())
                    quals += ", ";
                quals += "center_no_perspective";
            }
        }

        if (!quals.empty())
            return " [[" + quals + "]]";
    }

    // Fragment function outputs
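    // (Illustrative: location 0 with a dual-source Index of 1 becomes " [[color(0), index(1)]]".)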
    if (execution.model == ExecutionModelFragment && type.storage == StorageClassOutput)
    {
        if (is_builtin)
        {
            switch (builtin)
            {
            case BuiltInFragStencilRefEXT:
                // Similar to PointSize, only mark FragStencilRef if there's a stencil buffer.
                // Some shaders may include a FragStencilRef builtin even when used to render
                // without a stencil attachment, and Metal will reject this builtin
                // when compiling the shader into a render pipeline that does not set
                // stencilAttachmentPixelFormat.
                if (!msl_options.enable_frag_stencil_ref_builtin)
                    return "";
                if (!msl_options.supports_msl_version(2, 1))
                    SPIRV_CROSS_THROW("Stencil export only supported in MSL 2.1 and up.");
                return string(" [[") + builtin_qualifier(builtin) + "]]";

            case BuiltInFragDepth:
                // Ditto FragDepth.
                if (!msl_options.enable_frag_depth_builtin)
                    return "";
                /* fallthrough */
            case BuiltInSampleMask:
                return string(" [[") + builtin_qualifier(builtin) + "]]";

            default:
                return "";
            }
        }
        uint32_t locn = get_member_location(type.self, index);
        // Metal will likely complain about missing color attachments, too.
        if (locn != k_unknown_location && !(msl_options.enable_frag_output_mask & (1 << locn)))
            return "";
        if (locn != k_unknown_location && has_member_decoration(type.self, index, DecorationIndex))
            return join(" [[color(", locn, "), index(", get_member_decoration(type.self, index, DecorationIndex),
                        ")]]");
        else if (locn != k_unknown_location)
            return join(" [[color(", locn, ")]]");
        else if (has_member_decoration(type.self, index, DecorationIndex))
            return join(" [[index(", get_member_decoration(type.self, index, DecorationIndex), ")]]");
        else
            return "";
    }

    // Compute function inputs
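    // (Illustrative: GlobalInvocationId maps to " [[thread_position_in_grid]]".)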
    if (execution.model == ExecutionModelGLCompute && type.storage == StorageClassInput)
    {
        if (is_builtin)
        {
            switch (builtin)
            {
            case BuiltInNumSubgroups:
            case BuiltInSubgroupId:
            case BuiltInSubgroupLocalInvocationId: // FIXME: Should work in any stage
            case BuiltInSubgroupSize: // FIXME: Should work in any stage
                if (msl_options.emulate_subgroups)
                    break;
                /* fallthrough */
            case BuiltInGlobalInvocationId:
            case BuiltInWorkgroupId:
            case BuiltInNumWorkgroups:
            case BuiltInLocalInvocationId:
            case BuiltInLocalInvocationIndex:
                return string(" [[") + builtin_qualifier(builtin) + "]]";

            default:
                return "";
            }
        }
    }

    return "";
}

// A user-defined output variable is considered to match an input variable in the subsequent
// stage if the two variables are declared with the same Location and Component decoration and
// match in type and decoration, except that interpolation decorations are not required to match.
// For the purposes of interface matching, variables declared without a Component decoration are
// considered to have a Component decoration of zero.
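// (Illustrative: Location 2 with Component 1 produces the qualifier body "user(locn2_1)".)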
string CompilerMSL::member_location_attribute_qualifier(const SPIRType &type, uint32_t index)
{
    string quals;
    uint32_t comp;
    uint32_t locn = get_member_location(type.self, index, &comp);
    if (locn != k_unknown_location)
    {
        quals += "user(locn";
        quals += convert_to_string(locn);
        if (comp != k_unknown_component && comp != 0)
        {
            quals += "_";
            quals += convert_to_string(comp);
        }
        quals += ")";
    }
    return quals;
}

// Returns the location decoration of the member with the specified index in the specified type.
// If the location of the member has been explicitly set, that location is used. If not,
// k_unknown_location is returned.
uint32_t CompilerMSL::get_member_location(uint32_t type_id, uint32_t index, uint32_t *comp) const
{
    if (comp)
    {
        if (has_member_decoration(type_id, index, DecorationComponent))
            *comp = get_member_decoration(type_id, index, DecorationComponent);
        else
            *comp = k_unknown_component;
    }

    if (has_member_decoration(type_id, index, DecorationLocation))
        return get_member_decoration(type_id, index, DecorationLocation);
    else
        return k_unknown_location;
}

uint32_t CompilerMSL::get_or_allocate_builtin_input_member_location(spv::BuiltIn builtin,
                                                                    uint32_t type_id, uint32_t index,
                                                                    uint32_t *comp)
{
    uint32_t loc = get_member_location(type_id, index, comp);
    if (loc != k_unknown_location)
        return loc;

    if (comp)
        *comp = k_unknown_component;

    // Late allocation. Find a location which is unused by the application.
    // This can happen for built-in inputs in tessellation which are mixed and matched with user inputs.
    auto &mbr_type = get<SPIRType>(get<SPIRType>(type_id).member_types[index]);
    uint32_t count = type_to_location_count(mbr_type);

    loc = 0;

    const auto location_range_in_use = [this](uint32_t location, uint32_t location_count) -> bool {
        for (uint32_t i = 0; i < location_count; i++)
            if (location_inputs_in_use.count(location + i) != 0)
                return true;
        return false;
    };

    while (location_range_in_use(loc, count))
        loc++;

    set_member_decoration(type_id, index, DecorationLocation, loc);

    // Triangle tess level inputs are shared in one packed float4,
    // mark both builtins as sharing one location.
    if (get_execution_mode_bitset().get(ExecutionModeTriangles) &&
        (builtin == BuiltInTessLevelInner || builtin == BuiltInTessLevelOuter))
    {
        builtin_to_automatic_input_location[BuiltInTessLevelInner] = loc;
        builtin_to_automatic_input_location[BuiltInTessLevelOuter] = loc;
    }
    else
        builtin_to_automatic_input_location[builtin] = loc;

    mark_location_as_used_by_shader(loc, mbr_type, StorageClassInput, true);
    return loc;
}

// Returns the type declaration for a function, including the
// entry type if the current function is the entry point function
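// (Illustrative, names assumed: a macOS triangle-domain tess. eval. entry point could be
//  declared as "[[ patch(triangle, 3) ]] vertex main0_out".)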
string CompilerMSL::func_type_decl(SPIRType &type)
{
    // The regular function return type. If not processing the entry point function, that's all we need
    string return_type = type_to_glsl(type) + type_to_array_glsl(type);
    if (!processing_entry_point)
        return return_type;

    // If an outgoing interface block has been defined, and it should be returned, override the entry point return type
    bool ep_should_return_output = !get_is_rasterization_disabled();
    if (stage_out_var_id && ep_should_return_output)
        return_type = type_to_glsl(get_stage_out_struct_type()) + type_to_array_glsl(type);

    // Prepend an entry type, based on the execution model
    string entry_type;
    auto &execution = get_entry_point();
    switch (execution.model)
    {
    case ExecutionModelVertex:
        if (msl_options.vertex_for_tessellation && !msl_options.supports_msl_version(1, 2))
            SPIRV_CROSS_THROW("Tessellation requires Metal 1.2.");
        entry_type = msl_options.vertex_for_tessellation ? "kernel" : "vertex";
        break;
    case ExecutionModelTessellationEvaluation:
        if (!msl_options.supports_msl_version(1, 2))
            SPIRV_CROSS_THROW("Tessellation requires Metal 1.2.");
        if (execution.flags.get(ExecutionModeIsolines))
            SPIRV_CROSS_THROW("Metal does not support isoline tessellation.");
        if (msl_options.is_ios())
            entry_type =
                join("[[ patch(", execution.flags.get(ExecutionModeTriangles) ? "triangle" : "quad", ") ]] vertex");
        else
            entry_type = join("[[ patch(", execution.flags.get(ExecutionModeTriangles) ? "triangle" : "quad", ", ",
                              execution.output_vertices, ") ]] vertex");
        break;
    case ExecutionModelFragment:
        entry_type = execution.flags.get(ExecutionModeEarlyFragmentTests) ||
                             execution.flags.get(ExecutionModePostDepthCoverage) ?
                         "[[ early_fragment_tests ]] fragment" :
                         "fragment";
        break;
    case ExecutionModelTessellationControl:
        if (!msl_options.supports_msl_version(1, 2))
            SPIRV_CROSS_THROW("Tessellation requires Metal 1.2.");
        if (execution.flags.get(ExecutionModeIsolines))
            SPIRV_CROSS_THROW("Metal does not support isoline tessellation.");
        /* fallthrough */
    case ExecutionModelGLCompute:
    case ExecutionModelKernel:
        entry_type = "kernel";
        break;
    default:
        entry_type = "unknown";
        break;
    }

    return entry_type + " " + return_type;
}

// In MSL, address space qualifiers are required for all pointer or reference variables
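// (Illustrative: a read-only SSBO argument resolves to "const device", a UBO to "constant".)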
string CompilerMSL::get_argument_address_space(const SPIRVariable &argument)
{
    const auto &type = get<SPIRType>(argument.basetype);
    return get_type_address_space(type, argument.self, true);
}

string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id, bool argument)
{
    // This can be called for variable pointer contexts as well, so be very careful about which method we choose.
    Bitset flags;
    auto *var = maybe_get<SPIRVariable>(id);
    if (var && type.basetype == SPIRType::Struct &&
        (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock)))
        flags = get_buffer_block_flags(id);
    else
        flags = get_decoration_bitset(id);

    const char *addr_space = nullptr;
    switch (type.storage)
    {
    case StorageClassWorkgroup:
        addr_space = "threadgroup";
        break;

    case StorageClassStorageBuffer:
    {
        // For arguments from variable pointers, we use the write count deduction, so
        // we should not assume any constness here. Only for global SSBOs.
        bool readonly = false;
        if (!var || has_decoration(type.self, DecorationBlock))
            readonly = flags.get(DecorationNonWritable);

        addr_space = readonly ? "const device" : "device";
        break;
    }

    case StorageClassUniform:
    case StorageClassUniformConstant:
    case StorageClassPushConstant:
        if (type.basetype == SPIRType::Struct)
        {
            bool ssbo = has_decoration(type.self, DecorationBufferBlock);
            if (ssbo)
                addr_space = flags.get(DecorationNonWritable) ? "const device" : "device";
            else
                addr_space = "constant";
        }
        else if (!argument)
        {
            addr_space = "constant";
        }
        else if (type_is_msl_framebuffer_fetch(type))
        {
            // Subpass inputs are passed around by value.
            addr_space = "";
        }
        break;

    case StorageClassFunction:
    case StorageClassGeneric:
        break;

    case StorageClassInput:
        if (get_execution_model() == ExecutionModelTessellationControl && var &&
            var->basevariable == stage_in_ptr_var_id)
            addr_space = msl_options.multi_patch_workgroup ? "constant" : "threadgroup";
        if (get_execution_model() == ExecutionModelFragment && var && var->basevariable == stage_in_var_id)
            addr_space = "thread";
        break;

    case StorageClassOutput:
        if (capture_output_to_buffer)
        {
            if (var && type.storage == StorageClassOutput)
            {
                bool is_masked = is_stage_output_variable_masked(*var);

                if (is_masked)
                {
                    if (is_tessellation_shader())
                        addr_space = "threadgroup";
                    else
                        addr_space = "thread";
                }
                else if (variable_decl_is_remapped_storage(*var, StorageClassWorkgroup))
                    addr_space = "threadgroup";
            }

            if (!addr_space)
                addr_space = "device";
        }
        break;

    default:
        break;
    }

    if (!addr_space)
    {
        // No address space for plain values.
        addr_space = type.pointer || (argument && type.basetype == SPIRType::ControlPointArray) ? "thread" : "";
    }

    return join(flags.get(DecorationVolatile) || flags.get(DecorationCoherent) ? "volatile " : "", addr_space);
}

const char *CompilerMSL::to_restrict(uint32_t id, bool space)
{
    // This can be called for variable pointer contexts as well, so be very careful about which method we choose.
    Bitset flags;
    if (ir.ids[id].get_type() == TypeVariable)
    {
        uint32_t type_id = expression_type_id(id);
        auto &type = expression_type(id);
        if (type.basetype == SPIRType::Struct &&
            (has_decoration(type_id, DecorationBlock) || has_decoration(type_id, DecorationBufferBlock)))
            flags = get_buffer_block_flags(id);
        else
            flags = get_decoration_bitset(id);
    }
    else
        flags = get_decoration_bitset(id);

    return flags.get(DecorationRestrict) ? (space ? "restrict " : "restrict") : "";
}

string CompilerMSL::entry_point_arg_stage_in()
{
    string decl;

    if (get_execution_model() == ExecutionModelTessellationControl && msl_options.multi_patch_workgroup)
        return decl;

    // Stage-in structure
    uint32_t stage_in_id;
    if (get_execution_model() == ExecutionModelTessellationEvaluation)
        stage_in_id = patch_stage_in_var_id;
    else
        stage_in_id = stage_in_var_id;

    if (stage_in_id)
    {
        auto &var = get<SPIRVariable>(stage_in_id);
        auto &type = get_variable_data_type(var);

        add_resource_name(var.self);
        decl = join(type_to_glsl(type), " ", to_name(var.self), " [[stage_in]]");
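        // Illustrative result (assumed names): "main0_in in [[stage_in]]".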
    }

    return decl;
}

// Returns true if this input builtin should be a direct parameter on a shader function parameter list,
// and false for builtins that should be passed or calculated some other way.
bool CompilerMSL::is_direct_input_builtin(BuiltIn bi_type)
{
    switch (bi_type)
    {
    // Vertex function in
    case BuiltInVertexId:
    case BuiltInVertexIndex:
    case BuiltInBaseVertex:
    case BuiltInInstanceId:
    case BuiltInInstanceIndex:
    case BuiltInBaseInstance:
        return get_execution_model() != ExecutionModelVertex || !msl_options.vertex_for_tessellation;
    // Tess. control function in
    case BuiltInPosition:
    case BuiltInPointSize:
    case BuiltInClipDistance:
    case BuiltInCullDistance:
    case BuiltInPatchVertices:
        return false;
    case BuiltInInvocationId:
    case BuiltInPrimitiveId:
        return get_execution_model() != ExecutionModelTessellationControl || !msl_options.multi_patch_workgroup;
    // Tess. evaluation function in
    case BuiltInTessLevelInner:
    case BuiltInTessLevelOuter:
        return false;
    // Fragment function in
    case BuiltInSamplePosition:
    case BuiltInHelperInvocation:
    case BuiltInBaryCoordNV:
    case BuiltInBaryCoordNoPerspNV:
        return false;
    case BuiltInViewIndex:
        return get_execution_model() == ExecutionModelFragment && msl_options.multiview &&
               msl_options.multiview_layered_rendering;
    // Compute function in
    case BuiltInSubgroupId:
    case BuiltInNumSubgroups:
        return !msl_options.emulate_subgroups;
    // Any stage function in
    case BuiltInDeviceIndex:
    case BuiltInSubgroupEqMask:
    case BuiltInSubgroupGeMask:
    case BuiltInSubgroupGtMask:
    case BuiltInSubgroupLeMask:
    case BuiltInSubgroupLtMask:
        return false;
    case BuiltInSubgroupSize:
        if (msl_options.fixed_subgroup_size != 0)
            return false;
        /* fallthrough */
    case BuiltInSubgroupLocalInvocationId:
        return !msl_options.emulate_subgroups;
    default:
        return true;
    }
}

// Returns true if this is a fragment shader that runs per sample, and false otherwise.
bool CompilerMSL::is_sample_rate() const
{
    auto &caps = get_declared_capabilities();
    return get_execution_model() == ExecutionModelFragment &&
           (msl_options.force_sample_rate_shading ||
            std::find(caps.begin(), caps.end(), CapabilitySampleRateShading) != caps.end() ||
            (msl_options.use_framebuffer_fetch_subpasses && need_subpass_input));
}

bool CompilerMSL::is_intersection_query() const
{
    auto &caps = get_declared_capabilities();
    return std::find(caps.begin(), caps.end(), CapabilityRayQueryKHR) != caps.end();
}

void CompilerMSL::entry_point_args_builtin(string &ep_args)
{
    // Builtin variables
    SmallVector<pair<SPIRVariable *, BuiltIn>, 8> active_builtins;
    ir.for_each_typed_id<SPIRVariable>([&](uint32_t var_id, SPIRVariable &var) {
        if (var.storage != StorageClassInput)
            return;

        auto bi_type = BuiltIn(get_decoration(var_id, DecorationBuiltIn));

        // Don't emit SamplePosition as a separate parameter. In the entry
        // point, we get that by calling get_sample_position() on the sample ID.
        if (is_builtin_variable(var) &&
            get_variable_data_type(var).basetype != SPIRType::Struct &&
            get_variable_data_type(var).basetype != SPIRType::ControlPointArray)
        {
            // If the builtin is not part of the active input builtin set, don't emit it.
            // Relevant for multiple entry-point modules which might declare unused builtins.
            if (!active_input_builtins.get(bi_type) || !interface_variable_exists_in_entry_point(var_id))
                return;

            // Remember this variable. We may need to correct its type.
            active_builtins.push_back(make_pair(&var, bi_type));

            if (is_direct_input_builtin(bi_type))
            {
                if (!ep_args.empty())
                    ep_args += ", ";

                // Handle HLSL-style 0-based vertex/instance index.
                builtin_declaration = true;
                ep_args += builtin_type_decl(bi_type, var_id) + " " + to_expression(var_id);
                ep_args += " [[" + builtin_qualifier(bi_type);

                if (bi_type == BuiltInSampleMask && get_entry_point().flags.get(ExecutionModePostDepthCoverage))
                {
                    if (!msl_options.supports_msl_version(2))
                        SPIRV_CROSS_THROW("Post-depth coverage requires MSL 2.0.");
                    if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 3))
                        SPIRV_CROSS_THROW("Post-depth coverage on Mac requires MSL 2.3.");
                    ep_args += ", post_depth_coverage";
                }
                ep_args += "]]";
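                // Illustrative result (assumed name): "uint gl_VertexIndex [[vertex_id]]".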
                builtin_declaration = false;
            }
        }

        if (has_extended_decoration(var_id, SPIRVCrossDecorationBuiltInDispatchBase))
        {
            // This is a special implicit builtin, not corresponding to any SPIR-V builtin,
            // which holds the base that was passed to vkCmdDispatchBase() or vkCmdDrawIndexed(). If it's present,
            // assume we emitted it for a good reason.
            assert(msl_options.supports_msl_version(1, 2));
            if (!ep_args.empty())
                ep_args += ", ";

            ep_args += type_to_glsl(get_variable_data_type(var)) + " " + to_expression(var_id) + " [[grid_origin]]";
        }

        if (has_extended_decoration(var_id, SPIRVCrossDecorationBuiltInStageInputSize))
        {
            // This is another special implicit builtin, not corresponding to any SPIR-V builtin,
            // which holds the number of vertices and instances to draw. If it's present,
            // assume we emitted it for a good reason.
            assert(msl_options.supports_msl_version(1, 2));
            if (!ep_args.empty())
                ep_args += ", ";

            ep_args += type_to_glsl(get_variable_data_type(var)) + " " + to_expression(var_id) + " [[grid_size]]";
        }
    });

    // Correct the types of all encountered active builtins. We couldn't do this before
    // because ensure_correct_builtin_type() may increase the bound, which isn't allowed
    // while iterating over IDs.
    for (auto &var : active_builtins)
        var.first->basetype = ensure_correct_builtin_type(var.first->basetype, var.second);

    // Handle HLSL-style 0-based vertex/instance index.
    if (needs_base_vertex_arg == TriState::Yes)
        ep_args += built_in_func_arg(BuiltInBaseVertex, !ep_args.empty());

    if (needs_base_instance_arg == TriState::Yes)
        ep_args += built_in_func_arg(BuiltInBaseInstance, !ep_args.empty());

    if (capture_output_to_buffer)
    {
        // Add parameters to hold the indirect draw parameters and the shader output. This has to be handled
        // specially because it needs to be a pointer, not a reference.
        if (stage_out_var_id)
        {
            if (!ep_args.empty())
                ep_args += ", ";
            ep_args += join("device ", type_to_glsl(get_stage_out_struct_type()), "* ", output_buffer_var_name,
                            " [[buffer(", msl_options.shader_output_buffer_index, ")]]");
        }

        if (get_execution_model() == ExecutionModelTessellationControl)
        {
            if (!ep_args.empty())
                ep_args += ", ";
            ep_args +=
                join("constant uint* spvIndirectParams [[buffer(", msl_options.indirect_params_buffer_index, ")]]");
        }
        else if (stage_out_var_id &&
                 !(get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation))
        {
            if (!ep_args.empty())
                ep_args += ", ";
            ep_args +=
                join("device uint* spvIndirectParams [[buffer(", msl_options.indirect_params_buffer_index, ")]]");
        }

        if (get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation &&
            (active_input_builtins.get(BuiltInVertexIndex) || active_input_builtins.get(BuiltInVertexId)) &&
            msl_options.vertex_index_type != Options::IndexType::None)
        {
            // Add the index buffer so we can set gl_VertexIndex correctly.
            if (!ep_args.empty())
                ep_args += ", ";
            switch (msl_options.vertex_index_type)
            {
            case Options::IndexType::None:
                break;
            case Options::IndexType::UInt16:
                ep_args += join("const device ushort* ", index_buffer_var_name, " [[buffer(",
                                msl_options.shader_index_buffer_index, ")]]");
                break;
            case Options::IndexType::UInt32:
                ep_args += join("const device uint* ", index_buffer_var_name, " [[buffer(",
                                msl_options.shader_index_buffer_index, ")]]");
                break;
            }
        }

        // Tessellation control shaders get three additional parameters:
        // a buffer to hold the per-patch data, a buffer to hold the per-patch
        // tessellation levels, and a block of workgroup memory to hold the
        // input control point data.
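        // (Illustrative, with assumed names and default buffer indices: the tess factor
        //  buffer might appear as "device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]".)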
        if (get_execution_model() == ExecutionModelTessellationControl)
        {
            if (patch_stage_out_var_id)
            {
                if (!ep_args.empty())
                    ep_args += ", ";
                ep_args +=
                    join("device ", type_to_glsl(get_patch_stage_out_struct_type()), "* ", patch_output_buffer_var_name,
                         " [[buffer(", convert_to_string(msl_options.shader_patch_output_buffer_index), ")]]");
            }

            if (!ep_args.empty())
                ep_args += ", ";
            ep_args += join("device ", get_tess_factor_struct_name(), "* ", tess_factor_buffer_var_name, " [[buffer(",
                            convert_to_string(msl_options.shader_tess_factor_buffer_index), ")]]");

            // Initializer for tess factors must be handled specially since it's never declared as a normal variable.
            uint32_t outer_factor_initializer_id = 0;
            uint32_t inner_factor_initializer_id = 0;
            ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
                if (!has_decoration(var.self, DecorationBuiltIn) || var.storage != StorageClassOutput || !var.initializer)
                    return;

                BuiltIn builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn));
                if (builtin == BuiltInTessLevelInner)
                    inner_factor_initializer_id = var.initializer;
                else if (builtin == BuiltInTessLevelOuter)
                    outer_factor_initializer_id = var.initializer;
            });

            const SPIRConstant *c = nullptr;

            if (outer_factor_initializer_id && (c = maybe_get<SPIRConstant>(outer_factor_initializer_id)))
            {
                auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
                entry_func.fixup_hooks_in.push_back([=]() {
                    uint32_t components = get_execution_mode_bitset().get(ExecutionModeTriangles) ? 3 : 4;
                    for (uint32_t i = 0; i < components; i++)
                    {
                        statement(builtin_to_glsl(BuiltInTessLevelOuter, StorageClassOutput), "[", i, "] = ",
                                  "half(", to_expression(c->subconstants[i]), ");");
                    }
                });
            }

            if (inner_factor_initializer_id && (c = maybe_get<SPIRConstant>(inner_factor_initializer_id)))
            {
                auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
                if (get_execution_mode_bitset().get(ExecutionModeTriangles))
                {
                    entry_func.fixup_hooks_in.push_back([=]() {
                        statement(builtin_to_glsl(BuiltInTessLevelInner, StorageClassOutput), " = ", "half(",
                                  to_expression(c->subconstants[0]), ");");
                    });
                }
                else
                {
                    entry_func.fixup_hooks_in.push_back([=]() {
                        for (uint32_t i = 0; i < 2; i++)
                        {
                            statement(builtin_to_glsl(BuiltInTessLevelInner, StorageClassOutput), "[", i, "] = ",
                                      "half(", to_expression(c->subconstants[i]), ");");
                        }
                    });
                }
            }

            if (stage_in_var_id)
            {
                if (!ep_args.empty())
                    ep_args += ", ";
                if (msl_options.multi_patch_workgroup)
                {
                    ep_args += join("device ", type_to_glsl(get_stage_in_struct_type()), "* ", input_buffer_var_name,
                                    " [[buffer(", convert_to_string(msl_options.shader_input_buffer_index), ")]]");
                }
                else
                {
                    ep_args += join("threadgroup ", type_to_glsl(get_stage_in_struct_type()), "* ", input_wg_var_name,
                                    " [[threadgroup(", convert_to_string(msl_options.shader_input_wg_index), ")]]");
                }
            }
        }
    }
}

string CompilerMSL::entry_point_args_argument_buffer(bool append_comma)
{
    string ep_args = entry_point_arg_stage_in();
    Bitset claimed_bindings;

    for (uint32_t i = 0; i < kMaxArgumentBuffers; i++)
    {
        uint32_t id = argument_buffer_ids[i];
        if (id == 0)
            continue;

        add_resource_name(id);
        auto &var = get<SPIRVariable>(id);
        auto &type = get_variable_data_type(var);

        if (!ep_args.empty())
            ep_args += ", ";

        // Check if the argument buffer binding itself has been remapped.
        uint32_t buffer_binding;
        auto itr = resource_bindings.find({ get_entry_point().model, i, kArgumentBufferBinding });
        if (itr != end(resource_bindings))
        {
            buffer_binding = itr->second.first.msl_buffer;
            itr->second.second = true;
        }
        else
        {
            // As a fallback, directly map desc set <-> binding.
            // If that was taken, take the next buffer binding.
            if (claimed_bindings.get(i))
                buffer_binding = next_metal_resource_index_buffer;
            else
                buffer_binding = i;
        }

        claimed_bindings.set(buffer_binding);

        ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_restrict(id) + to_name(id);
        ep_args += " [[buffer(" + convert_to_string(buffer_binding) + ")]]";

        next_metal_resource_index_buffer = max(next_metal_resource_index_buffer, buffer_binding + 1);
    }

    entry_point_args_discrete_descriptors(ep_args);
    entry_point_args_builtin(ep_args);

    if (!ep_args.empty() && append_comma)
        ep_args += ", ";

    return ep_args;
}

const MSLConstexprSampler *CompilerMSL::find_constexpr_sampler(uint32_t id) const
{
    // Try by ID.
    {
        auto itr = constexpr_samplers_by_id.find(id);
        if (itr != end(constexpr_samplers_by_id))
            return &itr->second;
    }

    // Try by binding.
    {
        uint32_t desc_set = get_decoration(id, DecorationDescriptorSet);
        uint32_t binding = get_decoration(id, DecorationBinding);

        auto itr = constexpr_samplers_by_binding.find({ desc_set, binding });
        if (itr != end(constexpr_samplers_by_binding))
            return &itr->second;
    }

    return nullptr;
}

void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
{
    // Output resources, sorted by resource index & type
    // We need to sort to work around a bug on macOS 10.13 with NVidia drivers where switching between shaders
    // with different order of buffers can result in issues with buffer assignments inside the driver.
    struct Resource
    {
        SPIRVariable *var;
        string name;
        SPIRType::BaseType basetype;
        uint32_t index;
        uint32_t plane;
        uint32_t secondary_index;
    };

    SmallVector<Resource> resources;

    ir.for_each_typed_id<SPIRVariable>([&](uint32_t var_id, SPIRVariable &var) {
        if ((var.storage == StorageClassUniform || var.storage == StorageClassUniformConstant ||
             var.storage == StorageClassPushConstant || var.storage == StorageClassStorageBuffer) &&
            !is_hidden_variable(var))
        {
            auto &type = get_variable_data_type(var);

            if (is_supported_argument_buffer_type(type) && var.storage != StorageClassPushConstant)
            {
                uint32_t desc_set = get_decoration(var_id, DecorationDescriptorSet);
                if (descriptor_set_is_argument_buffer(desc_set))
                    return;
            }

            const MSLConstexprSampler *constexpr_sampler = nullptr;
            if (type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::Sampler)
            {
                constexpr_sampler = find_constexpr_sampler(var_id);
                if (constexpr_sampler)
                {
                    // Mark this ID as a constexpr sampler for later in case it came from set/bindings.
                    constexpr_samplers_by_id[var_id] = *constexpr_sampler;
                }
            }

            // Emulate texture2D atomic operations
            uint32_t secondary_index = 0;
            if (atomic_image_vars.count(var.self))
            {
                secondary_index = get_metal_resource_index(var, SPIRType::AtomicCounter, 0);
            }

            if (type.basetype == SPIRType::SampledImage)
            {
                add_resource_name(var_id);

                uint32_t plane_count = 1;
                if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable)
                    plane_count = constexpr_sampler->planes;

                for (uint32_t i = 0; i < plane_count; i++)
                    resources.push_back({ &var, to_name(var_id), SPIRType::Image,
                                          get_metal_resource_index(var, SPIRType::Image, i), i, secondary_index });

                if (type.image.dim != DimBuffer && !constexpr_sampler)
                {
                    resources.push_back({ &var, to_sampler_expression(var_id), SPIRType::Sampler,
                                          get_metal_resource_index(var, SPIRType::Sampler), 0, 0 });
                }
            }
            else if (!constexpr_sampler)
            {
                // constexpr samplers are not declared as resources.
                add_resource_name(var_id);
                resources.push_back({ &var, to_name(var_id), type.basetype,
                                      get_metal_resource_index(var, type.basetype), 0, secondary_index });
            }
        }
    });

    sort(resources.begin(), resources.end(), [](const Resource &lhs, const Resource &rhs) {
        return tie(lhs.basetype, lhs.index) < tie(rhs.basetype, rhs.index);
    });

    for (auto &r : resources)
    {
        auto &var = *r.var;
        auto &type = get_variable_data_type(var);

        uint32_t var_id = var.self;

        switch (r.basetype)
        {
        case SPIRType::Struct:
        {
            auto &m = ir.meta[type.self];
            if (m.members.size() == 0)
                break;

            if (!type.array.empty())
            {
                if (type.array.size() > 1)
                    SPIRV_CROSS_THROW("Arrays of arrays of buffers are not supported.");

                // Metal doesn't directly support this, so we must expand the
                // array. We'll declare a local array to hold these elements
                // later.
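                // (Illustrative, names assumed: a size-2 buffer array starting at index 2
                //  expands to "device Foo* foo_0 [[buffer(2)]], device Foo* foo_1 [[buffer(3)]]".)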
                uint32_t array_size = to_array_size_literal(type);
                if (array_size == 0)
                    SPIRV_CROSS_THROW("Unsized arrays of buffers are not supported in MSL.");

                // Allow Metal to use the array<T> template to make arrays a value type
                is_using_builtin_array = true;
                buffer_arrays.push_back(var_id);
                for (uint32_t i = 0; i < array_size; ++i)
                {
                    if (!ep_args.empty())
                        ep_args += ", ";
                    ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + to_restrict(var_id) +
                               r.name + "_" + convert_to_string(i);
                    ep_args += " [[buffer(" + convert_to_string(r.index + i) + ")";
                    if (interlocked_resources.count(var_id))
                        ep_args += ", raster_order_group(0)";
                    ep_args += "]]";
                }
                is_using_builtin_array = false;
            }
            else
            {
                if (!ep_args.empty())
                    ep_args += ", ";
                ep_args +=
                    get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_restrict(var_id) + r.name;
                ep_args += " [[buffer(" + convert_to_string(r.index) + ")";
                if (interlocked_resources.count(var_id))
                    ep_args += ", raster_order_group(0)";
                ep_args += "]]";
            }
            break;
        }
        case SPIRType::Sampler:
            if (!ep_args.empty())
                ep_args += ", ";
            ep_args += sampler_type(type, var_id) + " " + r.name;
            ep_args += " [[sampler(" + convert_to_string(r.index) + ")]]";
            break;
        case SPIRType::Image:
        {
            if (!ep_args.empty())
                ep_args += ", ";

            // Use Metal's native frame-buffer fetch API for subpass inputs.
            const auto &basetype = get<SPIRType>(var.basetype);
            if (!type_is_msl_framebuffer_fetch(basetype))
            {
                ep_args += image_type_glsl(type, var_id) + " " + r.name;
                if (r.plane > 0)
                    ep_args += join(plane_name_suffix, r.plane);
                ep_args += " [[texture(" + convert_to_string(r.index) + ")";
                if (interlocked_resources.count(var_id))
                    ep_args += ", raster_order_group(0)";
                ep_args += "]]";
            }
            else
            {
                if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 3))
                    SPIRV_CROSS_THROW("Framebuffer fetch on Mac is not supported before MSL 2.3.");
                ep_args += image_type_glsl(type, var_id) + " " + r.name;
                ep_args += " [[color(" + convert_to_string(r.index) + ")]]";
            }

            // Emulate texture2D atomic operations
            if (atomic_image_vars.count(var.self))
            {
                ep_args += ", device atomic_" + type_to_glsl(get<SPIRType>(basetype.image.type), 0);
                ep_args += "* " + r.name + "_atomic";
                ep_args += " [[buffer(" + convert_to_string(r.secondary_index) + ")";
                if (interlocked_resources.count(var_id))
                    ep_args += ", raster_order_group(0)";
                ep_args += "]]";
            }
            break;
        }
        case SPIRType::AccelerationStructure:
            ep_args += ", " + type_to_glsl(type, var_id) + " " + r.name;
            ep_args += " [[buffer(" + convert_to_string(r.index) + ")]]";
            break;
        default:
            if (!ep_args.empty())
                ep_args += ", ";
            if (!type.pointer)
                ep_args += get_type_address_space(get<SPIRType>(var.basetype), var_id) + " " +
                           type_to_glsl(type, var_id) + "& " + r.name;
            else
                ep_args += type_to_glsl(type, var_id) + " " + r.name;
            ep_args += " [[buffer(" + convert_to_string(r.index) + ")";
            if (interlocked_resources.count(var_id))
                ep_args += ", raster_order_group(0)";
            ep_args += "]]";
            break;
        }
    }
}

// Returns a string containing a comma-delimited list of args for the entry point function
// This is the "classic" method of MSL 1 when we don't have argument buffer support.
string CompilerMSL::entry_point_args_classic(bool append_comma)
{
    string ep_args = entry_point_arg_stage_in();
    entry_point_args_discrete_descriptors(ep_args);
    entry_point_args_builtin(ep_args);

    if (!ep_args.empty() && append_comma)
        ep_args += ", ";

    return ep_args;
}

void CompilerMSL::fix_up_shader_inputs_outputs()
{
    auto &entry_func = this->get<SPIRFunction>(ir.default_entry_point);

    // Emit a guard to ensure we don't execute beyond the last vertex.
    // Vertex shaders shouldn't have the problems with barriers in non-uniform control flow that
    // tessellation control shaders do, so early returns should be OK. We may need to revisit this
    // if it ever becomes possible to use barriers from a vertex shader.
    if (get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation)
    {
        entry_func.fixup_hooks_in.push_back([this]() {
            statement("if (any(", to_expression(builtin_invocation_id_id),
                      " >= ", to_expression(builtin_stage_input_size_id), "))");
            statement(" return;");
        });
    }

    // Look for sampled images and buffers. Add hooks to set up the swizzle constants or array lengths.
    ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
        auto &type = get_variable_data_type(var);
        uint32_t var_id = var.self;
        bool ssbo = has_decoration(type.self, DecorationBufferBlock);

        if (var.storage == StorageClassUniformConstant && !is_hidden_variable(var))
        {
            if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(type))
            {
                entry_func.fixup_hooks_in.push_back([this, &type, &var, var_id]() {
                    bool is_array_type = !type.array.empty();

                    uint32_t desc_set = get_decoration(var_id, DecorationDescriptorSet);
                    if (descriptor_set_is_argument_buffer(desc_set))
                    {
                        statement("constant uint", is_array_type ? "* " : "& ", to_swizzle_expression(var_id),
                                  is_array_type ? " = &" : " = ", to_name(argument_buffer_ids[desc_set]),
                                  ".spvSwizzleConstants", "[",
                                  convert_to_string(get_metal_resource_index(var, SPIRType::Image)), "];");
                    }
                    else
                    {
                        // If we have an array of images, we need to be able to index into it, so take a pointer instead.
                        statement("constant uint", is_array_type ? "* " : "& ", to_swizzle_expression(var_id),
                                  is_array_type ? " = &" : " = ", to_name(swizzle_buffer_id), "[",
                                  convert_to_string(get_metal_resource_index(var, SPIRType::Image)), "];");
                    }
                });
            }
        }
        else if ((var.storage == StorageClassStorageBuffer || (var.storage == StorageClassUniform && ssbo)) &&
                 !is_hidden_variable(var))
        {
            if (buffers_requiring_array_length.count(var.self))
            {
                entry_func.fixup_hooks_in.push_back([this, &type, &var, var_id]() {
                    bool is_array_type = !type.array.empty();

                    uint32_t desc_set = get_decoration(var_id, DecorationDescriptorSet);
                    if (descriptor_set_is_argument_buffer(desc_set))
                    {
                        statement("constant uint", is_array_type ? "* " : "& ", to_buffer_size_expression(var_id),
                                  is_array_type ? " = &" : " = ", to_name(argument_buffer_ids[desc_set]),
                                  ".spvBufferSizeConstants", "[",
                                  convert_to_string(get_metal_resource_index(var, SPIRType::Image)), "];");
                    }
                    else
                    {
                        // If we have an array of images, we need to be able to index into it, so take a pointer instead.
                        statement("constant uint", is_array_type ? "* " : "& ", to_buffer_size_expression(var_id),
                                  is_array_type ? " = &" : " = ", to_name(buffer_size_buffer_id), "[",
                                  convert_to_string(get_metal_resource_index(var, type.basetype)), "];");
                    }
                });
            }
        }
    });

    // Builtin variables
    ir.for_each_typed_id<SPIRVariable>([this, &entry_func](uint32_t, SPIRVariable &var) {
        uint32_t var_id = var.self;
        BuiltIn bi_type = ir.meta[var_id].decoration.builtin_type;

        if (var.storage != StorageClassInput && var.storage != StorageClassOutput)
            return;
        if (!interface_variable_exists_in_entry_point(var.self))
            return;

        if (var.storage == StorageClassInput && is_builtin_variable(var) && active_input_builtins.get(bi_type))
        {
            switch (bi_type)
            {
            case BuiltInSamplePosition:
                entry_func.fixup_hooks_in.push_back([=]() {
                    statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = get_sample_position(",
                              to_expression(builtin_sample_id_id), ");");
                });
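                // Illustrative emitted fixup (assumed names):
                //   "float2 gl_SamplePosition = get_sample_position(gl_SampleID);"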
  10728. break;
  10729. case BuiltInFragCoord:
  10730. if (is_sample_rate())
  10731. {
  10732. entry_func.fixup_hooks_in.push_back([=]() {
  10733. statement(to_expression(var_id), ".xy += get_sample_position(",
  10734. to_expression(builtin_sample_id_id), ") - 0.5;");
  10735. });
  10736. }
  10737. break;
  10738. case BuiltInHelperInvocation:
  10739. if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3))
  10740. SPIRV_CROSS_THROW("simd_is_helper_thread() requires version 2.3 on iOS.");
  10741. else if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 1))
  10742. SPIRV_CROSS_THROW("simd_is_helper_thread() requires version 2.1 on macOS.");
  10743. entry_func.fixup_hooks_in.push_back([=]() {
  10744. statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = simd_is_helper_thread();");
  10745. });
  10746. break;
  10747. case BuiltInInvocationId:
  10748. // This is direct-mapped without multi-patch workgroups.
  10749. if (get_execution_model() != ExecutionModelTessellationControl || !msl_options.multi_patch_workgroup)
  10750. break;
  10751. entry_func.fixup_hooks_in.push_back([=]() {
  10752. statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
  10753. to_expression(builtin_invocation_id_id), ".x % ", this->get_entry_point().output_vertices,
  10754. ";");
  10755. });
  10756. break;
  10757. case BuiltInPrimitiveId:
  10758. // This is natively supported by fragment and tessellation evaluation shaders.
  10759. // In tessellation control shaders, this is direct-mapped without multi-patch workgroups.
				if (get_execution_model() != ExecutionModelTessellationControl || !msl_options.multi_patch_workgroup)
					break;
				entry_func.fixup_hooks_in.push_back([=]() {
					statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = min(",
					          to_expression(builtin_invocation_id_id), ".x / ", this->get_entry_point().output_vertices,
					          ", spvIndirectParams[1] - 1);");
				});
				break;
			case BuiltInPatchVertices:
				if (get_execution_model() == ExecutionModelTessellationEvaluation)
					entry_func.fixup_hooks_in.push_back([=]() {
						statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
						          to_expression(patch_stage_in_var_id), ".gl_in.size();");
					});
				else
					entry_func.fixup_hooks_in.push_back([=]() {
						statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = spvIndirectParams[0];");
					});
				break;
			case BuiltInTessCoord:
				// Emit a fixup to account for the shifted domain. Don't do this for triangles;
				// MoltenVK will just reverse the winding order instead.
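				// (Illustrative only: assuming the variable is named gl_TessCoord, the hook below emits
				// roughly: gl_TessCoord.y = 1.0 - gl_TessCoord.y;)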
				if (msl_options.tess_domain_origin_lower_left && !get_entry_point().flags.get(ExecutionModeTriangles))
				{
					string tc = to_expression(var_id);
					entry_func.fixup_hooks_in.push_back([=]() { statement(tc, ".y = 1.0 - ", tc, ".y;"); });
				}
				break;
			case BuiltInSubgroupId:
				if (!msl_options.emulate_subgroups)
					break;
				// For subgroup emulation, this is the same as the local invocation index.
				entry_func.fixup_hooks_in.push_back([=]() {
					statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
					          to_expression(builtin_local_invocation_index_id), ";");
				});
				break;
			case BuiltInNumSubgroups:
				if (!msl_options.emulate_subgroups)
					break;
				// For subgroup emulation, this is the same as the workgroup size.
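				// (Illustrative only: for a 3-component workgroup size this emits roughly:
				// uint gl_NumSubgroups = gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z;)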
				entry_func.fixup_hooks_in.push_back([=]() {
					auto &type = expression_type(builtin_workgroup_size_id);
					string size_expr = to_expression(builtin_workgroup_size_id);
					if (type.vecsize >= 3)
						size_expr = join(size_expr, ".x * ", size_expr, ".y * ", size_expr, ".z");
					else if (type.vecsize == 2)
						size_expr = join(size_expr, ".x * ", size_expr, ".y");
					statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", size_expr, ";");
				});
				break;
			case BuiltInSubgroupLocalInvocationId:
				if (!msl_options.emulate_subgroups)
					break;
				// For subgroup emulation, assume subgroups of size 1.
				entry_func.fixup_hooks_in.push_back(
				    [=]() { statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = 0;"); });
				break;
			case BuiltInSubgroupSize:
				if (msl_options.emulate_subgroups)
				{
					// For subgroup emulation, assume subgroups of size 1.
					entry_func.fixup_hooks_in.push_back(
					    [=]() { statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = 1;"); });
				}
				else if (msl_options.fixed_subgroup_size != 0)
				{
					entry_func.fixup_hooks_in.push_back([=]() {
						statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
						          msl_options.fixed_subgroup_size, ";");
					});
				}
				break;
			case BuiltInSubgroupEqMask:
				if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 2))
					SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.2 on iOS.");
				if (!msl_options.supports_msl_version(2, 1))
					SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1.");
				entry_func.fixup_hooks_in.push_back([=]() {
					if (msl_options.is_ios())
					{
						statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", "uint4(1 << ",
						          to_expression(builtin_subgroup_invocation_id_id), ", uint3(0));");
					}
					else
					{
						statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
						          to_expression(builtin_subgroup_invocation_id_id), " >= 32 ? uint4(0, (1 << (",
						          to_expression(builtin_subgroup_invocation_id_id), " - 32)), uint2(0)) : uint4(1 << ",
						          to_expression(builtin_subgroup_invocation_id_id), ", uint3(0));");
					}
				});
				break;
			case BuiltInSubgroupGeMask:
				if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 2))
					SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.2 on iOS.");
				if (!msl_options.supports_msl_version(2, 1))
					SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1.");
				if (msl_options.fixed_subgroup_size != 0)
					add_spv_func_and_recompile(SPVFuncImplSubgroupBallot);
				entry_func.fixup_hooks_in.push_back([=]() {
					// Case where index < 32, size < 32:
					// mask0 = bfi(0, 0xFFFFFFFF, index, size - index);
					// mask1 = bfi(0, 0xFFFFFFFF, 0, 0); // Gives 0
					// Case where index < 32 but size >= 32:
					// mask0 = bfi(0, 0xFFFFFFFF, index, 32 - index);
					// mask1 = bfi(0, 0xFFFFFFFF, 0, size - 32);
					// Case where index >= 32:
					// mask0 = bfi(0, 0xFFFFFFFF, 32, 0); // Gives 0
					// mask1 = bfi(0, 0xFFFFFFFF, index - 32, size - index);
					// This is expressed without branches to avoid divergent
					// control flow--hence the complicated min/max expressions.
					// This is further complicated by the fact that if you attempt
					// to bfi/bfe out-of-bounds on Metal, undefined behavior is the
					// result.
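					// Worked example (not emitted code): with index = 40 and size = 64, the
					// index >= 32 case gives mask0 = bfi(0, 0xFFFFFFFF, 32, 0) = 0 and
					// mask1 = bfi(0, 0xFFFFFFFF, 8, 24), i.e. bits 8..31 of the second word,
					// which marks invocations 40..63 as being >= 40.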
					if (msl_options.fixed_subgroup_size > 32)
					{
						// Don't use the subgroup size variable with fixed subgroup sizes,
						// since the variables could be defined in the wrong order.
						statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
						          " = uint4(insert_bits(0u, 0xFFFFFFFF, min(",
						          to_expression(builtin_subgroup_invocation_id_id), ", 32u), (uint)max(32 - (int)",
						          to_expression(builtin_subgroup_invocation_id_id),
						          ", 0)), insert_bits(0u, 0xFFFFFFFF,"
						          " (uint)max((int)",
						          to_expression(builtin_subgroup_invocation_id_id), " - 32, 0), ",
						          msl_options.fixed_subgroup_size, " - max(",
						          to_expression(builtin_subgroup_invocation_id_id),
						          ", 32u)), uint2(0));");
					}
					else if (msl_options.fixed_subgroup_size != 0)
					{
						statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
						          " = uint4(insert_bits(0u, 0xFFFFFFFF, ",
						          to_expression(builtin_subgroup_invocation_id_id), ", ",
						          msl_options.fixed_subgroup_size, " - ",
						          to_expression(builtin_subgroup_invocation_id_id),
						          "), uint3(0));");
					}
					else if (msl_options.is_ios())
					{
						// On iOS, the SIMD-group size will currently never exceed 32.
						statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
						          " = uint4(insert_bits(0u, 0xFFFFFFFF, ",
						          to_expression(builtin_subgroup_invocation_id_id), ", ",
						          to_expression(builtin_subgroup_size_id), " - ",
						          to_expression(builtin_subgroup_invocation_id_id), "), uint3(0));");
					}
					else
					{
						statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
						          " = uint4(insert_bits(0u, 0xFFFFFFFF, min(",
						          to_expression(builtin_subgroup_invocation_id_id), ", 32u), (uint)max(min((int)",
						          to_expression(builtin_subgroup_size_id), ", 32) - (int)",
						          to_expression(builtin_subgroup_invocation_id_id),
						          ", 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)",
						          to_expression(builtin_subgroup_invocation_id_id), " - 32, 0), (uint)max((int)",
						          to_expression(builtin_subgroup_size_id), " - (int)max(",
						          to_expression(builtin_subgroup_invocation_id_id), ", 32u), 0)), uint2(0));");
					}
				});
				break;
			case BuiltInSubgroupGtMask:
				if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 2))
					SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.2 on iOS.");
				if (!msl_options.supports_msl_version(2, 1))
					SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1.");
				add_spv_func_and_recompile(SPVFuncImplSubgroupBallot);
				entry_func.fixup_hooks_in.push_back([=]() {
					// The same logic applies here, except now the index is one
					// more than the subgroup invocation ID.
					if (msl_options.fixed_subgroup_size > 32)
					{
						statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
						          " = uint4(insert_bits(0u, 0xFFFFFFFF, min(",
						          to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), (uint)max(32 - (int)",
						          to_expression(builtin_subgroup_invocation_id_id),
						          " - 1, 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)",
						          to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0), ",
						          msl_options.fixed_subgroup_size, " - max(",
						          to_expression(builtin_subgroup_invocation_id_id),
						          " + 1, 32u)), uint2(0));");
					}
					else if (msl_options.fixed_subgroup_size != 0)
					{
						statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
						          " = uint4(insert_bits(0u, 0xFFFFFFFF, ",
						          to_expression(builtin_subgroup_invocation_id_id), " + 1, ",
						          msl_options.fixed_subgroup_size, " - ",
						          to_expression(builtin_subgroup_invocation_id_id),
						          " - 1), uint3(0));");
					}
					else if (msl_options.is_ios())
					{
						statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
						          " = uint4(insert_bits(0u, 0xFFFFFFFF, ",
						          to_expression(builtin_subgroup_invocation_id_id), " + 1, ",
						          to_expression(builtin_subgroup_size_id), " - ",
						          to_expression(builtin_subgroup_invocation_id_id), " - 1), uint3(0));");
					}
					else
					{
						statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
						          " = uint4(insert_bits(0u, 0xFFFFFFFF, min(",
						          to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), (uint)max(min((int)",
						          to_expression(builtin_subgroup_size_id), ", 32) - (int)",
						          to_expression(builtin_subgroup_invocation_id_id),
						          " - 1, 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)",
						          to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0), (uint)max((int)",
						          to_expression(builtin_subgroup_size_id), " - (int)max(",
						          to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), 0)), uint2(0));");
					}
				});
				break;
			case BuiltInSubgroupLeMask:
				if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 2))
					SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.2 on iOS.");
				if (!msl_options.supports_msl_version(2, 1))
					SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1.");
				add_spv_func_and_recompile(SPVFuncImplSubgroupBallot);
				entry_func.fixup_hooks_in.push_back([=]() {
					if (msl_options.is_ios())
					{
						statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
						          " = uint4(extract_bits(0xFFFFFFFF, 0, ",
						          to_expression(builtin_subgroup_invocation_id_id), " + 1), uint3(0));");
					}
					else
					{
						statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
						          " = uint4(extract_bits(0xFFFFFFFF, 0, min(",
						          to_expression(builtin_subgroup_invocation_id_id),
						          " + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)",
						          to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0)), uint2(0));");
					}
				});
				break;
			case BuiltInSubgroupLtMask:
				if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 2))
					SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.2 on iOS.");
				if (!msl_options.supports_msl_version(2, 1))
					SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1.");
				add_spv_func_and_recompile(SPVFuncImplSubgroupBallot);
				entry_func.fixup_hooks_in.push_back([=]() {
					if (msl_options.is_ios())
					{
						statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
						          " = uint4(extract_bits(0xFFFFFFFF, 0, ",
						          to_expression(builtin_subgroup_invocation_id_id), "), uint3(0));");
					}
					else
					{
						statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
						          " = uint4(extract_bits(0xFFFFFFFF, 0, min(",
						          to_expression(builtin_subgroup_invocation_id_id),
						          ", 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)",
						          to_expression(builtin_subgroup_invocation_id_id), " - 32, 0)), uint2(0));");
					}
				});
				break;
			case BuiltInViewIndex:
				if (!msl_options.multiview)
				{
					// According to the Vulkan spec, when not running under a multiview
					// render pass, ViewIndex is 0.
					entry_func.fixup_hooks_in.push_back([=]() {
						statement("const ", builtin_type_decl(bi_type), " ", to_expression(var_id), " = 0;");
					});
				}
				else if (msl_options.view_index_from_device_index)
				{
					// In this case, we take the view index from that of the device we're running on.
					entry_func.fixup_hooks_in.push_back([=]() {
						statement("const ", builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
						          msl_options.device_index, ";");
					});
					// We actually don't want to set the render_target_array_index here.
					// Since every physical device is rendering a different view,
					// there's no need for layered rendering here.
				}
				else if (!msl_options.multiview_layered_rendering)
				{
					// In this case, the views are rendered one at a time. The view index, then,
					// is just the first part of the "view mask".
					entry_func.fixup_hooks_in.push_back([=]() {
						statement("const ", builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
						          to_expression(view_mask_buffer_id), "[0];");
					});
				}
				else if (get_execution_model() == ExecutionModelFragment)
				{
					// Because we adjusted the view index in the vertex shader, we have to
					// adjust it back here.
					entry_func.fixup_hooks_in.push_back([=]() {
						statement(to_expression(var_id), " += ", to_expression(view_mask_buffer_id), "[0];");
					});
				}
				else if (get_execution_model() == ExecutionModelVertex)
				{
					// Metal provides no special support for multiview, so we smuggle
					// the view index in the instance index.
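					// Worked example (assuming a view mask buffer of { 0, 2 }, i.e. first view 0 with
					// 2 views, and base instance 0): an incoming instance index of 5 yields
					// ViewIndex = 0 + 5 % 2 = 1 and a corrected instance index of 5 / 2 = 2,
					// matching the two statements emitted by the hook below.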
					entry_func.fixup_hooks_in.push_back([=]() {
						statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
						          to_expression(view_mask_buffer_id), "[0] + (", to_expression(builtin_instance_idx_id),
						          " - ", to_expression(builtin_base_instance_id), ") % ",
						          to_expression(view_mask_buffer_id), "[1];");
						statement(to_expression(builtin_instance_idx_id), " = (",
						          to_expression(builtin_instance_idx_id), " - ",
						          to_expression(builtin_base_instance_id), ") / ", to_expression(view_mask_buffer_id),
						          "[1] + ", to_expression(builtin_base_instance_id), ";");
					});
					// In addition to setting the variable itself, we also need to
					// set the render_target_array_index with it on output. We have to
					// offset this by the base view index, because Metal isn't in on
					// our little game here.
					entry_func.fixup_hooks_out.push_back([=]() {
						statement(to_expression(builtin_layer_id), " = ", to_expression(var_id), " - ",
						          to_expression(view_mask_buffer_id), "[0];");
					});
				}
				break;
			case BuiltInDeviceIndex:
				// Metal pipelines belong to the devices which create them, so we'll
				// need to create a MTLPipelineState for every MTLDevice in a grouped
				// VkDevice. We can assume, then, that the device index is constant.
				entry_func.fixup_hooks_in.push_back([=]() {
					statement("const ", builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
					          msl_options.device_index, ";");
				});
				break;
			case BuiltInWorkgroupId:
				if (!msl_options.dispatch_base || !active_input_builtins.get(BuiltInWorkgroupId))
					break;
				// The vkCmdDispatchBase() command lets the client set the base value
				// of WorkgroupId. Metal has no direct equivalent; we must make this
				// adjustment ourselves.
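				// (Illustrative only: the hook below emits roughly gl_WorkGroupID += spvDispatchBase;
				// where "spvDispatchBase" stands for whatever name the dispatch-base builtin was given.)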
				entry_func.fixup_hooks_in.push_back([=]() {
					statement(to_expression(var_id), " += ", to_dereferenced_expression(builtin_dispatch_base_id), ";");
				});
				break;
			case BuiltInGlobalInvocationId:
				if (!msl_options.dispatch_base || !active_input_builtins.get(BuiltInGlobalInvocationId))
					break;
				// GlobalInvocationId is defined as LocalInvocationId + WorkgroupId * WorkgroupSize.
				// This needs to be adjusted too.
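				// (Illustrative only, same naming assumption as above: for a fixed 8x8x1 workgroup
				// this emits roughly gl_GlobalInvocationID += spvDispatchBase * uint3(8, 8, 1);)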
				entry_func.fixup_hooks_in.push_back([=]() {
					auto &execution = this->get_entry_point();
					uint32_t workgroup_size_id = execution.workgroup_size.constant;
					if (workgroup_size_id)
						statement(to_expression(var_id), " += ", to_dereferenced_expression(builtin_dispatch_base_id),
						          " * ", to_expression(workgroup_size_id), ";");
					else
						statement(to_expression(var_id), " += ", to_dereferenced_expression(builtin_dispatch_base_id),
						          " * uint3(", execution.workgroup_size.x, ", ", execution.workgroup_size.y, ", ",
						          execution.workgroup_size.z, ");");
				});
				break;
			case BuiltInVertexId:
			case BuiltInVertexIndex:
				// This is direct-mapped normally.
				if (!msl_options.vertex_for_tessellation)
					break;
				entry_func.fixup_hooks_in.push_back([=]() {
					builtin_declaration = true;
					switch (msl_options.vertex_index_type)
					{
					case Options::IndexType::None:
						statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
						          to_expression(builtin_invocation_id_id), ".x + ",
						          to_expression(builtin_dispatch_base_id), ".x;");
						break;
					case Options::IndexType::UInt16:
					case Options::IndexType::UInt32:
						statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", index_buffer_var_name,
						          "[", to_expression(builtin_invocation_id_id), ".x] + ",
						          to_expression(builtin_dispatch_base_id), ".x;");
						break;
					}
					builtin_declaration = false;
				});
				break;
			case BuiltInBaseVertex:
				// This is direct-mapped normally.
				if (!msl_options.vertex_for_tessellation)
					break;
				entry_func.fixup_hooks_in.push_back([=]() {
					statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
					          to_expression(builtin_dispatch_base_id), ".x;");
				});
				break;
			case BuiltInInstanceId:
			case BuiltInInstanceIndex:
				// This is direct-mapped normally.
				if (!msl_options.vertex_for_tessellation)
					break;
				entry_func.fixup_hooks_in.push_back([=]() {
					builtin_declaration = true;
					statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
					          to_expression(builtin_invocation_id_id), ".y + ", to_expression(builtin_dispatch_base_id),
					          ".y;");
					builtin_declaration = false;
				});
				break;
			case BuiltInBaseInstance:
				// This is direct-mapped normally.
				if (!msl_options.vertex_for_tessellation)
					break;
				entry_func.fixup_hooks_in.push_back([=]() {
					statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
					          to_expression(builtin_dispatch_base_id), ".y;");
				});
				break;
			default:
				break;
			}
		}
		else if (var.storage == StorageClassOutput && get_execution_model() == ExecutionModelFragment &&
		         is_builtin_variable(var) && active_output_builtins.get(bi_type) &&
		         bi_type == BuiltInSampleMask && has_additional_fixed_sample_mask())
		{
			// If the additional fixed sample mask was set, we need to adjust the sample_mask
			// output to reflect that. If the shader outputs the sample_mask itself too, we need
			// to AND the two masks to get the final one.
			string op_str = does_shader_write_sample_mask ? " &= " : " = ";
			entry_func.fixup_hooks_out.push_back([=]() {
				statement(to_expression(builtin_sample_mask_id), op_str, additional_fixed_sample_mask_str(), ";");
			});
		}
	});
}

// Returns the Metal index of the resource of the specified type as used by the specified variable.
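// For example (a hypothetical remapping): if the client registered an MSLResourceBinding for
// (vertex, set 0, binding 1) with msl_buffer = 2, a buffer resource at that binding returns 2.
// Otherwise, indices come from the running per-type counters (or the per-set flat counters
// when the set is an argument buffer).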
uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype, uint32_t plane)
{
	auto &execution = get_entry_point();
	auto &var_dec = ir.meta[var.self].decoration;
	auto &var_type = get<SPIRType>(var.basetype);
	uint32_t var_desc_set = (var.storage == StorageClassPushConstant) ? kPushConstDescSet : var_dec.set;
	uint32_t var_binding = (var.storage == StorageClassPushConstant) ? kPushConstBinding : var_dec.binding;

	// If a matching binding has been specified, find and use it.
	auto itr = resource_bindings.find({ execution.model, var_desc_set, var_binding });

	// Atomic helper buffers for image atomics need to use secondary bindings as well.
	bool use_secondary_binding = (var_type.basetype == SPIRType::SampledImage && basetype == SPIRType::Sampler) ||
	                             basetype == SPIRType::AtomicCounter;

	auto resource_decoration =
	    use_secondary_binding ? SPIRVCrossDecorationResourceIndexSecondary : SPIRVCrossDecorationResourceIndexPrimary;

	if (plane == 1)
		resource_decoration = SPIRVCrossDecorationResourceIndexTertiary;
	if (plane == 2)
		resource_decoration = SPIRVCrossDecorationResourceIndexQuaternary;

	if (itr != end(resource_bindings))
	{
		auto &remap = itr->second;
		remap.second = true;
		switch (basetype)
		{
		case SPIRType::Image:
			set_extended_decoration(var.self, resource_decoration, remap.first.msl_texture + plane);
			return remap.first.msl_texture + plane;
		case SPIRType::Sampler:
			set_extended_decoration(var.self, resource_decoration, remap.first.msl_sampler);
			return remap.first.msl_sampler;
		default:
			set_extended_decoration(var.self, resource_decoration, remap.first.msl_buffer);
			return remap.first.msl_buffer;
		}
	}

	// If we have already allocated an index, keep using it.
	if (has_extended_decoration(var.self, resource_decoration))
		return get_extended_decoration(var.self, resource_decoration);

	auto &type = get<SPIRType>(var.basetype);

	if (type_is_msl_framebuffer_fetch(type))
	{
		// Frame-buffer fetch gets its fallback resource index from the input attachment index,
		// which is then treated as color index.
		return get_decoration(var.self, DecorationInputAttachmentIndex);
	}
	else if (msl_options.enable_decoration_binding)
	{
		// Allow user to enable decoration binding.
		// If there is no explicit mapping of bindings to MSL, use the declared binding as a fallback.
		if (has_decoration(var.self, DecorationBinding))
		{
			var_binding = get_decoration(var.self, DecorationBinding);
			// Avoid emitting sentinel bindings.
			if (var_binding < 0x80000000u)
				return var_binding;
		}
	}

	// If we did not explicitly remap, allocate bindings on demand.
	// We cannot reliably use Binding decorations since SPIR-V and MSL's binding models are very different.

	bool allocate_argument_buffer_ids = false;

	if (var.storage != StorageClassPushConstant)
		allocate_argument_buffer_ids = descriptor_set_is_argument_buffer(var_desc_set);

	uint32_t binding_stride = 1;
	for (uint32_t i = 0; i < uint32_t(type.array.size()); i++)
		binding_stride *= to_array_size_literal(type, i);

	assert(binding_stride != 0);

	// If a binding has not been specified, revert to incrementing resource indices.
	uint32_t resource_index;

	if (allocate_argument_buffer_ids)
	{
		// Allocate from a flat ID binding space.
		resource_index = next_metal_resource_ids[var_desc_set];
		next_metal_resource_ids[var_desc_set] += binding_stride;
	}
	else
	{
		// Allocate from plain bindings which are allocated per resource type.
		switch (basetype)
		{
		case SPIRType::Image:
			resource_index = next_metal_resource_index_texture;
			next_metal_resource_index_texture += binding_stride;
			break;
		case SPIRType::Sampler:
			resource_index = next_metal_resource_index_sampler;
			next_metal_resource_index_sampler += binding_stride;
			break;
		default:
			resource_index = next_metal_resource_index_buffer;
			next_metal_resource_index_buffer += binding_stride;
			break;
		}
	}

	set_extended_decoration(var.self, resource_decoration, resource_index);
	return resource_index;
}

bool CompilerMSL::type_is_msl_framebuffer_fetch(const SPIRType &type) const
{
	return type.basetype == SPIRType::Image && type.image.dim == DimSubpassData &&
	       msl_options.use_framebuffer_fetch_subpasses;
}

bool CompilerMSL::type_is_pointer(const SPIRType &type) const
{
	if (!type.pointer)
		return false;
	auto &parent_type = get<SPIRType>(type.parent_type);
	// Safeguards when we forget to set pointer_depth (there is an assert for it in type_to_glsl),
	// but the extra check shouldn't hurt.
	return (type.pointer_depth > parent_type.pointer_depth) || !parent_type.pointer;
}

bool CompilerMSL::type_is_pointer_to_pointer(const SPIRType &type) const
{
	if (!type.pointer)
		return false;
	auto &parent_type = get<SPIRType>(type.parent_type);
	return type.pointer_depth > parent_type.pointer_depth && type_is_pointer(parent_type);
}

string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
{
	auto &var = get<SPIRVariable>(arg.id);
	auto &type = get_variable_data_type(var);
	auto &var_type = get<SPIRType>(arg.type);
	StorageClass type_storage = var_type.storage;
	bool is_pointer = var_type.pointer;

	// If we need to modify the name of the variable, make sure we use the original variable.
	// Our alias is just a shadow variable.
	uint32_t name_id = var.self;
	if (arg.alias_global_variable && var.basevariable)
		name_id = var.basevariable;

	bool constref = !arg.alias_global_variable && is_pointer && arg.write_count == 0;
	// Framebuffer fetch is plain value, const looks out of place, but it is not wrong.
	if (type_is_msl_framebuffer_fetch(type))
		constref = false;

	bool type_is_image = type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage ||
	                     type.basetype == SPIRType::Sampler;

	// Arrays of images/samplers in MSL are always const.
	if (!type.array.empty() && type_is_image)
		constref = true;

	const char *cv_qualifier = constref ? "const " : "";
	string decl;

	// If this is a combined image-sampler for a 2D image with floating-point type,
	// we emitted the 'spvDynamicImageSampler' type, and this is *not* an alias parameter
	// for a global, then we need to emit a "dynamic" combined image-sampler.
	// Unfortunately, this is necessary to properly support passing around
	// combined image-samplers with Y'CbCr conversions on them.
	bool is_dynamic_img_sampler = !arg.alias_global_variable && type.basetype == SPIRType::SampledImage &&
	                              type.image.dim == Dim2D && type_is_floating_point(get<SPIRType>(type.image.type)) &&
	                              spv_function_implementations.count(SPVFuncImplDynamicImageSampler);

	// Allow Metal to use the array<T> template to make arrays a value type
	string address_space = get_argument_address_space(var);
	bool builtin = has_decoration(var.self, DecorationBuiltIn);
	auto builtin_type = BuiltIn(get_decoration(arg.id, DecorationBuiltIn));

	if (address_space == "threadgroup")
		is_using_builtin_array = true;

	if (var.basevariable && (var.basevariable == stage_in_ptr_var_id || var.basevariable == stage_out_ptr_var_id))
		decl = join(cv_qualifier, type_to_glsl(type, arg.id));
	else if (builtin)
	{
		// Only use templated array for Clip/Cull distance when feasible.
		// In other scenarios, we need to override the array length for tess levels (if used as outputs),
		// or we need to emit the expected type for builtins (uint vs int).
		auto storage = get<SPIRType>(var.basetype).storage;

		if (storage == StorageClassInput &&
		    (builtin_type == BuiltInTessLevelInner || builtin_type == BuiltInTessLevelOuter))
		{
			is_using_builtin_array = false;
		}
		else if (builtin_type != BuiltInClipDistance && builtin_type != BuiltInCullDistance)
		{
			is_using_builtin_array = true;
		}

		if (storage == StorageClassOutput && variable_storage_requires_stage_io(storage) &&
		    !is_stage_output_builtin_masked(builtin_type))
			is_using_builtin_array = true;

		if (is_using_builtin_array)
			decl = join(cv_qualifier, builtin_type_decl(builtin_type, arg.id));
		else
			decl = join(cv_qualifier, type_to_glsl(type, arg.id));
	}
	else if ((type_storage == StorageClassUniform || type_storage == StorageClassStorageBuffer) && is_array(type))
	{
		is_using_builtin_array = true;
		decl += join(cv_qualifier, type_to_glsl(type, arg.id), "*");
	}
	else if (is_dynamic_img_sampler)
	{
		decl = join(cv_qualifier, "spvDynamicImageSampler<", type_to_glsl(get<SPIRType>(type.image.type)), ">");
		// Mark the variable so that we can handle passing it to another function.
		set_extended_decoration(arg.id, SPIRVCrossDecorationDynamicImageSampler);
	}
	else
	{
		// The type is a pointer type; we need to emit the cv_qualifier late.
		if (type_is_pointer(type))
		{
			decl = type_to_glsl(type, arg.id);
			if (*cv_qualifier != '\0')
				decl += join(" ", cv_qualifier);
		}
		else
			decl = join(cv_qualifier, type_to_glsl(type, arg.id));
	}

	bool opaque_handle = type_storage == StorageClassUniformConstant;

	if (!builtin && !opaque_handle && !is_pointer &&
	    (type_storage == StorageClassFunction || type_storage == StorageClassGeneric))
	{
		// If the argument is a pure value and not an opaque type, we will pass by value.
		if (msl_options.force_native_arrays && is_array(type))
		{
			// We are receiving an array by value. This is problematic.
			// We cannot be sure of the target address space since we are supposed to receive a copy,
			// but this is not possible with MSL without some extra work.
			// We will have to assume we're getting a reference in thread address space.
			// If we happen to get a reference in constant address space, the caller must emit a copy and pass that.
			// Thread const therefore becomes the only logical choice, since we cannot "create" a constant array from
			// non-constant arrays, but we can create thread const from constant.
			decl = string("thread const ") + decl;
			decl += " (&";
			const char *restrict_kw = to_restrict(name_id);
			if (*restrict_kw)
			{
				decl += " ";
				decl += restrict_kw;
			}
			decl += to_expression(name_id);
			decl += ")";
			decl += type_to_array_glsl(type);
		}
		else
		{
			if (!address_space.empty())
				decl = join(address_space, " ", decl);
			decl += " ";
			decl += to_expression(name_id);
		}
	}
	else if (is_array(type) && !type_is_image)
	{
		// Arrays of images and samplers are special cased.
		if (!address_space.empty())
			decl = join(address_space, " ", decl);

		if (msl_options.argument_buffers)
		{
			uint32_t desc_set = get_decoration(name_id, DecorationDescriptorSet);
			if ((type_storage == StorageClassUniform || type_storage == StorageClassStorageBuffer) &&
			    descriptor_set_is_argument_buffer(desc_set))
			{
				// An awkward case where we need to emit *more* address space declarations (yay!).
				// An example is where we pass down an array of buffer pointers to leaf functions.
				// It's a constant array containing pointers to constants.
				// The pointer array is always constant however. E.g.
				// device SSBO * constant (&array)[N].
				// const device SSBO * constant (&array)[N].
				// constant SSBO * constant (&array)[N].
				// However, this only matters for argument buffers, since for MSL 1.0 style codegen,
				// we emit the buffer array on stack instead, and that seems to work just fine apparently.

				// If the argument was marked as being in device address space, any pointer to member would
				// be const device, not constant.
				if (argument_buffer_device_storage_mask & (1u << desc_set))
					decl += " const device";
				else
					decl += " constant";
			}
		}

		// Special case, need to override the array size here if we're using tess level as an argument.
		if (get_execution_model() == ExecutionModelTessellationControl && builtin &&
		    (builtin_type == BuiltInTessLevelInner || builtin_type == BuiltInTessLevelOuter))
		{
			uint32_t array_size = get_physical_tess_level_array_size(builtin_type);
			if (array_size == 1)
			{
				decl += " &";
				decl += to_expression(name_id);
			}
			else
			{
				decl += " (&";
				decl += to_expression(name_id);
				decl += ")";
				decl += join("[", array_size, "]");
			}
		}
		else
		{
			auto array_size_decl = type_to_array_glsl(type);
			if (array_size_decl.empty())
				decl += "& ";
			else
				decl += " (&";

			const char *restrict_kw = to_restrict(name_id);
			if (*restrict_kw)
			{
				decl += " ";
				decl += restrict_kw;
			}
			decl += to_expression(name_id);

			if (!array_size_decl.empty())
			{
				decl += ")";
				decl += array_size_decl;
			}
		}
	}
	else if (!opaque_handle && (!pull_model_inputs.count(var.basevariable) || type.basetype == SPIRType::Struct))
	{
		// If this is going to be a reference to a variable pointer, the address space
		// for the reference has to go before the '&', but after the '*'.
		if (!address_space.empty())
		{
			if (type_is_pointer(type))
			{
				if (*cv_qualifier == '\0')
					decl += ' ';
				decl += join(address_space, " ");
			}
			else
				decl = join(address_space, " ", decl);
		}
		decl += "&";
		decl += " ";
		decl += to_restrict(name_id);
		decl += to_expression(name_id);
	}
	else
	{
		if (!address_space.empty())
			decl = join(address_space, " ", decl);
		decl += " ";
		decl += to_expression(name_id);
	}

	// Emulate texture2D atomic operations
	auto *backing_var = maybe_get_backing_variable(name_id);
	if (backing_var && atomic_image_vars.count(backing_var->self))
	{
		decl += ", device atomic_" + type_to_glsl(get<SPIRType>(var_type.image.type), 0);
		decl += "* " + to_expression(name_id) + "_atomic";
	}

	is_using_builtin_array = false;

	return decl;
}

// If we're currently in the entry point function, and the object
// has a qualified name, use it, otherwise use the standard name.
string CompilerMSL::to_name(uint32_t id, bool allow_alias) const
{
	if (current_function && (current_function->self == ir.default_entry_point))
	{
		auto *m = ir.find_meta(id);
		if (m && !m->decoration.qualified_alias.empty())
			return m->decoration.qualified_alias;
	}
	return Compiler::to_name(id, allow_alias);
}

// Returns a name that combines the name of the struct with the name of the member, except for Builtins
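// For example (names here are illustrative): member "color" of struct "VertexOut" yields
// "VertexOut_color"; leading underscores are stripped first, so member "_pos" yields "VertexOut_pos".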
string CompilerMSL::to_qualified_member_name(const SPIRType &type, uint32_t index)
{
	// Don't qualify Builtin names because they are unique and are treated as such when building expressions
	BuiltIn builtin = BuiltInMax;
	if (is_member_builtin(type, index, &builtin))
		return builtin_to_glsl(builtin, type.storage);

	// Strip any underscore prefix from member name
	string mbr_name = to_member_name(type, index);
	size_t startPos = mbr_name.find_first_not_of("_");
	mbr_name = (startPos != string::npos) ? mbr_name.substr(startPos) : "";
	return join(to_name(type.self), "_", mbr_name);
}

// Ensures that the specified name is permanently usable by prepending a prefix
// if the first chars are _ and a digit, which indicate a transient name.
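// For example (inputs here are illustrative): ensure_valid_name("_53", "m") returns "m_53",
// while ensure_valid_name("position", "m") returns "position" unchanged.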
string CompilerMSL::ensure_valid_name(string name, string pfx)
{
	return (name.size() >= 2 && name[0] == '_' && isdigit(name[1])) ? (pfx + name) : name;
}

const std::unordered_set<std::string> &CompilerMSL::get_reserved_keyword_set()
{
	static const unordered_set<string> keywords = {
		"kernel",
		"vertex",
		"fragment",
		"compute",
		"bias",
		"level",
		"gradient2d",
		"gradientcube",
		"gradient3d",
		"min_lod_clamp",
		"assert",
		"VARIABLE_TRACEPOINT",
		"STATIC_DATA_TRACEPOINT",
		"STATIC_DATA_TRACEPOINT_V",
		"METAL_ALIGN",
		"METAL_ASM",
		"METAL_CONST",
		"METAL_DEPRECATED",
		"METAL_ENABLE_IF",
		"METAL_FUNC",
		"METAL_INTERNAL",
		"METAL_NON_NULL_RETURN",
		"METAL_NORETURN",
		"METAL_NOTHROW",
		"METAL_PURE",
		"METAL_UNAVAILABLE",
		"METAL_IMPLICIT",
		"METAL_EXPLICIT",
		"METAL_CONST_ARG",
		"METAL_ARG_UNIFORM",
		"METAL_ZERO_ARG",
		"METAL_VALID_LOD_ARG",
		"METAL_VALID_LEVEL_ARG",
		"METAL_VALID_STORE_ORDER",
		"METAL_VALID_LOAD_ORDER",
		"METAL_VALID_COMPARE_EXCHANGE_FAILURE_ORDER",
		"METAL_COMPATIBLE_COMPARE_EXCHANGE_ORDERS",
		"METAL_VALID_RENDER_TARGET",
		"is_function_constant_defined",
		"CHAR_BIT",
		"SCHAR_MAX",
		"SCHAR_MIN",
		"UCHAR_MAX",
		"CHAR_MAX",
		"CHAR_MIN",
		"USHRT_MAX",
		"SHRT_MAX",
		"SHRT_MIN",
		"UINT_MAX",
		"INT_MAX",
		"INT_MIN",
		"FLT_DIG",
		"FLT_MANT_DIG",
		"FLT_MAX_10_EXP",
		"FLT_MAX_EXP",
		"FLT_MIN_10_EXP",
		"FLT_MIN_EXP",
		"FLT_RADIX",
		"FLT_MAX",
		"FLT_MIN",
		"FLT_EPSILON",
		"FP_ILOGB0",
		"FP_ILOGBNAN",
		"MAXFLOAT",
		"HUGE_VALF",
		"INFINITY",
		"NAN",
		"M_E_F",
		"M_LOG2E_F",
		"M_LOG10E_F",
		"M_LN2_F",
		"M_LN10_F",
		"M_PI_F",
		"M_PI_2_F",
		"M_PI_4_F",
		"M_1_PI_F",
		"M_2_PI_F",
		"M_2_SQRTPI_F",
		"M_SQRT2_F",
		"M_SQRT1_2_F",
		"HALF_DIG",
		"HALF_MANT_DIG",
		"HALF_MAX_10_EXP",
		"HALF_MAX_EXP",
		"HALF_MIN_10_EXP",
		"HALF_MIN_EXP",
		"HALF_RADIX",
		"HALF_MAX",
		"HALF_MIN",
		"HALF_EPSILON",
		"MAXHALF",
		"HUGE_VALH",
		"M_E_H",
		"M_LOG2E_H",
		"M_LOG10E_H",
		"M_LN2_H",
		"M_LN10_H",
		"M_PI_H",
		"M_PI_2_H",
		"M_PI_4_H",
		"M_1_PI_H",
		"M_2_PI_H",
		"M_2_SQRTPI_H",
		"M_SQRT2_H",
		"M_SQRT1_2_H",
		"DBL_DIG",
		"DBL_MANT_DIG",
		"DBL_MAX_10_EXP",
		"DBL_MAX_EXP",
		"DBL_MIN_10_EXP",
		"DBL_MIN_EXP",
		"DBL_RADIX",
		"DBL_MAX",
		"DBL_MIN",
		"DBL_EPSILON",
		"HUGE_VAL",
		"M_E",
		"M_LOG2E",
		"M_LOG10E",
		"M_LN2",
		"M_LN10",
		"M_PI",
		"M_PI_2",
		"M_PI_4",
		"M_1_PI",
		"M_2_PI",
		"M_2_SQRTPI",
		"M_SQRT2",
		"M_SQRT1_2",
		"quad_broadcast",
	};

	return keywords;
}

const std::unordered_set<std::string> &CompilerMSL::get_illegal_func_names()
{
	static const unordered_set<string> illegal_func_names = {
		"main",
		"saturate",
		"assert",
		"fmin3",
		"fmax3",
		"VARIABLE_TRACEPOINT",
		"STATIC_DATA_TRACEPOINT",
		"STATIC_DATA_TRACEPOINT_V",
		"METAL_ALIGN",
		"METAL_ASM",
		"METAL_CONST",
		"METAL_DEPRECATED",
		"METAL_ENABLE_IF",
		"METAL_FUNC",
		"METAL_INTERNAL",
		"METAL_NON_NULL_RETURN",
		"METAL_NORETURN",
		"METAL_NOTHROW",
		"METAL_PURE",
		"METAL_UNAVAILABLE",
		"METAL_IMPLICIT",
		"METAL_EXPLICIT",
		"METAL_CONST_ARG",
		"METAL_ARG_UNIFORM",
		"METAL_ZERO_ARG",
		"METAL_VALID_LOD_ARG",
		"METAL_VALID_LEVEL_ARG",
		"METAL_VALID_STORE_ORDER",
		"METAL_VALID_LOAD_ORDER",
		"METAL_VALID_COMPARE_EXCHANGE_FAILURE_ORDER",
		"METAL_COMPATIBLE_COMPARE_EXCHANGE_ORDERS",
		"METAL_VALID_RENDER_TARGET",
		"is_function_constant_defined",
		"CHAR_BIT",
		"SCHAR_MAX",
		"SCHAR_MIN",
		"UCHAR_MAX",
		"CHAR_MAX",
		"CHAR_MIN",
		"USHRT_MAX",
		"SHRT_MAX",
		"SHRT_MIN",
		"UINT_MAX",
		"INT_MAX",
		"INT_MIN",
		"FLT_DIG",
		"FLT_MANT_DIG",
		"FLT_MAX_10_EXP",
		"FLT_MAX_EXP",
		"FLT_MIN_10_EXP",
		"FLT_MIN_EXP",
		"FLT_RADIX",
		"FLT_MAX",
		"FLT_MIN",
		"FLT_EPSILON",
		"FP_ILOGB0",
		"FP_ILOGBNAN",
		"MAXFLOAT",
		"HUGE_VALF",
		"INFINITY",
		"NAN",
		"M_E_F",
		"M_LOG2E_F",
		"M_LOG10E_F",
		"M_LN2_F",
		"M_LN10_F",
		"M_PI_F",
		"M_PI_2_F",
		"M_PI_4_F",
		"M_1_PI_F",
		"M_2_PI_F",
		"M_2_SQRTPI_F",
		"M_SQRT2_F",
		"M_SQRT1_2_F",
		"HALF_DIG",
		"HALF_MANT_DIG",
		"HALF_MAX_10_EXP",
		"HALF_MAX_EXP",
		"HALF_MIN_10_EXP",
		"HALF_MIN_EXP",
		"HALF_RADIX",
		"HALF_MAX",
		"HALF_MIN",
		"HALF_EPSILON",
		"MAXHALF",
		"HUGE_VALH",
		"M_E_H",
		"M_LOG2E_H",
		"M_LOG10E_H",
		"M_LN2_H",
		"M_LN10_H",
		"M_PI_H",
		"M_PI_2_H",
		"M_PI_4_H",
		"M_1_PI_H",
		"M_2_PI_H",
		"M_2_SQRTPI_H",
		"M_SQRT2_H",
		"M_SQRT1_2_H",
		"DBL_DIG",
		"DBL_MANT_DIG",
		"DBL_MAX_10_EXP",
		"DBL_MAX_EXP",
		"DBL_MIN_10_EXP",
		"DBL_MIN_EXP",
		"DBL_RADIX",
		"DBL_MAX",
		"DBL_MIN",
		"DBL_EPSILON",
		"HUGE_VAL",
		"M_E",
		"M_LOG2E",
		"M_LOG10E",
		"M_LN2",
		"M_LN10",
		"M_PI",
		"M_PI_2",
		"M_PI_4",
		"M_1_PI",
		"M_2_PI",
		"M_2_SQRTPI",
		"M_SQRT2",
		"M_SQRT1_2",
	};

	return illegal_func_names;
}

// Replace all names that match MSL keywords or Metal Standard Library functions.
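// For example: a shader variable named "kernel" or a struct member named "assert" is renamed
// to "kernel0"/"assert0", and a user function named "saturate" becomes "saturate0".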
void CompilerMSL::replace_illegal_names()
{
	// FIXME: MSL and GLSL are doing two different things here.
	// Agree on convention and remove this override.
	auto &keywords = get_reserved_keyword_set();
	auto &illegal_func_names = get_illegal_func_names();

	ir.for_each_typed_id<SPIRVariable>([&](uint32_t self, SPIRVariable &) {
		auto *meta = ir.find_meta(self);
		if (!meta)
			return;

		auto &dec = meta->decoration;
		if (keywords.find(dec.alias) != end(keywords))
			dec.alias += "0";
	});

	ir.for_each_typed_id<SPIRFunction>([&](uint32_t self, SPIRFunction &) {
		auto *meta = ir.find_meta(self);
		if (!meta)
			return;

		auto &dec = meta->decoration;
		if (illegal_func_names.find(dec.alias) != end(illegal_func_names))
			dec.alias += "0";
	});

	ir.for_each_typed_id<SPIRType>([&](uint32_t self, SPIRType &) {
		auto *meta = ir.find_meta(self);
		if (!meta)
			return;

		for (auto &mbr_dec : meta->members)
			if (keywords.find(mbr_dec.alias) != end(keywords))
				mbr_dec.alias += "0";
	});

	CompilerGLSL::replace_illegal_names();
}

void CompilerMSL::replace_illegal_entry_point_names()
{
	auto &illegal_func_names = get_illegal_func_names();
	// It is important to do this before we fixup identifiers,
	// since if ep_name is reserved, we will need to fix that up,
	// and then copy alias back into entry.name after the fixup.
	for (auto &entry : ir.entry_points)
	{
		// Change both the entry point name and the alias, to keep them synced.
		string &ep_name = entry.second.name;
		if (illegal_func_names.find(ep_name) != end(illegal_func_names))
			ep_name += "0";
		ir.meta[entry.first].decoration.alias = ep_name;
	}
}

void CompilerMSL::sync_entry_point_aliases_and_names()
{
	for (auto &entry : ir.entry_points)
		entry.second.name = ir.meta[entry.first].decoration.alias;
}

string CompilerMSL::to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, bool ptr_chain)
{
	auto *var = maybe_get<SPIRVariable>(base);
	// If this is a buffer array, we have to dereference the buffer pointers.
	// Otherwise, if this is a pointer expression, dereference it.

	bool declared_as_pointer = false;

	if (var)
	{
		// Only allow -> dereference for block types. This is so we get expressions like
		// buffer[i]->first_member.second_member, rather than buffer[i]->first->second.
		bool is_block = has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);

		bool is_buffer_variable =
		    is_block && (var->storage == StorageClassUniform || var->storage == StorageClassStorageBuffer);
		declared_as_pointer = is_buffer_variable && is_array(get<SPIRType>(var->basetype));
	}

	if (declared_as_pointer || (!ptr_chain && should_dereference(base)))
		return join("->", to_member_name(type, index));
	else
		return join(".", to_member_name(type, index));
}

string CompilerMSL::to_qualifiers_glsl(uint32_t id)
{
	string quals;

	auto *var = maybe_get<SPIRVariable>(id);
	auto &type = expression_type(id);

	if (type.storage == StorageClassWorkgroup || (var && variable_decl_is_remapped_storage(*var, StorageClassWorkgroup)))
		quals += "threadgroup ";

	return quals;
}

// The optional id parameter indicates the object whose type we are trying
// to find the description for. It is optional. Most type descriptions do not
// depend on a specific object's use of that type.
string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)
{
	string type_name;

	// Pointer?
	if (type.pointer)
	{
		assert(type.pointer_depth > 0);

		const char *restrict_kw;

		auto type_address_space = get_type_address_space(type, id);
		auto type_decl = type_to_glsl(get<SPIRType>(type.parent_type), id);

		// Work around C pointer qualifier rules. If glsl_type is a pointer type as well
		// we'll need to emit the address space to the right.
		// We could always go this route, but it makes the code unnatural.
		// Prefer emitting thread T *foo over T thread* foo since it's more readable,
		// but we'll have to emit thread T * thread * T constant bar; for example.
		if (type_is_pointer_to_pointer(type))
			type_name = join(type_decl, " ", type_address_space, " ");
		else
			type_name = join(type_address_space, " ", type_decl);

		switch (type.basetype)
		{
		case SPIRType::Image:
		case SPIRType::SampledImage:
		case SPIRType::Sampler:
			// These are handles.
			break;
		default:
			// Anything else can be a raw pointer.
			type_name += "*";
			restrict_kw = to_restrict(id);
			if (*restrict_kw)
			{
				type_name += " ";
				type_name += restrict_kw;
			}
			break;
		}
		return type_name;
	}

	switch (type.basetype)
	{
	case SPIRType::Struct:
		// Need OpName lookup here to get a "sensible" name for a struct.
		// Allow Metal to use the array<T> template to make arrays a value type
		type_name = to_name(type.self);
		break;

	case SPIRType::Image:
	case SPIRType::SampledImage:
		return image_type_glsl(type, id);

	case SPIRType::Sampler:
		return sampler_type(type, id);

	case SPIRType::Void:
		return "void";

	case SPIRType::AtomicCounter:
		return "atomic_uint";

	case SPIRType::ControlPointArray:
		return join("patch_control_point<", type_to_glsl(get<SPIRType>(type.parent_type), id), ">");

	case SPIRType::Interpolant:
		return join("interpolant<", type_to_glsl(get<SPIRType>(type.parent_type), id), ", interpolation::",
		            has_decoration(type.self, DecorationNoPerspective) ? "no_perspective" : "perspective", ">");

	// Scalars
	case SPIRType::Boolean:
	{
		auto *var = maybe_get_backing_variable(id);
		if (var && var->basevariable)
			var = &get<SPIRVariable>(var->basevariable);

		// Need to special-case threadgroup booleans. They are supposed to be logical
		// storage, but MSL compilers will sometimes crash if you use threadgroup bool.
		// Workaround this by using 16-bit types instead and fixup on load-store to this data.
		// FIXME: We have no sane way of working around this problem if a struct member is boolean
		// and that struct is used as a threadgroup variable, but ... sigh.
		if ((var && var->storage == StorageClassWorkgroup) || type.storage == StorageClassWorkgroup)
			type_name = "short";
		else
			type_name = "bool";
		break;
	}

	case SPIRType::Char:
	case SPIRType::SByte:
		type_name = "char";
		break;
	case SPIRType::UByte:
		type_name = "uchar";
		break;
	case SPIRType::Short:
		type_name = "short";
		break;
	case SPIRType::UShort:
		type_name = "ushort";
		break;
	case SPIRType::Int:
		type_name = "int";
		break;
	case SPIRType::UInt:
		type_name = "uint";
		break;
	case SPIRType::Int64:
		if (!msl_options.supports_msl_version(2, 2))
			SPIRV_CROSS_THROW("64-bit integers are only supported in MSL 2.2 and above.");
		type_name = "long";
		break;
	case SPIRType::UInt64:
		if (!msl_options.supports_msl_version(2, 2))
			SPIRV_CROSS_THROW("64-bit integers are only supported in MSL 2.2 and above.");
		type_name = "ulong";
		break;
	case SPIRType::Half:
		type_name = "half";
		break;
	case SPIRType::Float:
		type_name = "float";
		break;
	case SPIRType::Double:
		type_name = "double"; // Currently unsupported
		break;
	case SPIRType::AccelerationStructure:
		if (msl_options.supports_msl_version(2, 4))
			type_name = "acceleration_structure<instancing>";
		else if (msl_options.supports_msl_version(2, 3))
			type_name = "instance_acceleration_structure";
		else
			SPIRV_CROSS_THROW("Acceleration Structure Type is supported in MSL 2.3 and above.");
		break;
	case SPIRType::RayQuery:
		return "intersection_query<instancing, triangle_data>";

	default:
		return "unknown_type";
	}

	// Matrix?
	if (type.columns > 1)
		type_name += to_string(type.columns) + "x";

	// Vector or Matrix?
	if (type.vecsize > 1)
		type_name += to_string(type.vecsize);

	if (type.array.empty() || using_builtin_array())
	{
		return type_name;
	}
	else
	{
		// Allow Metal to use the array<T> template to make arrays a value type
		add_spv_func_and_recompile(SPVFuncImplUnsafeArray);
		string res;
		string sizes;

		for (uint32_t i = 0; i < uint32_t(type.array.size()); i++)
		{
			res += "spvUnsafeArray<";
			sizes += ", ";
			sizes += to_array_size(type, i);
			sizes += ">";
		}

		res += type_name + sizes;
		return res;
	}
}

string CompilerMSL::type_to_array_glsl(const SPIRType &type)
{
	// Allow Metal to use the array<T> template to make arrays a value type
	switch (type.basetype)
	{
	case SPIRType::AtomicCounter:
	case SPIRType::ControlPointArray:
	case SPIRType::RayQuery:
	{
		return CompilerGLSL::type_to_array_glsl(type);
	}
	default:
	{
		if (using_builtin_array())
			return CompilerGLSL::type_to_array_glsl(type);
		else
			return "";
	}
	}
}

string CompilerMSL::constant_op_expression(const SPIRConstantOp &cop)
{
	switch (cop.opcode)
	{
	case OpQuantizeToF16:
		add_spv_func_and_recompile(SPVFuncImplQuantizeToF16);
		return join("spvQuantizeToF16(", to_expression(cop.arguments[0]), ")");
	default:
		return CompilerGLSL::constant_op_expression(cop);
	}
}

bool CompilerMSL::variable_decl_is_remapped_storage(const SPIRVariable &variable, spv::StorageClass storage) const
{
	if (variable.storage == storage)
		return true;

	if (storage == StorageClassWorkgroup)
	{
		auto model = get_execution_model();

		// Specially masked IO block variable.
		// Normally, we will never access IO blocks directly here.
		// The only scenario in which that should occur is with a masked IO block.
  12108. if (model == ExecutionModelTessellationControl && variable.storage == StorageClassOutput &&
  12109. has_decoration(get<SPIRType>(variable.basetype).self, DecorationBlock))
  12110. {
  12111. return true;
  12112. }
  12113. return variable.storage == StorageClassOutput &&
  12114. model == ExecutionModelTessellationControl &&
  12115. is_stage_output_variable_masked(variable);
  12116. }
  12117. else if (storage == StorageClassStorageBuffer)
  12118. {
  12119. // We won't be able to catch writes to control point outputs here since variable
  12120. // refers to a function local pointer.
  12121. // This is fine, as there cannot be concurrent writers to that memory anyways,
  12122. // so we just ignore that case.
  12123. return (variable.storage == StorageClassOutput || variable.storage == StorageClassInput) &&
  12124. !variable_storage_requires_stage_io(variable.storage) &&
  12125. (variable.storage != StorageClassOutput || !is_stage_output_variable_masked(variable));
  12126. }
  12127. else
  12128. {
  12129. return false;
  12130. }
  12131. }

std::string CompilerMSL::variable_decl(const SPIRVariable &variable)
{
	bool old_is_using_builtin_array = is_using_builtin_array;

	// Threadgroup arrays can't have a wrapper type.
	if (variable_decl_is_remapped_storage(variable, StorageClassWorkgroup))
		is_using_builtin_array = true;

	std::string expr = CompilerGLSL::variable_decl(variable);
	is_using_builtin_array = old_is_using_builtin_array;
	return expr;
}

// GCC workaround for lambdas calling protected functions.
std::string CompilerMSL::variable_decl(const SPIRType &type, const std::string &name, uint32_t id)
{
	return CompilerGLSL::variable_decl(type, name, id);
}

std::string CompilerMSL::sampler_type(const SPIRType &type, uint32_t id)
{
	auto *var = maybe_get<SPIRVariable>(id);
	if (var && var->basevariable)
	{
		// Check against the base variable, and not a fake ID which might have been generated for this variable.
		id = var->basevariable;
	}

	if (!type.array.empty())
	{
		if (!msl_options.supports_msl_version(2))
			SPIRV_CROSS_THROW("MSL 2.0 or greater is required for arrays of samplers.");
		if (type.array.size() > 1)
			SPIRV_CROSS_THROW("Arrays of arrays of samplers are not supported in MSL.");

		// Arrays of samplers in MSL must be declared with a special array<T, N> syntax, a la C++11 std::array.
		// If we have a runtime array, it could be a variable-count descriptor set binding.
		uint32_t array_size = to_array_size_literal(type);
		if (array_size == 0)
			array_size = get_resource_array_size(id);

		if (array_size == 0)
			SPIRV_CROSS_THROW("Unsized array of samplers is not supported in MSL.");

		auto &parent = get<SPIRType>(get_pointee_type(type).parent_type);
		return join("array<", sampler_type(parent, id), ", ", array_size, ">");
	}
	else
		return "sampler";
}
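
// For illustration: an array of four samplers declared in SPIR-V is emitted
// as array<sampler, 4>; a runtime-sized sampler array instead picks its size
// up from the variable-count descriptor binding via get_resource_array_size().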

// Returns an MSL string describing the SPIR-V image type
string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id)
{
	auto *var = maybe_get<SPIRVariable>(id);
	if (var && var->basevariable)
	{
		// For comparison images, check against the base variable,
		// and not the fake ID which might have been generated for this variable.
		id = var->basevariable;
	}

	if (!type.array.empty())
	{
		uint32_t major = 2, minor = 0;
		if (msl_options.is_ios())
		{
			major = 1;
			minor = 2;
		}
		if (!msl_options.supports_msl_version(major, minor))
		{
			if (msl_options.is_ios())
				SPIRV_CROSS_THROW("MSL 1.2 or greater is required for arrays of textures.");
			else
				SPIRV_CROSS_THROW("MSL 2.0 or greater is required for arrays of textures.");
		}

		if (type.array.size() > 1)
			SPIRV_CROSS_THROW("Arrays of arrays of textures are not supported in MSL.");

		// Arrays of images in MSL must be declared with a special array<T, N> syntax, a la C++11 std::array.
		// If we have a runtime array, it could be a variable-count descriptor set binding.
		uint32_t array_size = to_array_size_literal(type);
		if (array_size == 0)
			array_size = get_resource_array_size(id);

		if (array_size == 0)
			SPIRV_CROSS_THROW("Unsized array of images is not supported in MSL.");

		auto &parent = get<SPIRType>(get_pointee_type(type).parent_type);
		return join("array<", image_type_glsl(parent, id), ", ", array_size, ">");
	}

	string img_type_name;

	// Bypass pointers because we need the real image struct
	auto &img_type = get<SPIRType>(type.self).image;
	if (image_is_comparison(type, id))
	{
		switch (img_type.dim)
		{
		case Dim1D:
		case Dim2D:
			if (img_type.dim == Dim1D && !msl_options.texture_1D_as_2D)
			{
				// Metal has no native 1D depth texture type.
				img_type_name += "depth1d_unsupported_by_metal";
				break;
			}

			if (img_type.ms && img_type.arrayed)
			{
				if (!msl_options.supports_msl_version(2, 1))
					SPIRV_CROSS_THROW("Multisampled array textures are supported from 2.1.");
				img_type_name += "depth2d_ms_array";
			}
			else if (img_type.ms)
				img_type_name += "depth2d_ms";
			else if (img_type.arrayed)
				img_type_name += "depth2d_array";
			else
				img_type_name += "depth2d";
			break;
		case Dim3D:
			img_type_name += "depth3d_unsupported_by_metal";
			break;
		case DimCube:
			if (!msl_options.emulate_cube_array)
				img_type_name += (img_type.arrayed ? "depthcube_array" : "depthcube");
			else
				img_type_name += (img_type.arrayed ? "depth2d_array" : "depthcube");
			break;
		default:
			img_type_name += "unknown_depth_texture_type";
			break;
		}
	}
	else
	{
		switch (img_type.dim)
		{
		case DimBuffer:
			if (img_type.ms || img_type.arrayed)
				SPIRV_CROSS_THROW("Cannot use texel buffers with multisampling or array layers.");

			if (msl_options.texture_buffer_native)
			{
				if (!msl_options.supports_msl_version(2, 1))
					SPIRV_CROSS_THROW("Native texture_buffer type is only supported in MSL 2.1.");
				img_type_name = "texture_buffer";
			}
			else
				img_type_name += "texture2d";
			break;
		case Dim1D:
		case Dim2D:
		case DimSubpassData:
		{
			bool subpass_array =
			    img_type.dim == DimSubpassData && (msl_options.multiview || msl_options.arrayed_subpass_input);
			if (img_type.dim == Dim1D && !msl_options.texture_1D_as_2D)
			{
				// Use a native Metal 1D texture
				img_type_name += (img_type.arrayed ? "texture1d_array" : "texture1d");
				break;
			}

			// Use Metal's native frame-buffer fetch API for subpass inputs.
			if (type_is_msl_framebuffer_fetch(type))
			{
				auto img_type_4 = get<SPIRType>(img_type.type);
				img_type_4.vecsize = 4;
				return type_to_glsl(img_type_4);
			}

			if (img_type.ms && (img_type.arrayed || subpass_array))
			{
				if (!msl_options.supports_msl_version(2, 1))
					SPIRV_CROSS_THROW("Multisampled array textures are supported from 2.1.");
				img_type_name += "texture2d_ms_array";
			}
			else if (img_type.ms)
				img_type_name += "texture2d_ms";
			else if (img_type.arrayed || subpass_array)
				img_type_name += "texture2d_array";
			else
				img_type_name += "texture2d";
			break;
		}
		case Dim3D:
			img_type_name += "texture3d";
			break;
		case DimCube:
			if (!msl_options.emulate_cube_array)
				img_type_name += (img_type.arrayed ? "texturecube_array" : "texturecube");
			else
				img_type_name += (img_type.arrayed ? "texture2d_array" : "texturecube");
			break;
		default:
			img_type_name += "unknown_texture_type";
			break;
		}
	}

	// Append the pixel type
	img_type_name += "<";
	img_type_name += type_to_glsl(get<SPIRType>(img_type.type));

	// For unsampled images, append the sample/read/write access qualifier.
	// For kernel images, the access qualifier may be supplied directly by SPIR-V.
	// Otherwise it may be set based on whether the image is read from or written to within the shader.
	if (type.basetype == SPIRType::Image && type.image.sampled == 2 && type.image.dim != DimSubpassData)
	{
		switch (img_type.access)
		{
		case AccessQualifierReadOnly:
			img_type_name += ", access::read";
			break;

		case AccessQualifierWriteOnly:
			img_type_name += ", access::write";
			break;

		case AccessQualifierReadWrite:
			img_type_name += ", access::read_write";
			break;

		default:
		{
			auto *p_var = maybe_get_backing_variable(id);
			if (p_var && p_var->basevariable)
				p_var = maybe_get<SPIRVariable>(p_var->basevariable);
			if (p_var && !has_decoration(p_var->self, DecorationNonWritable))
			{
				img_type_name += ", access::";

				if (!has_decoration(p_var->self, DecorationNonReadable))
					img_type_name += "read_";

				img_type_name += "write";
			}
			break;
		}
		}
	}

	img_type_name += ">";

	return img_type_name;
}
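
// For illustration: a sampled 2D array texture of float maps to
// texture2d_array<float>, while a storage image that the shader both reads
// and writes picks up an access qualifier, e.g.
// texture2d<float, access::read_write>.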

void CompilerMSL::emit_subgroup_op(const Instruction &i)
{
	const uint32_t *ops = stream(i);
	auto op = static_cast<Op>(i.op);

	if (msl_options.emulate_subgroups)
	{
		// In this mode, only the GroupNonUniform cap is supported. The only op
		// we need to handle, then, is OpGroupNonUniformElect.
		if (op != OpGroupNonUniformElect)
			SPIRV_CROSS_THROW("Subgroup emulation does not support operations other than Elect.");
		// In this mode, the subgroup size is assumed to be one, so every invocation
		// is elected.
		emit_op(ops[0], ops[1], "true", true);
		return;
	}

	// Metal 2.0 is required. iOS only supports quad ops on 11.0 (2.0), with
	// full support in 13.0 (2.2). macOS only supports broadcast and shuffle on
	// 10.13 (2.0), with full support in 10.14 (2.1).
	// Note that Apple GPUs before A13 make no distinction between a quad-group
	// and a SIMD-group; all SIMD-groups are quad-groups on those.
	if (!msl_options.supports_msl_version(2))
		SPIRV_CROSS_THROW("Subgroups are only supported in Metal 2.0 and up.");

	// If we need to do implicit bitcasts, make sure we do it with the correct type.
	uint32_t integer_width = get_integer_width_for_instruction(i);
	auto int_type = to_signed_basetype(integer_width);
	auto uint_type = to_unsigned_basetype(integer_width);

	if (msl_options.is_ios() && (!msl_options.supports_msl_version(2, 3) || !msl_options.ios_use_simdgroup_functions))
	{
		switch (op)
		{
		default:
			SPIRV_CROSS_THROW("Subgroup ops beyond broadcast, ballot, and shuffle on iOS require Metal 2.3 and up.");
		case OpGroupNonUniformBroadcastFirst:
			if (!msl_options.supports_msl_version(2, 2))
				SPIRV_CROSS_THROW("BroadcastFirst on iOS requires Metal 2.2 and up.");
			break;
		case OpGroupNonUniformElect:
			if (!msl_options.supports_msl_version(2, 2))
				SPIRV_CROSS_THROW("Elect on iOS requires Metal 2.2 and up.");
			break;
		case OpGroupNonUniformAny:
		case OpGroupNonUniformAll:
		case OpGroupNonUniformAllEqual:
		case OpGroupNonUniformBallot:
		case OpGroupNonUniformInverseBallot:
		case OpGroupNonUniformBallotBitExtract:
		case OpGroupNonUniformBallotFindLSB:
		case OpGroupNonUniformBallotFindMSB:
		case OpGroupNonUniformBallotBitCount:
			if (!msl_options.supports_msl_version(2, 2))
				SPIRV_CROSS_THROW("Ballot ops on iOS require Metal 2.2 and up.");
			break;
		case OpGroupNonUniformBroadcast:
		case OpGroupNonUniformShuffle:
		case OpGroupNonUniformShuffleXor:
		case OpGroupNonUniformShuffleUp:
		case OpGroupNonUniformShuffleDown:
		case OpGroupNonUniformQuadSwap:
		case OpGroupNonUniformQuadBroadcast:
			break;
		}
	}

	if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 1))
	{
		switch (op)
		{
		default:
			SPIRV_CROSS_THROW("Subgroup ops beyond broadcast and shuffle on macOS require Metal 2.1 and up.");
		case OpGroupNonUniformBroadcast:
		case OpGroupNonUniformShuffle:
		case OpGroupNonUniformShuffleXor:
		case OpGroupNonUniformShuffleUp:
		case OpGroupNonUniformShuffleDown:
			break;
		}
	}

	uint32_t result_type = ops[0];
	uint32_t id = ops[1];

	auto scope = static_cast<Scope>(evaluate_constant_u32(ops[2]));
	if (scope != ScopeSubgroup)
		SPIRV_CROSS_THROW("Only subgroup scope is supported.");

	switch (op)
	{
	case OpGroupNonUniformElect:
		if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
			emit_op(result_type, id, "quad_is_first()", false);
		else
			emit_op(result_type, id, "simd_is_first()", false);
		break;

	case OpGroupNonUniformBroadcast:
		emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupBroadcast");
		break;

	case OpGroupNonUniformBroadcastFirst:
		emit_unary_func_op(result_type, id, ops[3], "spvSubgroupBroadcastFirst");
		break;

	case OpGroupNonUniformBallot:
		emit_unary_func_op(result_type, id, ops[3], "spvSubgroupBallot");
		break;

	case OpGroupNonUniformInverseBallot:
		emit_binary_func_op(result_type, id, ops[3], builtin_subgroup_invocation_id_id, "spvSubgroupBallotBitExtract");
		break;

	case OpGroupNonUniformBallotBitExtract:
		emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupBallotBitExtract");
		break;

	case OpGroupNonUniformBallotFindLSB:
		emit_binary_func_op(result_type, id, ops[3], builtin_subgroup_size_id, "spvSubgroupBallotFindLSB");
		break;

	case OpGroupNonUniformBallotFindMSB:
		emit_binary_func_op(result_type, id, ops[3], builtin_subgroup_size_id, "spvSubgroupBallotFindMSB");
		break;

	case OpGroupNonUniformBallotBitCount:
	{
		auto operation = static_cast<GroupOperation>(ops[3]);
		switch (operation)
		{
		case GroupOperationReduce:
			emit_binary_func_op(result_type, id, ops[4], builtin_subgroup_size_id, "spvSubgroupBallotBitCount");
			break;
		case GroupOperationInclusiveScan:
			emit_binary_func_op(result_type, id, ops[4], builtin_subgroup_invocation_id_id,
			                    "spvSubgroupBallotInclusiveBitCount");
			break;
		case GroupOperationExclusiveScan:
			emit_binary_func_op(result_type, id, ops[4], builtin_subgroup_invocation_id_id,
			                    "spvSubgroupBallotExclusiveBitCount");
			break;
		default:
			SPIRV_CROSS_THROW("Invalid BitCount operation.");
		}
		break;
	}

	case OpGroupNonUniformShuffle:
		emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupShuffle");
		break;

	case OpGroupNonUniformShuffleXor:
		emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupShuffleXor");
		break;

	case OpGroupNonUniformShuffleUp:
		emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupShuffleUp");
		break;

	case OpGroupNonUniformShuffleDown:
		emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupShuffleDown");
		break;

	case OpGroupNonUniformAll:
		if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
			emit_unary_func_op(result_type, id, ops[3], "quad_all");
		else
			emit_unary_func_op(result_type, id, ops[3], "simd_all");
		break;

	case OpGroupNonUniformAny:
		if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions)
			emit_unary_func_op(result_type, id, ops[3], "quad_any");
		else
			emit_unary_func_op(result_type, id, ops[3], "simd_any");
		break;

	case OpGroupNonUniformAllEqual:
		emit_unary_func_op(result_type, id, ops[3], "spvSubgroupAllEqual");
		break;

	// clang-format off
#define MSL_GROUP_OP(op, msl_op) \
case OpGroupNonUniform##op: \
	{ \
		auto operation = static_cast<GroupOperation>(ops[3]); \
		if (operation == GroupOperationReduce) \
			emit_unary_func_op(result_type, id, ops[4], "simd_" #msl_op); \
		else if (operation == GroupOperationInclusiveScan) \
			emit_unary_func_op(result_type, id, ops[4], "simd_prefix_inclusive_" #msl_op); \
		else if (operation == GroupOperationExclusiveScan) \
			emit_unary_func_op(result_type, id, ops[4], "simd_prefix_exclusive_" #msl_op); \
		else if (operation == GroupOperationClusteredReduce) \
		{ \
			/* Only cluster sizes of 4 are supported. */ \
			uint32_t cluster_size = evaluate_constant_u32(ops[5]); \
			if (cluster_size != 4) \
				SPIRV_CROSS_THROW("Metal only supports quad ClusteredReduce."); \
			emit_unary_func_op(result_type, id, ops[4], "quad_" #msl_op); \
		} \
		else \
			SPIRV_CROSS_THROW("Invalid group operation."); \
		break; \
	}
	MSL_GROUP_OP(FAdd, sum)
	MSL_GROUP_OP(FMul, product)
	MSL_GROUP_OP(IAdd, sum)
	MSL_GROUP_OP(IMul, product)
#undef MSL_GROUP_OP
	// The others, unfortunately, don't support InclusiveScan or ExclusiveScan.

#define MSL_GROUP_OP(op, msl_op) \
case OpGroupNonUniform##op: \
	{ \
		auto operation = static_cast<GroupOperation>(ops[3]); \
		if (operation == GroupOperationReduce) \
			emit_unary_func_op(result_type, id, ops[4], "simd_" #msl_op); \
		else if (operation == GroupOperationInclusiveScan) \
			SPIRV_CROSS_THROW("Metal doesn't support InclusiveScan for OpGroupNonUniform" #op "."); \
		else if (operation == GroupOperationExclusiveScan) \
			SPIRV_CROSS_THROW("Metal doesn't support ExclusiveScan for OpGroupNonUniform" #op "."); \
		else if (operation == GroupOperationClusteredReduce) \
		{ \
			/* Only cluster sizes of 4 are supported. */ \
			uint32_t cluster_size = evaluate_constant_u32(ops[5]); \
			if (cluster_size != 4) \
				SPIRV_CROSS_THROW("Metal only supports quad ClusteredReduce."); \
			emit_unary_func_op(result_type, id, ops[4], "quad_" #msl_op); \
		} \
		else \
			SPIRV_CROSS_THROW("Invalid group operation."); \
		break; \
	}

#define MSL_GROUP_OP_CAST(op, msl_op, type) \
case OpGroupNonUniform##op: \
	{ \
		auto operation = static_cast<GroupOperation>(ops[3]); \
		if (operation == GroupOperationReduce) \
			emit_unary_func_op_cast(result_type, id, ops[4], "simd_" #msl_op, type, type); \
		else if (operation == GroupOperationInclusiveScan) \
			SPIRV_CROSS_THROW("Metal doesn't support InclusiveScan for OpGroupNonUniform" #op "."); \
		else if (operation == GroupOperationExclusiveScan) \
			SPIRV_CROSS_THROW("Metal doesn't support ExclusiveScan for OpGroupNonUniform" #op "."); \
		else if (operation == GroupOperationClusteredReduce) \
		{ \
			/* Only cluster sizes of 4 are supported. */ \
			uint32_t cluster_size = evaluate_constant_u32(ops[5]); \
			if (cluster_size != 4) \
				SPIRV_CROSS_THROW("Metal only supports quad ClusteredReduce."); \
			emit_unary_func_op_cast(result_type, id, ops[4], "quad_" #msl_op, type, type); \
		} \
		else \
			SPIRV_CROSS_THROW("Invalid group operation."); \
		break; \
	}

	MSL_GROUP_OP(FMin, min)
	MSL_GROUP_OP(FMax, max)
	MSL_GROUP_OP_CAST(SMin, min, int_type)
	MSL_GROUP_OP_CAST(SMax, max, int_type)
	MSL_GROUP_OP_CAST(UMin, min, uint_type)
	MSL_GROUP_OP_CAST(UMax, max, uint_type)
	MSL_GROUP_OP(BitwiseAnd, and)
	MSL_GROUP_OP(BitwiseOr, or)
	MSL_GROUP_OP(BitwiseXor, xor)
	MSL_GROUP_OP(LogicalAnd, and)
	MSL_GROUP_OP(LogicalOr, or)
	MSL_GROUP_OP(LogicalXor, xor)
	// clang-format on
#undef MSL_GROUP_OP
#undef MSL_GROUP_OP_CAST

	case OpGroupNonUniformQuadSwap:
		emit_binary_func_op(result_type, id, ops[3], ops[4], "spvQuadSwap");
		break;

	case OpGroupNonUniformQuadBroadcast:
		emit_binary_func_op(result_type, id, ops[3], ops[4], "spvQuadBroadcast");
		break;

	default:
		SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
	}

	register_control_dependent_expression(id);
}
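
// For illustration: an OpGroupNonUniformFAdd with GroupOperationReduce on %x
// lowers to simd_sum(x), InclusiveScan to simd_prefix_inclusive_sum(x), and a
// ClusteredReduce with cluster size 4 to quad_sum(x); any other cluster size
// throws, since Metal only exposes quad-sized clusters.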

string CompilerMSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
{
	if (out_type.basetype == in_type.basetype)
		return "";

	assert(out_type.basetype != SPIRType::Boolean);
	assert(in_type.basetype != SPIRType::Boolean);

	bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type) && (out_type.vecsize == in_type.vecsize);
	bool same_size_cast = (out_type.width * out_type.vecsize) == (in_type.width * in_type.vecsize);

	// Bitcasting can only be used between types of the same overall size.
	// And always formally cast between integers, because it's trivial, and also
	// because Metal can internally cast the results of some integer ops to a larger
	// size (eg. short shift right becomes int), which means chaining integer ops
	// together may introduce size variations that SPIR-V doesn't know about.
	if (same_size_cast && !integral_cast)
	{
		return "as_type<" + type_to_glsl(out_type) + ">";
	}
	else
	{
		return type_to_glsl(out_type);
	}
}
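
// For illustration: a float -> uint bitcast is emitted as as_type<uint>(x),
// whereas an int -> uint "bitcast" is emitted as the plain conversion uint(x),
// which is trivially bit-preserving and keeps Metal's implicit integer
// widening from leaking size changes into chained expressions.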

bool CompilerMSL::emit_complex_bitcast(uint32_t, uint32_t, uint32_t)
{
	return false;
}

// Returns an MSL string identifying the name of a SPIR-V builtin.
// Output builtins are qualified with the name of the stage out structure.
string CompilerMSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
{
	switch (builtin)
	{
	// Handle HLSL-style 0-based vertex/instance index.
	// Override GLSL compiler strictness.
	case BuiltInVertexId:
		ensure_builtin(StorageClassInput, BuiltInVertexId);
		if (msl_options.enable_base_index_zero && msl_options.supports_msl_version(1, 1) &&
		    (msl_options.ios_support_base_vertex_instance || msl_options.is_macos()))
		{
			if (builtin_declaration)
			{
				if (needs_base_vertex_arg != TriState::No)
					needs_base_vertex_arg = TriState::Yes;
				return "gl_VertexID";
			}
			else
			{
				ensure_builtin(StorageClassInput, BuiltInBaseVertex);
				return "(gl_VertexID - gl_BaseVertex)";
			}
		}
		else
		{
			return "gl_VertexID";
		}
	case BuiltInInstanceId:
		ensure_builtin(StorageClassInput, BuiltInInstanceId);
		if (msl_options.enable_base_index_zero && msl_options.supports_msl_version(1, 1) &&
		    (msl_options.ios_support_base_vertex_instance || msl_options.is_macos()))
		{
			if (builtin_declaration)
			{
				if (needs_base_instance_arg != TriState::No)
					needs_base_instance_arg = TriState::Yes;
				return "gl_InstanceID";
			}
			else
			{
				ensure_builtin(StorageClassInput, BuiltInBaseInstance);
				return "(gl_InstanceID - gl_BaseInstance)";
			}
		}
		else
		{
			return "gl_InstanceID";
		}
	case BuiltInVertexIndex:
		ensure_builtin(StorageClassInput, BuiltInVertexIndex);
		if (msl_options.enable_base_index_zero && msl_options.supports_msl_version(1, 1) &&
		    (msl_options.ios_support_base_vertex_instance || msl_options.is_macos()))
		{
			if (builtin_declaration)
			{
				if (needs_base_vertex_arg != TriState::No)
					needs_base_vertex_arg = TriState::Yes;
				return "gl_VertexIndex";
			}
			else
			{
				ensure_builtin(StorageClassInput, BuiltInBaseVertex);
				return "(gl_VertexIndex - gl_BaseVertex)";
			}
		}
		else
		{
			return "gl_VertexIndex";
		}
	case BuiltInInstanceIndex:
		ensure_builtin(StorageClassInput, BuiltInInstanceIndex);
		if (msl_options.enable_base_index_zero && msl_options.supports_msl_version(1, 1) &&
		    (msl_options.ios_support_base_vertex_instance || msl_options.is_macos()))
		{
			if (builtin_declaration)
			{
				if (needs_base_instance_arg != TriState::No)
					needs_base_instance_arg = TriState::Yes;
				return "gl_InstanceIndex";
			}
			else
			{
				ensure_builtin(StorageClassInput, BuiltInBaseInstance);
				return "(gl_InstanceIndex - gl_BaseInstance)";
			}
		}
		else
		{
			return "gl_InstanceIndex";
		}
	case BuiltInBaseVertex:
		if (msl_options.supports_msl_version(1, 1) &&
		    (msl_options.ios_support_base_vertex_instance || msl_options.is_macos()))
		{
			needs_base_vertex_arg = TriState::No;
			return "gl_BaseVertex";
		}
		else
		{
			SPIRV_CROSS_THROW("BaseVertex requires Metal 1.1 and Mac or Apple A9+ hardware.");
		}
	case BuiltInBaseInstance:
		if (msl_options.supports_msl_version(1, 1) &&
		    (msl_options.ios_support_base_vertex_instance || msl_options.is_macos()))
		{
			needs_base_instance_arg = TriState::No;
			return "gl_BaseInstance";
		}
		else
		{
			SPIRV_CROSS_THROW("BaseInstance requires Metal 1.1 and Mac or Apple A9+ hardware.");
		}
	case BuiltInDrawIndex:
		SPIRV_CROSS_THROW("DrawIndex is not supported in MSL.");

	// When used in the entry function, output builtins are qualified with output struct name.
	// Test storage class as NOT Input, as output builtins might be part of generic type.
	// Also don't do this for tessellation control shaders.
	case BuiltInViewportIndex:
		if (!msl_options.supports_msl_version(2, 0))
			SPIRV_CROSS_THROW("ViewportIndex requires Metal 2.0.");
		/* fallthrough */
	case BuiltInFragDepth:
	case BuiltInFragStencilRefEXT:
		if ((builtin == BuiltInFragDepth && !msl_options.enable_frag_depth_builtin) ||
		    (builtin == BuiltInFragStencilRefEXT && !msl_options.enable_frag_stencil_ref_builtin))
			break;
		/* fallthrough */
	case BuiltInPosition:
	case BuiltInPointSize:
	case BuiltInClipDistance:
	case BuiltInCullDistance:
	case BuiltInLayer:
		if (get_execution_model() == ExecutionModelTessellationControl)
			break;
		if (storage != StorageClassInput && current_function && (current_function->self == ir.default_entry_point) &&
		    !is_stage_output_builtin_masked(builtin))
			return stage_out_var_name + "." + CompilerGLSL::builtin_to_glsl(builtin, storage);
		break;

	case BuiltInSampleMask:
		if (storage == StorageClassInput && current_function && (current_function->self == ir.default_entry_point) &&
		    (has_additional_fixed_sample_mask() || needs_sample_id))
		{
			string samp_mask_in;
			samp_mask_in += "(" + CompilerGLSL::builtin_to_glsl(builtin, storage);
			if (has_additional_fixed_sample_mask())
				samp_mask_in += " & " + additional_fixed_sample_mask_str();
			if (needs_sample_id)
				samp_mask_in += " & (1 << gl_SampleID)";
			samp_mask_in += ")";
			return samp_mask_in;
		}
		if (storage != StorageClassInput && current_function && (current_function->self == ir.default_entry_point) &&
		    !is_stage_output_builtin_masked(builtin))
			return stage_out_var_name + "." + CompilerGLSL::builtin_to_glsl(builtin, storage);
		break;

	case BuiltInBaryCoordNV:
	case BuiltInBaryCoordNoPerspNV:
		if (storage == StorageClassInput && current_function && (current_function->self == ir.default_entry_point))
			return stage_in_var_name + "." + CompilerGLSL::builtin_to_glsl(builtin, storage);
		break;

	case BuiltInTessLevelOuter:
		if (get_execution_model() == ExecutionModelTessellationControl &&
		    storage != StorageClassInput && current_function && (current_function->self == ir.default_entry_point))
		{
			return join(tess_factor_buffer_var_name, "[", to_expression(builtin_primitive_id_id),
			            "].edgeTessellationFactor");
		}
		break;

	case BuiltInTessLevelInner:
		if (get_execution_model() == ExecutionModelTessellationControl &&
		    storage != StorageClassInput && current_function && (current_function->self == ir.default_entry_point))
		{
			return join(tess_factor_buffer_var_name, "[", to_expression(builtin_primitive_id_id),
			            "].insideTessellationFactor");
		}
		break;

	default:
		break;
	}

	return CompilerGLSL::builtin_to_glsl(builtin, storage);
}
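
// For illustration: with enable_base_index_zero, a non-declaration use of
// BuiltInVertexIndex is emitted as (gl_VertexIndex - gl_BaseVertex), so
// HLSL-style 0-based indexing still holds when the draw call supplies a
// nonzero base vertex.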

// Returns an MSL string attribute qualifier for a SPIR-V builtin
string CompilerMSL::builtin_qualifier(BuiltIn builtin)
{
	auto &execution = get_entry_point();

	switch (builtin)
	{
	// Vertex function in
	case BuiltInVertexId:
		return "vertex_id";
	case BuiltInVertexIndex:
		return "vertex_id";
	case BuiltInBaseVertex:
		return "base_vertex";
	case BuiltInInstanceId:
		return "instance_id";
	case BuiltInInstanceIndex:
		return "instance_id";
	case BuiltInBaseInstance:
		return "base_instance";
	case BuiltInDrawIndex:
		SPIRV_CROSS_THROW("DrawIndex is not supported in MSL.");

	// Vertex function out
	case BuiltInClipDistance:
		return "clip_distance";
	case BuiltInPointSize:
		return "point_size";
	case BuiltInPosition:
		if (position_invariant)
		{
			if (!msl_options.supports_msl_version(2, 1))
				SPIRV_CROSS_THROW("Invariant position is only supported on MSL 2.1 and up.");
			return "position, invariant";
		}
		else
			return "position";
	case BuiltInLayer:
		return "render_target_array_index";
	case BuiltInViewportIndex:
		if (!msl_options.supports_msl_version(2, 0))
			SPIRV_CROSS_THROW("ViewportIndex requires Metal 2.0.");
		return "viewport_array_index";

	// Tess. control function in
	case BuiltInInvocationId:
		if (msl_options.multi_patch_workgroup)
		{
			// Shouldn't be reached.
			SPIRV_CROSS_THROW("InvocationId is computed manually with multi-patch workgroups in MSL.");
		}
		return "thread_index_in_threadgroup";
	case BuiltInPatchVertices:
		// Shouldn't be reached.
		SPIRV_CROSS_THROW("PatchVertices is derived from the auxiliary buffer in MSL.");
	case BuiltInPrimitiveId:
		switch (execution.model)
		{
		case ExecutionModelTessellationControl:
			if (msl_options.multi_patch_workgroup)
			{
				// Shouldn't be reached.
				SPIRV_CROSS_THROW("PrimitiveId is computed manually with multi-patch workgroups in MSL.");
			}
			return "threadgroup_position_in_grid";
		case ExecutionModelTessellationEvaluation:
			return "patch_id";
		case ExecutionModelFragment:
			if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3))
				SPIRV_CROSS_THROW("PrimitiveId on iOS requires MSL 2.3.");
			else if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 2))
				SPIRV_CROSS_THROW("PrimitiveId on macOS requires MSL 2.2.");
			return "primitive_id";
		default:
			SPIRV_CROSS_THROW("PrimitiveId is not supported in this execution model.");
		}

	// Tess. control function out
	case BuiltInTessLevelOuter:
	case BuiltInTessLevelInner:
		// Shouldn't be reached.
		SPIRV_CROSS_THROW("Tessellation levels are handled specially in MSL.");

	// Tess. evaluation function in
	case BuiltInTessCoord:
		return "position_in_patch";

	// Fragment function in
	case BuiltInFrontFacing:
		return "front_facing";
	case BuiltInPointCoord:
		return "point_coord";
	case BuiltInFragCoord:
		return "position";
	case BuiltInSampleId:
		return "sample_id";
	case BuiltInSampleMask:
		return "sample_mask";
	case BuiltInSamplePosition:
		// Shouldn't be reached.
		SPIRV_CROSS_THROW("Sample position is retrieved by a function in MSL.");
	case BuiltInViewIndex:
		if (execution.model != ExecutionModelFragment)
			SPIRV_CROSS_THROW("ViewIndex is handled specially outside fragment shaders.");
		// The ViewIndex was implicitly used in the prior stages to set the render_target_array_index,
		// so we can get it from there.
		return "render_target_array_index";

	// Fragment function out
	case BuiltInFragDepth:
		if (execution.flags.get(ExecutionModeDepthGreater))
			return "depth(greater)";
		else if (execution.flags.get(ExecutionModeDepthLess))
			return "depth(less)";
		else
			return "depth(any)";
	case BuiltInFragStencilRefEXT:
		return "stencil";

	// Compute function in
	case BuiltInGlobalInvocationId:
		return "thread_position_in_grid";
	case BuiltInWorkgroupId:
		return "threadgroup_position_in_grid";
	case BuiltInNumWorkgroups:
		return "threadgroups_per_grid";
	case BuiltInLocalInvocationId:
		return "thread_position_in_threadgroup";
	case BuiltInLocalInvocationIndex:
		return "thread_index_in_threadgroup";
	case BuiltInSubgroupSize:
		if (msl_options.emulate_subgroups || msl_options.fixed_subgroup_size != 0)
			// Shouldn't be reached.
			SPIRV_CROSS_THROW("Emitting threads_per_simdgroup attribute with fixed subgroup size??");
		if (execution.model == ExecutionModelFragment)
		{
			if (!msl_options.supports_msl_version(2, 2))
				SPIRV_CROSS_THROW("threads_per_simdgroup requires Metal 2.2 in fragment shaders.");
			return "threads_per_simdgroup";
		}
		else
		{
			// thread_execution_width is an alias for threads_per_simdgroup, and it's been available since 1.0,
			// but not in fragment functions.
			return "thread_execution_width";
		}
	case BuiltInNumSubgroups:
		if (msl_options.emulate_subgroups)
			// Shouldn't be reached.
			SPIRV_CROSS_THROW("NumSubgroups is handled specially with emulation.");
		if (!msl_options.supports_msl_version(2))
			SPIRV_CROSS_THROW("Subgroup builtins require Metal 2.0.");
		return msl_options.is_ios() ? "quadgroups_per_threadgroup" : "simdgroups_per_threadgroup";
	case BuiltInSubgroupId:
		if (msl_options.emulate_subgroups)
			// Shouldn't be reached.
			SPIRV_CROSS_THROW("SubgroupId is handled specially with emulation.");
		if (!msl_options.supports_msl_version(2))
			SPIRV_CROSS_THROW("Subgroup builtins require Metal 2.0.");
		return msl_options.is_ios() ? "quadgroup_index_in_threadgroup" : "simdgroup_index_in_threadgroup";
	case BuiltInSubgroupLocalInvocationId:
		if (msl_options.emulate_subgroups)
			// Shouldn't be reached.
			SPIRV_CROSS_THROW("SubgroupLocalInvocationId is handled specially with emulation.");
		if (execution.model == ExecutionModelFragment)
		{
			if (!msl_options.supports_msl_version(2, 2))
				SPIRV_CROSS_THROW("thread_index_in_simdgroup requires Metal 2.2 in fragment shaders.");
			return "thread_index_in_simdgroup";
		}
		else if (execution.model == ExecutionModelKernel || execution.model == ExecutionModelGLCompute ||
		         execution.model == ExecutionModelTessellationControl ||
		         (execution.model == ExecutionModelVertex && msl_options.vertex_for_tessellation))
		{
			// We are generating a Metal kernel function.
			if (!msl_options.supports_msl_version(2))
				SPIRV_CROSS_THROW("Subgroup builtins in kernel functions require Metal 2.0.");
			return msl_options.is_ios() ? "thread_index_in_quadgroup" : "thread_index_in_simdgroup";
		}
		else
			SPIRV_CROSS_THROW("Subgroup builtins are not available in this type of function.");
	case BuiltInSubgroupEqMask:
	case BuiltInSubgroupGeMask:
	case BuiltInSubgroupGtMask:
	case BuiltInSubgroupLeMask:
	case BuiltInSubgroupLtMask:
		// Shouldn't be reached.
		SPIRV_CROSS_THROW("Subgroup ballot masks are handled specially in MSL.");
	case BuiltInBaryCoordNV:
		// TODO: AMD barycentrics as well? Seems to have a different swizzle and 2 components rather than 3.
		if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3))
			SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.3 and above on iOS.");
		else if (!msl_options.supports_msl_version(2, 2))
			SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.2 and above on macOS.");
		return "barycentric_coord, center_perspective";
	case BuiltInBaryCoordNoPerspNV:
		// TODO: AMD barycentrics as well? Seems to have a different swizzle and 2 components rather than 3.
		if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3))
			SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.3 and above on iOS.");
		else if (!msl_options.supports_msl_version(2, 2))
			SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.2 and above on macOS.");
		return "barycentric_coord, center_no_perspective";
	default:
		return "unsupported-built-in";
	}
}
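
// For illustration: BuiltInFragCoord becomes the [[position]] attribute on a
// fragment input, and an invariant BuiltInPosition in a vertex function is
// emitted as [[position, invariant]] (MSL 2.1+).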

// Returns an MSL string type declaration for a SPIR-V builtin
string CompilerMSL::builtin_type_decl(BuiltIn builtin, uint32_t id)
{
	const SPIREntryPoint &execution = get_entry_point();
	switch (builtin)
	{
	// Vertex function in
	case BuiltInVertexId:
		return "uint";
	case BuiltInVertexIndex:
		return "uint";
	case BuiltInBaseVertex:
		return "uint";
	case BuiltInInstanceId:
		return "uint";
	case BuiltInInstanceIndex:
		return "uint";
	case BuiltInBaseInstance:
		return "uint";
	case BuiltInDrawIndex:
		SPIRV_CROSS_THROW("DrawIndex is not supported in MSL.");

	// Vertex function out
	case BuiltInClipDistance:
	case BuiltInCullDistance:
		return "float";
	case BuiltInPointSize:
		return "float";
	case BuiltInPosition:
		return "float4";
	case BuiltInLayer:
		return "uint";
	case BuiltInViewportIndex:
		if (!msl_options.supports_msl_version(2, 0))
			SPIRV_CROSS_THROW("ViewportIndex requires Metal 2.0.");
		return "uint";

	// Tess. control function in
	case BuiltInInvocationId:
		return "uint";
	case BuiltInPatchVertices:
		return "uint";
	case BuiltInPrimitiveId:
		return "uint";

	// Tess. control function out
	case BuiltInTessLevelInner:
		if (execution.model == ExecutionModelTessellationEvaluation)
			return !execution.flags.get(ExecutionModeTriangles) ? "float2" : "float";
		return "half";
	case BuiltInTessLevelOuter:
		if (execution.model == ExecutionModelTessellationEvaluation)
			return !execution.flags.get(ExecutionModeTriangles) ? "float4" : "float";
		return "half";

	// Tess. evaluation function in
	case BuiltInTessCoord:
		return execution.flags.get(ExecutionModeTriangles) ? "float3" : "float2";

	// Fragment function in
	case BuiltInFrontFacing:
		return "bool";
	case BuiltInPointCoord:
		return "float2";
	case BuiltInFragCoord:
		return "float4";
	case BuiltInSampleId:
		return "uint";
	case BuiltInSampleMask:
		return "uint";
	case BuiltInSamplePosition:
		return "float2";
	case BuiltInViewIndex:
		return "uint";
	case BuiltInHelperInvocation:
		return "bool";
	case BuiltInBaryCoordNV:
	case BuiltInBaryCoordNoPerspNV:
		// Use the type as declared; it can have 1, 2 or 3 components.
		return type_to_glsl(get_variable_data_type(get<SPIRVariable>(id)));

	// Fragment function out
	case BuiltInFragDepth:
		return "float";
	case BuiltInFragStencilRefEXT:
		return "uint";

	// Compute function in
	case BuiltInGlobalInvocationId:
	case BuiltInLocalInvocationId:
	case BuiltInNumWorkgroups:
	case BuiltInWorkgroupId:
		return "uint3";
	case BuiltInLocalInvocationIndex:
	case BuiltInNumSubgroups:
	case BuiltInSubgroupId:
	case BuiltInSubgroupSize:
	case BuiltInSubgroupLocalInvocationId:
		return "uint";
	case BuiltInSubgroupEqMask:
	case BuiltInSubgroupGeMask:
	case BuiltInSubgroupGtMask:
	case BuiltInSubgroupLeMask:
	case BuiltInSubgroupLtMask:
		return "uint4";
	case BuiltInDeviceIndex:
		return "int";

	default:
		return "unsupported-built-in-type";
	}
}

// Returns the declaration of a built-in argument to a function
string CompilerMSL::built_in_func_arg(BuiltIn builtin, bool prefix_comma)
{
	string bi_arg;
	if (prefix_comma)
		bi_arg += ", ";

	// Handle HLSL-style 0-based vertex/instance index.
	builtin_declaration = true;
	bi_arg += builtin_type_decl(builtin);
	bi_arg += " " + builtin_to_glsl(builtin, StorageClassInput);
	bi_arg += " [[" + builtin_qualifier(builtin) + "]]";
	builtin_declaration = false;

	return bi_arg;
}
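
// For illustration: built_in_func_arg(BuiltInSampleId, true) yields
// ", uint gl_SampleID [[sample_id]]", i.e. the builtin's type, its remapped
// GLSL name, and the Metal attribute qualifier, ready to splice into an
// entry-point signature.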

const SPIRType &CompilerMSL::get_physical_member_type(const SPIRType &type, uint32_t index) const
{
	if (member_is_remapped_physical_type(type, index))
		return get<SPIRType>(get_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID));
	else
		return get<SPIRType>(type.member_types[index]);
}

SPIRType CompilerMSL::get_presumed_input_type(const SPIRType &ib_type, uint32_t index) const
{
	SPIRType type = get_physical_member_type(ib_type, index);
	uint32_t loc = get_member_decoration(ib_type.self, index, DecorationLocation);
	uint32_t cmp = get_member_decoration(ib_type.self, index, DecorationComponent);
	auto p_va = inputs_by_location.find({ loc, cmp });
	if (p_va != end(inputs_by_location) && p_va->second.vecsize > type.vecsize)
		type.vecsize = p_va->second.vecsize;
	return type;
}

uint32_t CompilerMSL::get_declared_type_array_stride_msl(const SPIRType &type, bool is_packed, bool row_major) const
{
	// Array stride in MSL is always size * array_size. sizeof(float3) == 16,
	// unlike GLSL and HLSL where array stride would be 16 and size 12.

	// We could use parent type here and recurse, but that makes creating physical type remappings
	// far more complicated. We'd rather just create the final type, and ignore having to create the entire type
	// hierarchy in order to compute this value, so make a temporary type on the stack.
	auto basic_type = type;
	basic_type.array.clear();
	basic_type.array_size_literal.clear();
	uint32_t value_size = get_declared_type_size_msl(basic_type, is_packed, row_major);

	uint32_t dimensions = uint32_t(type.array.size());
	assert(dimensions > 0);
	dimensions--;

	// Multiply together every dimension, except the last one.
	for (uint32_t dim = 0; dim < dimensions; dim++)
	{
		uint32_t array_size = to_array_size_literal(type, dim);
		value_size *= max(array_size, 1u);
	}

	return value_size;
}
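
// Worked example under the rules above: for float3 arr[4], the element size
// is 16 bytes (a float3 pads to the size of a float4), so the declared array
// stride is also 16. For a two-dimensional array, every dimension except the
// outermost multiplies into the stride, e.g. float3 arr[2][4] gives 16 * 4 = 64.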

uint32_t CompilerMSL::get_declared_struct_member_array_stride_msl(const SPIRType &type, uint32_t index) const
{
	return get_declared_type_array_stride_msl(get_physical_member_type(type, index),
	                                          member_is_packed_physical_type(type, index),
	                                          has_member_decoration(type.self, index, DecorationRowMajor));
}

uint32_t CompilerMSL::get_declared_input_array_stride_msl(const SPIRType &type, uint32_t index) const
{
	return get_declared_type_array_stride_msl(get_presumed_input_type(type, index), false,
	                                          has_member_decoration(type.self, index, DecorationRowMajor));
}

uint32_t CompilerMSL::get_declared_type_matrix_stride_msl(const SPIRType &type, bool packed, bool row_major) const
{
	// For packed matrices, we just use the size of the vector type.
	// Otherwise, MatrixStride == alignment, which is the size of the underlying vector type.
	if (packed)
		return (type.width / 8) * ((row_major && type.columns > 1) ? type.columns : type.vecsize);
	else
		return get_declared_type_alignment_msl(type, false, row_major);
}

uint32_t CompilerMSL::get_declared_struct_member_matrix_stride_msl(const SPIRType &type, uint32_t index) const
{
	return get_declared_type_matrix_stride_msl(get_physical_member_type(type, index),
	                                           member_is_packed_physical_type(type, index),
	                                           has_member_decoration(type.self, index, DecorationRowMajor));
}

uint32_t CompilerMSL::get_declared_input_matrix_stride_msl(const SPIRType &type, uint32_t index) const
{
	return get_declared_type_matrix_stride_msl(get_presumed_input_type(type, index), false,
	                                           has_member_decoration(type.self, index, DecorationRowMajor));
}

uint32_t CompilerMSL::get_declared_struct_size_msl(const SPIRType &struct_type, bool ignore_alignment,
                                                   bool ignore_padding) const
{
	// If we have a target size, that is the declared size as well.
	if (!ignore_padding && has_extended_decoration(struct_type.self, SPIRVCrossDecorationPaddingTarget))
		return get_extended_decoration(struct_type.self, SPIRVCrossDecorationPaddingTarget);

	if (struct_type.member_types.empty())
		return 0;

	uint32_t mbr_cnt = uint32_t(struct_type.member_types.size());

	// In MSL, a struct's alignment is equal to the maximum alignment of any of its members.
	uint32_t alignment = 1;

	if (!ignore_alignment)
	{
		for (uint32_t i = 0; i < mbr_cnt; i++)
		{
			uint32_t mbr_alignment = get_declared_struct_member_alignment_msl(struct_type, i);
			alignment = max(alignment, mbr_alignment);
		}
	}

	// The last member is always matched to the final Offset decoration, but the size of the struct
	// depends on the physical size of that member in MSL, and the size of the struct itself is then
	// rounded up to the struct alignment.
	uint32_t spirv_offset = type_struct_member_offset(struct_type, mbr_cnt - 1);
	uint32_t msl_size = spirv_offset + get_declared_struct_member_size_msl(struct_type, mbr_cnt - 1);
	msl_size = (msl_size + alignment - 1) & ~(alignment - 1);
	return msl_size;
}
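
// Worked example under the rules above: for struct { float a; float2 b; }
// with SPIR-V offsets 0 and 8, the last member ends at 8 + 8 = 16 bytes, the
// struct alignment is max(4, 8) = 8, and so the declared MSL size is 16.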

// Returns the byte size of a struct member.
uint32_t CompilerMSL::get_declared_type_size_msl(const SPIRType &type, bool is_packed, bool row_major) const
{
	switch (type.basetype)
	{
	case SPIRType::Unknown:
	case SPIRType::Void:
	case SPIRType::AtomicCounter:
	case SPIRType::Image:
	case SPIRType::SampledImage:
	case SPIRType::Sampler:
		SPIRV_CROSS_THROW("Querying size of opaque object.");

	default:
	{
		if (!type.array.empty())
		{
			uint32_t array_size = to_array_size_literal(type);
			return get_declared_type_array_stride_msl(type, is_packed, row_major) * max(array_size, 1u);
		}

		if (type.basetype == SPIRType::Struct)
			return get_declared_struct_size_msl(type);

		if (is_packed)
		{
			return type.vecsize * type.columns * (type.width / 8);
		}
		else
		{
			// An unpacked 3-element vector or matrix column is the same memory size as a 4-element.
			uint32_t vecsize = type.vecsize;
			uint32_t columns = type.columns;

			if (row_major && columns > 1)
				swap(vecsize, columns);

			if (vecsize == 3)
				vecsize = 4;

			return vecsize * columns * (type.width / 8);
		}
	}
	}
}
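
// For illustration: an unpacked float3 reports 16 bytes here (vecsize 3 is
// promoted to 4), while a packed_float3 reports 3 * 4 = 12 bytes; a row-major
// matrix swaps vecsize and columns before applying the same promotion.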

uint32_t CompilerMSL::get_declared_struct_member_size_msl(const SPIRType &type, uint32_t index) const
{
	return get_declared_type_size_msl(get_physical_member_type(type, index),
	                                  member_is_packed_physical_type(type, index),
	                                  has_member_decoration(type.self, index, DecorationRowMajor));
}

uint32_t CompilerMSL::get_declared_input_size_msl(const SPIRType &type, uint32_t index) const
{
	return get_declared_type_size_msl(get_presumed_input_type(type, index), false,
	                                  has_member_decoration(type.self, index, DecorationRowMajor));
}

// Returns the byte alignment of a type.
uint32_t CompilerMSL::get_declared_type_alignment_msl(const SPIRType &type, bool is_packed, bool row_major) const
{
	switch (type.basetype)
	{
	case SPIRType::Unknown:
	case SPIRType::Void:
	case SPIRType::AtomicCounter:
	case SPIRType::Image:
	case SPIRType::SampledImage:
	case SPIRType::Sampler:
		SPIRV_CROSS_THROW("Querying alignment of opaque object.");

	case SPIRType::Double:
		SPIRV_CROSS_THROW("double types are not supported in buffers in MSL.");

	case SPIRType::Struct:
	{
		// In MSL, a struct's alignment is equal to the maximum alignment of any of its members.
		uint32_t alignment = 1;
		for (uint32_t i = 0; i < type.member_types.size(); i++)
			alignment = max(alignment, uint32_t(get_declared_struct_member_alignment_msl(type, i)));
		return alignment;
	}

	default:
	{
		if (type.basetype == SPIRType::Int64 && !msl_options.supports_msl_version(2, 3))
			SPIRV_CROSS_THROW("long types in buffers are only supported in MSL 2.3 and above.");
		if (type.basetype == SPIRType::UInt64 && !msl_options.supports_msl_version(2, 3))
			SPIRV_CROSS_THROW("ulong types in buffers are only supported in MSL 2.3 and above.");

		// Alignment of a packed type is the same as the underlying component or column size.
		// Alignment of an unpacked type is the same as the vector size.
		// Alignment of a 3-element vector is the same as for a 4-element vector (including packed using column).
		if (is_packed)
		{
			// If we have packed_T and friends, the alignment is always scalar.
			return type.width / 8;
		}
		else
		{
			// This is the general rule for MSL. Size == alignment.
			uint32_t vecsize = (row_major && type.columns > 1) ? type.columns : type.vecsize;
			return (type.width / 8) * (vecsize == 3 ? 4 : vecsize);
		}
	}
	}
}
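
// For illustration: a packed_float3 aligns to 4 bytes (scalar alignment),
// while an unpacked float3 aligns to 16, the same as float4, following the
// size == alignment rule above.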

uint32_t CompilerMSL::get_declared_struct_member_alignment_msl(const SPIRType &type, uint32_t index) const
{
	return get_declared_type_alignment_msl(get_physical_member_type(type, index),
	                                       member_is_packed_physical_type(type, index),
	                                       has_member_decoration(type.self, index, DecorationRowMajor));
}

uint32_t CompilerMSL::get_declared_input_alignment_msl(const SPIRType &type, uint32_t index) const
{
	return get_declared_type_alignment_msl(get_presumed_input_type(type, index), false,
	                                       has_member_decoration(type.self, index, DecorationRowMajor));
}

bool CompilerMSL::skip_argument(uint32_t) const
{
	return false;
}

void CompilerMSL::analyze_sampled_image_usage()
{
	if (msl_options.swizzle_texture_samples)
	{
		SampledImageScanner scanner(*this);
		traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), scanner);
	}
}

bool CompilerMSL::SampledImageScanner::handle(spv::Op opcode, const uint32_t *args, uint32_t length)
{
	switch (opcode)
	{
	case OpLoad:
	case OpImage:
	case OpSampledImage:
	{
		if (length < 3)
			return false;

		uint32_t result_type = args[0];
		auto &type = compiler.get<SPIRType>(result_type);
		if ((type.basetype != SPIRType::Image && type.basetype != SPIRType::SampledImage) || type.image.sampled != 1)
			return true;

		uint32_t id = args[1];
		compiler.set<SPIRExpression>(id, "", result_type, true);
		break;
	}
	case OpImageSampleExplicitLod:
	case OpImageSampleProjExplicitLod:
	case OpImageSampleDrefExplicitLod:
	case OpImageSampleProjDrefExplicitLod:
	case OpImageSampleImplicitLod:
	case OpImageSampleProjImplicitLod:
	case OpImageSampleDrefImplicitLod:
	case OpImageSampleProjDrefImplicitLod:
	case OpImageFetch:
	case OpImageGather:
	case OpImageDrefGather:
		compiler.has_sampled_images =
		    compiler.has_sampled_images || compiler.is_sampled_image_type(compiler.expression_type(args[2]));
		compiler.needs_swizzle_buffer_def = compiler.needs_swizzle_buffer_def || compiler.has_sampled_images;
		break;
	default:
		break;
	}
	return true;
}

// If a needed custom function wasn't added before, add it and force a recompile.
void CompilerMSL::add_spv_func_and_recompile(SPVFuncImpl spv_func)
{
	if (spv_function_implementations.count(spv_func) == 0)
	{
		spv_function_implementations.insert(spv_func);
		suppress_missing_prototypes = true;
		force_recompile();
	}
}
  13398. bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, uint32_t length)
  13399. {
  13400. // Since MSL exists in a single execution scope, function prototype declarations are not
  13401. // needed, and clutter the output. If secondary functions are output (either as a SPIR-V
  13402. // function implementation or as indicated by the presence of OpFunctionCall), then set
  13403. // suppress_missing_prototypes to suppress compiler warnings of missing function prototypes.
  13404. // Mark if the input requires the implementation of an SPIR-V function that does not exist in Metal.
  13405. SPVFuncImpl spv_func = get_spv_func_impl(opcode, args);
  13406. if (spv_func != SPVFuncImplNone)
  13407. {
  13408. compiler.spv_function_implementations.insert(spv_func);
  13409. suppress_missing_prototypes = true;
  13410. }

	switch (opcode)
	{
	case OpFunctionCall:
		suppress_missing_prototypes = true;
		break;

	// Emulate texture2D atomic operations
	case OpImageTexelPointer:
	{
		auto *var = compiler.maybe_get_backing_variable(args[2]);
		image_pointers[args[1]] = var ? var->self : ID(0);
		break;
	}

	case OpImageWrite:
		if (!compiler.msl_options.supports_msl_version(2, 2))
			uses_resource_write = true;
		break;

	case OpStore:
		check_resource_write(args[0]);
		break;

	// Emulate texture2D atomic operations
	case OpAtomicExchange:
	case OpAtomicCompareExchange:
	case OpAtomicCompareExchangeWeak:
	case OpAtomicIIncrement:
	case OpAtomicIDecrement:
	case OpAtomicIAdd:
	case OpAtomicISub:
	case OpAtomicSMin:
	case OpAtomicUMin:
	case OpAtomicSMax:
	case OpAtomicUMax:
	case OpAtomicAnd:
	case OpAtomicOr:
	case OpAtomicXor:
	{
		uses_atomics = true;
		auto it = image_pointers.find(args[2]);
		if (it != image_pointers.end())
		{
			compiler.atomic_image_vars.insert(it->second);
		}
		check_resource_write(args[2]);
		break;
	}

	case OpAtomicStore:
	{
		uses_atomics = true;
		auto it = image_pointers.find(args[0]);
		if (it != image_pointers.end())
		{
			compiler.atomic_image_vars.insert(it->second);
		}
		check_resource_write(args[0]);
		break;
	}

	case OpAtomicLoad:
	{
		uses_atomics = true;
		auto it = image_pointers.find(args[2]);
		if (it != image_pointers.end())
		{
			compiler.atomic_image_vars.insert(it->second);
		}
		break;
	}

	case OpGroupNonUniformInverseBallot:
		needs_subgroup_invocation_id = true;
		break;

	case OpGroupNonUniformBallotFindLSB:
	case OpGroupNonUniformBallotFindMSB:
		needs_subgroup_size = true;
		break;

	case OpGroupNonUniformBallotBitCount:
		if (args[3] == GroupOperationReduce)
			needs_subgroup_size = true;
		else
			needs_subgroup_invocation_id = true;
		break;

	case OpArrayLength:
	{
		auto *var = compiler.maybe_get_backing_variable(args[2]);
		if (var)
			compiler.buffers_requiring_array_length.insert(var->self);
		break;
	}

	case OpInBoundsAccessChain:
	case OpAccessChain:
	case OpPtrAccessChain:
	{
		// OpArrayLength might want to know if taking ArrayLength of an array of SSBOs.
		uint32_t result_type = args[0];
		uint32_t id = args[1];
		uint32_t ptr = args[2];

		compiler.set<SPIRExpression>(id, "", result_type, true);
		compiler.register_read(id, ptr, true);
		compiler.ir.ids[id].set_allow_type_rewrite();
		break;
	}

	case OpExtInst:
	{
		uint32_t extension_set = args[2];
		if (compiler.get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL)
		{
			auto op_450 = static_cast<GLSLstd450>(args[3]);
			switch (op_450)
			{
			case GLSLstd450InterpolateAtCentroid:
			case GLSLstd450InterpolateAtSample:
			case GLSLstd450InterpolateAtOffset:
			{
				if (!compiler.msl_options.supports_msl_version(2, 3))
					SPIRV_CROSS_THROW("Pull-model interpolation requires MSL 2.3.");
				// Fragment varyings used with pull-model interpolation need special handling,
				// due to the way pull-model interpolation works in Metal.
				auto *var = compiler.maybe_get_backing_variable(args[4]);
				if (var)
				{
					compiler.pull_model_inputs.insert(var->self);
					auto &var_type = compiler.get_variable_element_type(*var);
					// In addition, if this variable has a 'Sample' decoration, we need the sample ID
					// in order to do default interpolation.
					if (compiler.has_decoration(var->self, DecorationSample))
					{
						needs_sample_id = true;
					}
					else if (var_type.basetype == SPIRType::Struct)
					{
						// Now we need to check each member and see if it has this decoration.
						for (uint32_t i = 0; i < var_type.member_types.size(); ++i)
						{
							if (compiler.has_member_decoration(var_type.self, i, DecorationSample))
							{
								needs_sample_id = true;
								break;
							}
						}
					}
				}
				break;
			}
			default:
				break;
			}
		}
		break;
	}

	default:
		break;
	}

	// If it has one, keep track of the instruction's result type, mapped by ID
	uint32_t result_type, result_id;
	if (compiler.instruction_to_result_type(result_type, result_id, opcode, args, length))
		result_types[result_id] = result_type;

	return true;
}

// If the variable is a Uniform or StorageBuffer, mark that a resource has been written to.
void CompilerMSL::OpCodePreprocessor::check_resource_write(uint32_t var_id)
{
	auto *p_var = compiler.maybe_get_backing_variable(var_id);
	StorageClass sc = p_var ? p_var->storage : StorageClassMax;
	if (!compiler.msl_options.supports_msl_version(2, 1) &&
	    (sc == StorageClassUniform || sc == StorageClassStorageBuffer))
		uses_resource_write = true;
}

// Returns an enumeration of a SPIR-V function that needs to be output for certain Op codes.
CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op opcode, const uint32_t *args)
{
	switch (opcode)
	{
	case OpFMod:
		return SPVFuncImplMod;

	case OpFAdd:
	case OpFSub:
		if (compiler.msl_options.invariant_float_math ||
		    compiler.has_decoration(args[1], DecorationNoContraction))
		{
			return opcode == OpFAdd ? SPVFuncImplFAdd : SPVFuncImplFSub;
		}
		break;

	case OpFMul:
	case OpOuterProduct:
	case OpMatrixTimesVector:
	case OpVectorTimesMatrix:
	case OpMatrixTimesMatrix:
		if (compiler.msl_options.invariant_float_math ||
		    compiler.has_decoration(args[1], DecorationNoContraction))
		{
			return SPVFuncImplFMul;
		}
		break;

	case OpQuantizeToF16:
		return SPVFuncImplQuantizeToF16;

	case OpTypeArray:
	{
		// Allow Metal to use the array<T> template to make arrays a value type
		return SPVFuncImplUnsafeArray;
	}

	// Emulate texture2D atomic operations
	case OpAtomicExchange:
	case OpAtomicCompareExchange:
	case OpAtomicCompareExchangeWeak:
	case OpAtomicIIncrement:
	case OpAtomicIDecrement:
	case OpAtomicIAdd:
	case OpAtomicISub:
	case OpAtomicSMin:
	case OpAtomicUMin:
	case OpAtomicSMax:
	case OpAtomicUMax:
	case OpAtomicAnd:
	case OpAtomicOr:
	case OpAtomicXor:
	case OpAtomicLoad:
	case OpAtomicStore:
	{
		auto it = image_pointers.find(args[opcode == OpAtomicStore ? 0 : 2]);
		if (it != image_pointers.end())
		{
			uint32_t tid = compiler.get<SPIRVariable>(it->second).basetype;
			if (tid && compiler.get<SPIRType>(tid).image.dim == Dim2D)
				return SPVFuncImplImage2DAtomicCoords;
		}
		break;
	}

	case OpImageFetch:
	case OpImageRead:
	case OpImageWrite:
	{
		// Retrieve the image type, and if it's a Buffer, emit a texel coordinate function
		uint32_t tid = result_types[args[opcode == OpImageWrite ? 0 : 2]];
		if (tid && compiler.get<SPIRType>(tid).image.dim == DimBuffer && !compiler.msl_options.texture_buffer_native)
			return SPVFuncImplTexelBufferCoords;
		break;
	}

	case OpExtInst:
	{
		uint32_t extension_set = args[2];
		if (compiler.get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL)
		{
			auto op_450 = static_cast<GLSLstd450>(args[3]);
			switch (op_450)
			{
			case GLSLstd450Radians:
				return SPVFuncImplRadians;
			case GLSLstd450Degrees:
				return SPVFuncImplDegrees;
			case GLSLstd450FindILsb:
				return SPVFuncImplFindILsb;
			case GLSLstd450FindSMsb:
				return SPVFuncImplFindSMsb;
			case GLSLstd450FindUMsb:
				return SPVFuncImplFindUMsb;
			case GLSLstd450SSign:
				return SPVFuncImplSSign;
			case GLSLstd450Reflect:
			{
				auto &type = compiler.get<SPIRType>(args[0]);
				if (type.vecsize == 1)
					return SPVFuncImplReflectScalar;
				break;
			}
			case GLSLstd450Refract:
			{
				auto &type = compiler.get<SPIRType>(args[0]);
				if (type.vecsize == 1)
					return SPVFuncImplRefractScalar;
				break;
			}
			case GLSLstd450FaceForward:
			{
				auto &type = compiler.get<SPIRType>(args[0]);
				if (type.vecsize == 1)
					return SPVFuncImplFaceForwardScalar;
				break;
			}
			case GLSLstd450MatrixInverse:
			{
				auto &mat_type = compiler.get<SPIRType>(args[0]);
				switch (mat_type.columns)
				{
				case 2:
					return SPVFuncImplInverse2x2;
				case 3:
					return SPVFuncImplInverse3x3;
				case 4:
					return SPVFuncImplInverse4x4;
				default:
					break;
				}
				break;
			}
			default:
				break;
			}
		}
		break;
	}

	case OpGroupNonUniformBroadcast:
		return SPVFuncImplSubgroupBroadcast;
	case OpGroupNonUniformBroadcastFirst:
		return SPVFuncImplSubgroupBroadcastFirst;
	case OpGroupNonUniformBallot:
		return SPVFuncImplSubgroupBallot;
	case OpGroupNonUniformInverseBallot:
	case OpGroupNonUniformBallotBitExtract:
		return SPVFuncImplSubgroupBallotBitExtract;
	case OpGroupNonUniformBallotFindLSB:
		return SPVFuncImplSubgroupBallotFindLSB;
	case OpGroupNonUniformBallotFindMSB:
		return SPVFuncImplSubgroupBallotFindMSB;
	case OpGroupNonUniformBallotBitCount:
		return SPVFuncImplSubgroupBallotBitCount;
	case OpGroupNonUniformAllEqual:
		return SPVFuncImplSubgroupAllEqual;
	case OpGroupNonUniformShuffle:
		return SPVFuncImplSubgroupShuffle;
	case OpGroupNonUniformShuffleXor:
		return SPVFuncImplSubgroupShuffleXor;
	case OpGroupNonUniformShuffleUp:
		return SPVFuncImplSubgroupShuffleUp;
	case OpGroupNonUniformShuffleDown:
		return SPVFuncImplSubgroupShuffleDown;
	case OpGroupNonUniformQuadBroadcast:
		return SPVFuncImplQuadBroadcast;
	case OpGroupNonUniformQuadSwap:
		return SPVFuncImplQuadSwap;
	default:
		break;
	}
	return SPVFuncImplNone;
}

// Sort both type and meta member content based on builtin status (put builtins at end),
// then by the required sorting aspect.
void CompilerMSL::MemberSorter::sort()
{
	// Create a temporary array of consecutive member indices and sort it based on how
	// the members should be reordered, based on builtin and sorting aspect meta info.
	size_t mbr_cnt = type.member_types.size();
	SmallVector<uint32_t> mbr_idxs(mbr_cnt);
	std::iota(mbr_idxs.begin(), mbr_idxs.end(), 0); // Fill with consecutive indices
	std::stable_sort(mbr_idxs.begin(), mbr_idxs.end(), *this); // Sort member indices based on sorting aspect

	bool sort_is_identity = true;
	for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
	{
		if (mbr_idx != mbr_idxs[mbr_idx])
		{
			sort_is_identity = false;
			break;
		}
	}

	if (sort_is_identity)
		return;

	if (meta.members.size() < type.member_types.size())
	{
		// This should never trigger in normal circumstances, but to be safe.
		meta.members.resize(type.member_types.size());
	}

	// Move type and meta member info to the order defined by the sorted member indices.
	// This is done by creating temporary copies of both member types and meta, and then
	// copying back to the original content at the sorted indices.
	auto mbr_types_cpy = type.member_types;
	auto mbr_meta_cpy = meta.members;
	for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
	{
		type.member_types[mbr_idx] = mbr_types_cpy[mbr_idxs[mbr_idx]];
		meta.members[mbr_idx] = mbr_meta_cpy[mbr_idxs[mbr_idx]];
	}

	// If we're sorting by Offset, this might affect user code which accesses a buffer block.
	// We will need to redirect member indices from defined index to sorted index using reverse lookup.
	if (sort_aspect == SortAspect::Offset)
	{
		type.member_type_index_redirection.resize(mbr_cnt);
		for (uint32_t map_idx = 0; map_idx < mbr_cnt; map_idx++)
			type.member_type_index_redirection[mbr_idxs[map_idx]] = map_idx;
	}
}
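
// Comparison operator used by sort(): orders members by builtin status
// (builtins last), then by the configured sorting aspect.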
bool CompilerMSL::MemberSorter::operator()(uint32_t mbr_idx1, uint32_t mbr_idx2)
{
	auto &mbr_meta1 = meta.members[mbr_idx1];
	auto &mbr_meta2 = meta.members[mbr_idx2];

	if (sort_aspect == LocationThenBuiltInType)
	{
		// Sort first by builtin status (put builtins at end), then by the sorting aspect.
		if (mbr_meta1.builtin != mbr_meta2.builtin)
			return mbr_meta2.builtin;
		else if (mbr_meta1.builtin)
			return mbr_meta1.builtin_type < mbr_meta2.builtin_type;
		else if (mbr_meta1.location == mbr_meta2.location)
			return mbr_meta1.component < mbr_meta2.component;
		else
			return mbr_meta1.location < mbr_meta2.location;
	}
	else
		return mbr_meta1.offset < mbr_meta2.offset;
}

CompilerMSL::MemberSorter::MemberSorter(SPIRType &t, Meta &m, SortAspect sa)
    : type(t)
    , meta(m)
    , sort_aspect(sa)
{
	// Ensure enough meta info is available
	meta.members.resize(max(type.member_types.size(), meta.members.size()));
}
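
// Remap the sampler with the given SPIR-V variable ID to an inline constexpr
// sampler declared directly in the generated MSL, rather than passed in as an argument.
// Usage sketch (hypothetical IDs and settings, not taken from this file):
//     MSLConstexprSampler samp;
//     samp.min_filter = MSL_SAMPLER_FILTER_LINEAR;
//     samp.mag_filter = MSL_SAMPLER_FILTER_LINEAR;
//     compiler.remap_constexpr_sampler(42, samp); // 42 = ID of the sampler variable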
void CompilerMSL::remap_constexpr_sampler(VariableID id, const MSLConstexprSampler &sampler)
{
	auto &type = get<SPIRType>(get<SPIRVariable>(id).basetype);
	if (type.basetype != SPIRType::SampledImage && type.basetype != SPIRType::Sampler)
		SPIRV_CROSS_THROW("Can only remap SampledImage and Sampler type.");
	if (!type.array.empty())
		SPIRV_CROSS_THROW("Cannot remap array of samplers.");
	constexpr_samplers_by_id[id] = sampler;
}
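
// As above, but identifies the sampler by descriptor set and binding
// rather than by SPIR-V variable ID.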
void CompilerMSL::remap_constexpr_sampler_by_binding(uint32_t desc_set, uint32_t binding,
                                                     const MSLConstexprSampler &sampler)
{
	constexpr_samplers_by_binding[{ desc_set, binding }] = sampler;
}
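
// After loading from a variable, cast the expression to the type the shader expects.
// This matters for builtin variables, which Metal may declare with a different
// base type or width than the SPIR-V declaration uses.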
void CompilerMSL::cast_from_variable_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type)
{
	auto *var = maybe_get_backing_variable(source_id);
	if (var)
		source_id = var->self;

	// Type fixups for workgroup variables if they are booleans.
	if (var && var->storage == StorageClassWorkgroup && expr_type.basetype == SPIRType::Boolean)
		expr = join(type_to_glsl(expr_type), "(", expr, ")");

	// Only interested in standalone builtin variables.
	if (!has_decoration(source_id, DecorationBuiltIn))
		return;

	auto builtin = static_cast<BuiltIn>(get_decoration(source_id, DecorationBuiltIn));
	auto expected_type = expr_type.basetype;
	auto expected_width = expr_type.width;
	switch (builtin)
	{
	case BuiltInGlobalInvocationId:
	case BuiltInLocalInvocationId:
	case BuiltInWorkgroupId:
	case BuiltInLocalInvocationIndex:
	case BuiltInWorkgroupSize:
	case BuiltInNumWorkgroups:
	case BuiltInLayer:
	case BuiltInViewportIndex:
	case BuiltInFragStencilRefEXT:
	case BuiltInPrimitiveId:
	case BuiltInSubgroupSize:
	case BuiltInSubgroupLocalInvocationId:
	case BuiltInViewIndex:
	case BuiltInVertexIndex:
	case BuiltInInstanceIndex:
	case BuiltInBaseInstance:
	case BuiltInBaseVertex:
		expected_type = SPIRType::UInt;
		expected_width = 32;
		break;

	case BuiltInTessLevelInner:
	case BuiltInTessLevelOuter:
		if (get_execution_model() == ExecutionModelTessellationControl)
		{
			expected_type = SPIRType::Half;
			expected_width = 16;
		}
		break;

	default:
		break;
	}

	if (expected_type != expr_type.basetype)
	{
		if (!expr_type.array.empty() && (builtin == BuiltInTessLevelInner || builtin == BuiltInTessLevelOuter))
		{
			// Triggers when loading TessLevel directly as an array.
			// Need explicit padding + cast.
			auto wrap_expr = join(type_to_glsl(expr_type), "({ ");

			uint32_t array_size = get_physical_tess_level_array_size(builtin);
			for (uint32_t i = 0; i < array_size; i++)
			{
				if (array_size > 1)
					wrap_expr += join("float(", expr, "[", i, "])");
				else
					wrap_expr += join("float(", expr, ")");
				if (i + 1 < array_size)
					wrap_expr += ", ";
			}

			if (get_execution_mode_bitset().get(ExecutionModeTriangles))
				wrap_expr += ", 0.0";

			wrap_expr += " })";
			expr = std::move(wrap_expr);
		}
		else
		{
			// These are of different widths, so we cannot do a straight bitcast.
			if (expected_width != expr_type.width)
				expr = join(type_to_glsl(expr_type), "(", expr, ")");
			else
				expr = bitcast_expression(expr_type, expected_type, expr);
		}
	}

	if (builtin == BuiltInTessCoord && get_entry_point().flags.get(ExecutionModeQuads) && expr_type.vecsize == 3)
	{
		// In SPIR-V, this is always a vec3, even for quads. In Metal, though, it's a float2 for quads.
		// The code is expecting a float3, so we need to widen this.
		expr = join("float3(", expr, ", 0)");
	}
}
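
// Before storing to a variable, cast the expression to the type Metal declares
// for that variable. This is the store-side counterpart of cast_from_variable_load().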
void CompilerMSL::cast_to_variable_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type)
{
	auto *var = maybe_get_backing_variable(target_id);
	if (var)
		target_id = var->self;

	// Type fixups for workgroup variables if they are booleans.
	if (var && var->storage == StorageClassWorkgroup && expr_type.basetype == SPIRType::Boolean)
	{
		auto short_type = expr_type;
		short_type.basetype = SPIRType::Short;
		expr = join(type_to_glsl(short_type), "(", expr, ")");
	}

	// Only interested in standalone builtin variables.
	if (!has_decoration(target_id, DecorationBuiltIn))
		return;

	auto builtin = static_cast<BuiltIn>(get_decoration(target_id, DecorationBuiltIn));
	auto expected_type = expr_type.basetype;
	auto expected_width = expr_type.width;
	switch (builtin)
	{
	case BuiltInLayer:
	case BuiltInViewportIndex:
	case BuiltInFragStencilRefEXT:
	case BuiltInPrimitiveId:
	case BuiltInViewIndex:
		expected_type = SPIRType::UInt;
		expected_width = 32;
		break;

	case BuiltInTessLevelInner:
	case BuiltInTessLevelOuter:
		expected_type = SPIRType::Half;
		expected_width = 16;
		break;

	default:
		break;
	}

	if (expected_type != expr_type.basetype)
	{
		if (expected_width != expr_type.width)
		{
			// These are of different widths, so we cannot do a straight bitcast.
			auto type = expr_type;
			type.basetype = expected_type;
			type.width = expected_width;
			expr = join(type_to_glsl(type), "(", expr, ")");
		}
		else
		{
			auto type = expr_type;
			type.basetype = expected_type;
			expr = bitcast_expression(type, expr_type.basetype, expr);
		}
	}
}

string CompilerMSL::to_initializer_expression(const SPIRVariable &var)
{
	// With MSL, we risk getting an array initializer here if the type is an array or struct.
	// FIXME: We cannot handle non-constant arrays being initialized.
	// We will need to inject spvArrayCopy here somehow ...
	auto &type = get<SPIRType>(var.basetype);
	string expr;
	if (ir.ids[var.initializer].get_type() == TypeConstant &&
	    (!type.array.empty() || type.basetype == SPIRType::Struct))
		expr = constant_expression(get<SPIRConstant>(var.initializer));
	else
		expr = CompilerGLSL::to_initializer_expression(var);

	// If the initializer has more vector components than the variable, add a swizzle.
	// FIXME: This can't handle arrays or structs.
	auto &init_type = expression_type(var.initializer);
	if (type.array.empty() && type.basetype != SPIRType::Struct && init_type.vecsize > type.vecsize)
		expr = enclose_expression(expr + vector_swizzle(type.vecsize, 0));

	return expr;
}
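
// MSL can zero-initialize any type with an empty brace initializer.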
string CompilerMSL::to_zero_initialized_expression(uint32_t)
{
	return "{}";
}
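
// Returns true if the given descriptor set will be emitted as a Metal argument buffer,
// rather than as discrete resource arguments.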
bool CompilerMSL::descriptor_set_is_argument_buffer(uint32_t desc_set) const
{
	if (!msl_options.argument_buffers)
		return false;
	if (desc_set >= kMaxArgumentBuffers)
		return false;
	return (argument_buffer_discrete_mask & (1u << desc_set)) == 0;
}

bool CompilerMSL::is_supported_argument_buffer_type(const SPIRType &type) const
{
	// Very specifically, image load-store in argument buffers is disallowed in MSL on iOS.
	// But we won't know when the argument buffer is encoded whether this image will have
	// a NonWritable decoration. So just use discrete arguments for all storage images on iOS.
	bool is_storage_image = type.basetype == SPIRType::Image && type.image.sampled == 2;
	bool is_supported_type = !msl_options.is_ios() || !is_storage_image;
	return !type_is_msl_framebuffer_fetch(type) && is_supported_type;
}

void CompilerMSL::analyze_argument_buffers()
{
	// Gather all used resources and sort them out into argument buffers.
	// Each argument buffer corresponds to a descriptor set in SPIR-V.
	// The [[id(N)]] values used correspond to the resource mapping we have for MSL.
	// Otherwise, the binding number is used, but this is generally not safe for some types like
	// combined image samplers and arrays of resources. Metal needs different indices here,
	// while SPIR-V can have one descriptor set binding. To use argument buffers in practice,
	// you will need to use the remapping from the API.
	for (auto &id : argument_buffer_ids)
		id = 0;

	// Output resources, sorted by resource index & type.
	struct Resource
	{
		SPIRVariable *var;
		string name;
		SPIRType::BaseType basetype;
		uint32_t index;
		uint32_t plane;
	};

	SmallVector<Resource> resources_in_set[kMaxArgumentBuffers];
	SmallVector<uint32_t> inline_block_vars;

	bool set_needs_swizzle_buffer[kMaxArgumentBuffers] = {};
	bool set_needs_buffer_sizes[kMaxArgumentBuffers] = {};
	bool needs_buffer_sizes = false;

	ir.for_each_typed_id<SPIRVariable>([&](uint32_t self, SPIRVariable &var) {
		if ((var.storage == StorageClassUniform || var.storage == StorageClassUniformConstant ||
		     var.storage == StorageClassStorageBuffer) &&
		    !is_hidden_variable(var))
		{
			uint32_t desc_set = get_decoration(self, DecorationDescriptorSet);
			// Ignore if it's part of a push descriptor set.
			if (!descriptor_set_is_argument_buffer(desc_set))
				return;

			uint32_t var_id = var.self;
			auto &type = get_variable_data_type(var);

			if (desc_set >= kMaxArgumentBuffers)
				SPIRV_CROSS_THROW("Descriptor set index is out of range.");

			const MSLConstexprSampler *constexpr_sampler = nullptr;
			if (type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::Sampler)
			{
				constexpr_sampler = find_constexpr_sampler(var_id);
				if (constexpr_sampler)
				{
					// Mark this ID as a constexpr sampler for later in case it came from set/bindings.
					constexpr_samplers_by_id[var_id] = *constexpr_sampler;
				}
			}

			uint32_t binding = get_decoration(var_id, DecorationBinding);
			if (type.basetype == SPIRType::SampledImage)
			{
				add_resource_name(var_id);

				uint32_t plane_count = 1;
				if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable)
					plane_count = constexpr_sampler->planes;

				for (uint32_t i = 0; i < plane_count; i++)
				{
					uint32_t image_resource_index = get_metal_resource_index(var, SPIRType::Image, i);
					resources_in_set[desc_set].push_back(
					    { &var, to_name(var_id), SPIRType::Image, image_resource_index, i });
				}

				if (type.image.dim != DimBuffer && !constexpr_sampler)
				{
					uint32_t sampler_resource_index = get_metal_resource_index(var, SPIRType::Sampler);
					resources_in_set[desc_set].push_back(
					    { &var, to_sampler_expression(var_id), SPIRType::Sampler, sampler_resource_index, 0 });
				}
			}
			else if (inline_uniform_blocks.count(SetBindingPair{ desc_set, binding }))
			{
				inline_block_vars.push_back(var_id);
			}
			else if (!constexpr_sampler && is_supported_argument_buffer_type(type))
			{
				// constexpr samplers are not declared as resources.
				// Inline uniform blocks are always emitted at the end.
				add_resource_name(var_id);
				resources_in_set[desc_set].push_back(
				    { &var, to_name(var_id), type.basetype, get_metal_resource_index(var, type.basetype), 0 });

				// Emulate texture2D atomic operations
				if (atomic_image_vars.count(var.self))
				{
					uint32_t buffer_resource_index = get_metal_resource_index(var, SPIRType::AtomicCounter, 0);
					resources_in_set[desc_set].push_back(
					    { &var, to_name(var_id) + "_atomic", SPIRType::Struct, buffer_resource_index, 0 });
				}
			}

			// Check if this descriptor set needs a swizzle buffer.
			if (needs_swizzle_buffer_def && is_sampled_image_type(type))
				set_needs_swizzle_buffer[desc_set] = true;
			else if (buffers_requiring_array_length.count(var_id) != 0)
			{
				set_needs_buffer_sizes[desc_set] = true;
				needs_buffer_sizes = true;
			}
		}
	});

	if (needs_swizzle_buffer_def || needs_buffer_sizes)
	{
		uint32_t uint_ptr_type_id = 0;

		// We might have to add a swizzle buffer resource to the set.
		for (uint32_t desc_set = 0; desc_set < kMaxArgumentBuffers; desc_set++)
		{
			if (!set_needs_swizzle_buffer[desc_set] && !set_needs_buffer_sizes[desc_set])
				continue;

			if (uint_ptr_type_id == 0)
			{
				uint_ptr_type_id = ir.increase_bound_by(1);

				// Create a buffer to hold extra data, including the swizzle constants.
				SPIRType uint_type_pointer = get_uint_type();
				uint_type_pointer.pointer = true;
				uint_type_pointer.pointer_depth++;
				uint_type_pointer.parent_type = get_uint_type_id();
				uint_type_pointer.storage = StorageClassUniform;
				set<SPIRType>(uint_ptr_type_id, uint_type_pointer);
				set_decoration(uint_ptr_type_id, DecorationArrayStride, 4);
			}

			if (set_needs_swizzle_buffer[desc_set])
			{
				uint32_t var_id = ir.increase_bound_by(1);
				auto &var = set<SPIRVariable>(var_id, uint_ptr_type_id, StorageClassUniformConstant);
				set_name(var_id, "spvSwizzleConstants");
				set_decoration(var_id, DecorationDescriptorSet, desc_set);
				set_decoration(var_id, DecorationBinding, kSwizzleBufferBinding);
				resources_in_set[desc_set].push_back(
				    { &var, to_name(var_id), SPIRType::UInt, get_metal_resource_index(var, SPIRType::UInt), 0 });
			}

			if (set_needs_buffer_sizes[desc_set])
			{
				uint32_t var_id = ir.increase_bound_by(1);
				auto &var = set<SPIRVariable>(var_id, uint_ptr_type_id, StorageClassUniformConstant);
				set_name(var_id, "spvBufferSizeConstants");
				set_decoration(var_id, DecorationDescriptorSet, desc_set);
				set_decoration(var_id, DecorationBinding, kBufferSizeBufferBinding);
				resources_in_set[desc_set].push_back(
				    { &var, to_name(var_id), SPIRType::UInt, get_metal_resource_index(var, SPIRType::UInt), 0 });
			}
		}
	}

	// Now add inline uniform blocks.
	for (uint32_t var_id : inline_block_vars)
	{
		auto &var = get<SPIRVariable>(var_id);
		uint32_t desc_set = get_decoration(var_id, DecorationDescriptorSet);
		add_resource_name(var_id);
		resources_in_set[desc_set].push_back(
		    { &var, to_name(var_id), SPIRType::Struct, get_metal_resource_index(var, SPIRType::Struct), 0 });
	}

	for (uint32_t desc_set = 0; desc_set < kMaxArgumentBuffers; desc_set++)
	{
		auto &resources = resources_in_set[desc_set];
		if (resources.empty())
			continue;

		assert(descriptor_set_is_argument_buffer(desc_set));

		uint32_t next_id = ir.increase_bound_by(3);
		uint32_t type_id = next_id + 1;
		uint32_t ptr_type_id = next_id + 2;
		argument_buffer_ids[desc_set] = next_id;

		auto &buffer_type = set<SPIRType>(type_id);

		buffer_type.basetype = SPIRType::Struct;

		if ((argument_buffer_device_storage_mask & (1u << desc_set)) != 0)
		{
			buffer_type.storage = StorageClassStorageBuffer;

			// Make sure the argument buffer gets marked as const device.
			set_decoration(next_id, DecorationNonWritable);
			// Need to mark the type as a Block to enable this.
			set_decoration(type_id, DecorationBlock);
		}
		else
			buffer_type.storage = StorageClassUniform;

		set_name(type_id, join("spvDescriptorSetBuffer", desc_set));

		auto &ptr_type = set<SPIRType>(ptr_type_id);
		ptr_type = buffer_type;
		ptr_type.pointer = true;
		ptr_type.pointer_depth++;
		ptr_type.parent_type = type_id;

		uint32_t buffer_variable_id = next_id;
		set<SPIRVariable>(buffer_variable_id, ptr_type_id, StorageClassUniform);
		set_name(buffer_variable_id, join("spvDescriptorSet", desc_set));

		// Ids must be emitted in ID order.
		sort(begin(resources), end(resources), [&](const Resource &lhs, const Resource &rhs) -> bool {
			return tie(lhs.index, lhs.basetype) < tie(rhs.index, rhs.basetype);
		});

		uint32_t member_index = 0;
		uint32_t next_arg_buff_index = 0;
		for (auto &resource : resources)
		{
			auto &var = *resource.var;
			auto &type = get_variable_data_type(var);

			// If needed, synthesize and add padding members.
			// member_index and next_arg_buff_index are incremented when padding members are added.
			if (msl_options.pad_argument_buffer_resources)
			{
				while (resource.index > next_arg_buff_index)
				{
					auto &rez_bind = get_argument_buffer_resource(desc_set, next_arg_buff_index);
					switch (rez_bind.basetype)
					{
					case SPIRType::Void:
					case SPIRType::Boolean:
					case SPIRType::SByte:
					case SPIRType::UByte:
					case SPIRType::Short:
					case SPIRType::UShort:
					case SPIRType::Int:
					case SPIRType::UInt:
					case SPIRType::Int64:
					case SPIRType::UInt64:
					case SPIRType::AtomicCounter:
					case SPIRType::Half:
					case SPIRType::Float:
					case SPIRType::Double:
						add_argument_buffer_padding_buffer_type(buffer_type, member_index, next_arg_buff_index, rez_bind);
						break;
					case SPIRType::Image:
						add_argument_buffer_padding_image_type(buffer_type, member_index, next_arg_buff_index, rez_bind);
						break;
					case SPIRType::Sampler:
						add_argument_buffer_padding_sampler_type(buffer_type, member_index, next_arg_buff_index, rez_bind);
						break;
					case SPIRType::SampledImage:
						if (next_arg_buff_index == rez_bind.msl_sampler)
							add_argument_buffer_padding_sampler_type(buffer_type, member_index, next_arg_buff_index, rez_bind);
						else
							add_argument_buffer_padding_image_type(buffer_type, member_index, next_arg_buff_index, rez_bind);
						break;
					default:
						break;
					}
				}

				// Adjust the number of slots consumed by current member itself.
				// If actual member is an array, allow runtime array resolution as well.
				uint32_t elem_cnt = type.array.empty() ? 1 : to_array_size_literal(type);
				if (elem_cnt == 0)
					elem_cnt = get_resource_array_size(var.self);

				next_arg_buff_index += elem_cnt;
			}

			string mbr_name = ensure_valid_name(resource.name, "m");
			if (resource.plane > 0)
				mbr_name += join(plane_name_suffix, resource.plane);
			set_member_name(buffer_type.self, member_index, mbr_name);

			if (resource.basetype == SPIRType::Sampler && type.basetype != SPIRType::Sampler)
			{
				// Have to synthesize a sampler type here.
				bool type_is_array = !type.array.empty();
				uint32_t sampler_type_id = ir.increase_bound_by(type_is_array ? 2 : 1);
				auto &new_sampler_type = set<SPIRType>(sampler_type_id);
				new_sampler_type.basetype = SPIRType::Sampler;
				new_sampler_type.storage = StorageClassUniformConstant;

				if (type_is_array)
				{
					uint32_t sampler_type_array_id = sampler_type_id + 1;
					auto &sampler_type_array = set<SPIRType>(sampler_type_array_id);
					sampler_type_array = new_sampler_type;
					sampler_type_array.array = type.array;
					sampler_type_array.array_size_literal = type.array_size_literal;
					sampler_type_array.parent_type = sampler_type_id;
					buffer_type.member_types.push_back(sampler_type_array_id);
				}
				else
					buffer_type.member_types.push_back(sampler_type_id);
			}
			else
			{
				uint32_t binding = get_decoration(var.self, DecorationBinding);
				SetBindingPair pair = { desc_set, binding };

				if (resource.basetype == SPIRType::Image || resource.basetype == SPIRType::Sampler ||
				    resource.basetype == SPIRType::SampledImage)
				{
					// Drop pointer information when we emit the resources into a struct.
					buffer_type.member_types.push_back(get_variable_data_type_id(var));
					if (resource.plane == 0)
						set_qualified_name(var.self, join(to_name(buffer_variable_id), ".", mbr_name));
				}
				else if (buffers_requiring_dynamic_offset.count(pair))
				{
					// Don't set the qualified name here; we'll define a variable holding the corrected buffer address later.
					buffer_type.member_types.push_back(var.basetype);
					buffers_requiring_dynamic_offset[pair].second = var.self;
				}
				else if (inline_uniform_blocks.count(pair))
				{
					// Put the buffer block itself into the argument buffer.
					buffer_type.member_types.push_back(get_variable_data_type_id(var));
					set_qualified_name(var.self, join(to_name(buffer_variable_id), ".", mbr_name));
				}
				else if (atomic_image_vars.count(var.self))
				{
					// Emulate texture2D atomic operations.
					// Don't set the qualified name: it's already set for this variable,
					// and the code that references the buffer manually appends "_atomic"
					// to the name.
					uint32_t offset = ir.increase_bound_by(2);
					uint32_t atomic_type_id = offset;
					uint32_t type_ptr_id = offset + 1;

					SPIRType atomic_type;
					atomic_type.basetype = SPIRType::AtomicCounter;
					atomic_type.width = 32;
					atomic_type.vecsize = 1;
					set<SPIRType>(atomic_type_id, atomic_type);

					atomic_type.pointer = true;
					atomic_type.pointer_depth++;
					atomic_type.parent_type = atomic_type_id;
					atomic_type.storage = StorageClassStorageBuffer;
					auto &atomic_ptr_type = set<SPIRType>(type_ptr_id, atomic_type);
					atomic_ptr_type.self = atomic_type_id;

					buffer_type.member_types.push_back(type_ptr_id);
				}
				else
				{
					// Resources will be declared as pointers not references, so automatically dereference as appropriate.
					buffer_type.member_types.push_back(var.basetype);
					if (type.array.empty())
						set_qualified_name(var.self, join("(*", to_name(buffer_variable_id), ".", mbr_name, ")"));
					else
						set_qualified_name(var.self, join(to_name(buffer_variable_id), ".", mbr_name));
				}
			}

			set_extended_member_decoration(buffer_type.self, member_index, SPIRVCrossDecorationResourceIndexPrimary,
			                               resource.index);
			set_extended_member_decoration(buffer_type.self, member_index, SPIRVCrossDecorationInterfaceOrigID,
			                               var.self);
			member_index++;
		}
	}
}

// Return the resource binding of the app-provided resource for the given descriptor set
// that matches the given argument buffer index.
// This is a two-step lookup: first look up the resource binding number from the argument buffer index,
// then look up the resource binding using that binding number.
MSLResourceBinding &CompilerMSL::get_argument_buffer_resource(uint32_t desc_set, uint32_t arg_idx)
{
	auto stage = get_entry_point().model;
	StageSetBinding arg_idx_tuple = { stage, desc_set, arg_idx };
	auto arg_itr = resource_arg_buff_idx_to_binding_number.find(arg_idx_tuple);
	if (arg_itr != end(resource_arg_buff_idx_to_binding_number))
	{
		StageSetBinding bind_tuple = { stage, desc_set, arg_itr->second };
		auto bind_itr = resource_bindings.find(bind_tuple);
		if (bind_itr != end(resource_bindings))
			return bind_itr->second.first;
	}
	SPIRV_CROSS_THROW("Argument buffer resource base type could not be determined. When padding argument buffer "
	                  "elements, all descriptor set resources must be supplied with a base type by the app.");
}

// Adds an argument buffer padding argument buffer type as one or more members of the struct type at the member index.
// Metal does not support arrays of buffers, so these are emitted as multiple struct members.
void CompilerMSL::add_argument_buffer_padding_buffer_type(SPIRType &struct_type, uint32_t &mbr_idx,
                                                          uint32_t &arg_buff_index, MSLResourceBinding &rez_bind)
{
	if (!argument_buffer_padding_buffer_type_id)
	{
		uint32_t buff_type_id = ir.increase_bound_by(2);
		auto &buff_type = set<SPIRType>(buff_type_id);
		buff_type.basetype = rez_bind.basetype;
		buff_type.storage = StorageClassUniformConstant;

		uint32_t ptr_type_id = buff_type_id + 1;
		auto &ptr_type = set<SPIRType>(ptr_type_id);
		ptr_type = buff_type;
		ptr_type.pointer = true;
		ptr_type.pointer_depth++;
		ptr_type.parent_type = buff_type_id;

		argument_buffer_padding_buffer_type_id = ptr_type_id;
	}

	for (uint32_t rez_idx = 0; rez_idx < rez_bind.count; rez_idx++)
		add_argument_buffer_padding_type(argument_buffer_padding_buffer_type_id, struct_type, mbr_idx, arg_buff_index, 1);
}

// Adds an argument buffer padding argument image type as a member of the struct type at the member index.
void CompilerMSL::add_argument_buffer_padding_image_type(SPIRType &struct_type, uint32_t &mbr_idx,
                                                         uint32_t &arg_buff_index, MSLResourceBinding &rez_bind)
{
	if (!argument_buffer_padding_image_type_id)
	{
		uint32_t base_type_id = ir.increase_bound_by(2);
		auto &base_type = set<SPIRType>(base_type_id);
		base_type.basetype = SPIRType::Float;
		base_type.width = 32;

		uint32_t img_type_id = base_type_id + 1;
		auto &img_type = set<SPIRType>(img_type_id);
		img_type.basetype = SPIRType::Image;
		img_type.storage = StorageClassUniformConstant;

		img_type.image.type = base_type_id;
		img_type.image.dim = Dim2D;
		img_type.image.depth = false;
		img_type.image.arrayed = false;
		img_type.image.ms = false;
		img_type.image.sampled = 1;
		img_type.image.format = ImageFormatUnknown;
		img_type.image.access = AccessQualifierMax;

		argument_buffer_padding_image_type_id = img_type_id;
	}

	add_argument_buffer_padding_type(argument_buffer_padding_image_type_id, struct_type, mbr_idx, arg_buff_index, rez_bind.count);
}

// Adds an argument buffer padding argument sampler type as a member of the struct type at the member index.
void CompilerMSL::add_argument_buffer_padding_sampler_type(SPIRType &struct_type, uint32_t &mbr_idx,
                                                           uint32_t &arg_buff_index, MSLResourceBinding &rez_bind)
{
	if (!argument_buffer_padding_sampler_type_id)
	{
		uint32_t samp_type_id = ir.increase_bound_by(1);
		auto &samp_type = set<SPIRType>(samp_type_id);
		samp_type.basetype = SPIRType::Sampler;
		samp_type.storage = StorageClassUniformConstant;

		argument_buffer_padding_sampler_type_id = samp_type_id;
	}

	add_argument_buffer_padding_type(argument_buffer_padding_sampler_type_id, struct_type, mbr_idx, arg_buff_index, rez_bind.count);
}

// Adds the argument buffer padding argument type as a member of the struct type at the member index.
// Advances both arg_buff_index and mbr_idx to next argument slots.
void CompilerMSL::add_argument_buffer_padding_type(uint32_t mbr_type_id, SPIRType &struct_type, uint32_t &mbr_idx,
                                                   uint32_t &arg_buff_index, uint32_t count)
{
	uint32_t type_id = mbr_type_id;
	if (count > 1)
	{
		uint32_t ary_type_id = ir.increase_bound_by(1);
		auto &ary_type = set<SPIRType>(ary_type_id);
		ary_type = get<SPIRType>(type_id);
		ary_type.array.push_back(count);
		ary_type.array_size_literal.push_back(true);
		ary_type.parent_type = type_id;
		type_id = ary_type_id;
	}

	set_member_name(struct_type.self, mbr_idx, join("_m", arg_buff_index, "_pad"));
	set_extended_member_decoration(struct_type.self, mbr_idx, SPIRVCrossDecorationResourceIndexPrimary, arg_buff_index);
	struct_type.member_types.push_back(type_id);

	arg_buff_index += count;
	mbr_idx++;
}

void CompilerMSL::activate_argument_buffer_resources()
{
	// For ABI compatibility, force-enable all resources which are part of argument buffers.
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t self, const SPIRVariable &) {
		if (!has_decoration(self, DecorationDescriptorSet))
			return;

		uint32_t desc_set = get_decoration(self, DecorationDescriptorSet);
		if (descriptor_set_is_argument_buffer(desc_set))
			active_interface_variables.insert(self);
	});
}
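
// Returns true if arrays must be emitted as plain C-style arrays
// rather than wrapped in the spvUnsafeArray<T, N> value type.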
bool CompilerMSL::using_builtin_array() const
{
	return msl_options.force_native_arrays || is_using_builtin_array;
}
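
// Sets the suffix used to name the implicit sampler of a combined image sampler
// (the default is "Smplr"). Usage sketch (hypothetical suffix):
//     compiler.set_combined_sampler_suffix("_sampler");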
void CompilerMSL::set_combined_sampler_suffix(const char *suffix)
{
	sampler_name_suffix = suffix;
}

const char *CompilerMSL::get_combined_sampler_suffix() const
{
	return sampler_name_suffix.c_str();
}
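
// Block hints are not emitted for MSL.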
void CompilerMSL::emit_block_hints(const SPIRBlock &)
{
}
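
// Formats msl_options.additional_fixed_sample_mask as a hexadecimal literal
// for splicing into the generated shader source.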
string CompilerMSL::additional_fixed_sample_mask_str() const
{
	char print_buffer[32];
	// Use snprintf to guard against overflow; the mask formats to at most 10 characters as hex.
	snprintf(print_buffer, sizeof(print_buffer), "0x%x", msl_options.additional_fixed_sample_mask);
	return print_buffer;
}